2006-06-25 20:48:02 -04:00
|
|
|
/*
|
|
|
|
|
* Health-checks functions.
|
|
|
|
|
*
|
2009-03-08 04:38:41 -04:00
|
|
|
* Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
|
2009-09-23 16:09:24 -04:00
|
|
|
* Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
|
2006-06-25 20:48:02 -04:00
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2008-01-18 06:18:15 -05:00
|
|
|
#include <assert.h>
|
2009-09-23 16:09:24 -04:00
|
|
|
#include <ctype.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <errno.h>
|
|
|
|
|
#include <fcntl.h>
|
2014-07-07 18:54:10 -04:00
|
|
|
#include <signal.h>
|
2015-01-29 21:23:00 -05:00
|
|
|
#include <stdarg.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <stdio.h>
|
2007-10-14 17:40:01 -04:00
|
|
|
#include <stdlib.h>
|
2006-06-29 11:53:05 -04:00
|
|
|
#include <string.h>
|
2007-10-14 17:40:01 -04:00
|
|
|
#include <time.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <unistd.h>
|
2019-03-01 05:15:10 -05:00
|
|
|
#include <sys/resource.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <sys/socket.h>
|
2009-08-24 07:11:06 -04:00
|
|
|
#include <sys/types.h>
|
2014-06-19 23:30:16 -04:00
|
|
|
#include <sys/wait.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <netinet/in.h>
|
2009-07-15 01:16:31 -04:00
|
|
|
#include <netinet/tcp.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <arpa/inet.h>
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
#include <common/cfgparse.h>
|
2012-08-24 13:22:53 -04:00
|
|
|
#include <common/chunk.h>
|
2006-06-29 11:53:05 -04:00
|
|
|
#include <common/compat.h>
|
|
|
|
|
#include <common/config.h>
|
|
|
|
|
#include <common/mini-clist.h>
|
2007-04-15 14:56:27 -04:00
|
|
|
#include <common/standard.h>
|
2006-06-29 11:53:05 -04:00
|
|
|
#include <common/time.h>
|
2017-10-20 09:40:23 -04:00
|
|
|
#include <common/hathreads.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
#include <types/global.h>
|
2015-04-13 19:15:08 -04:00
|
|
|
#include <types/dns.h>
|
2016-11-21 11:49:11 -05:00
|
|
|
#include <types/stats.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2020-02-21 12:14:59 -05:00
|
|
|
#include <proto/action.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/backend.h>
|
2009-09-23 16:09:24 -04:00
|
|
|
#include <proto/checks.h>
|
2016-11-21 11:49:11 -05:00
|
|
|
#include <proto/stats.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/fd.h>
|
|
|
|
|
#include <proto/log.h>
|
MEDIUM: connection: start to introduce a mux layer between xprt and data
For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside
a connection. After quite a long brainstorming, it appears that the
connection interface to the existing streams is appropriate just like
the connection interface to the lower layers. In fact we need to have
the mux layer in the middle of the connection, between the transport
and the data layer.
A mux can exist on two directions/sides. On the inbound direction, it
instantiates new streams from incoming connections, while on the outbound
direction it muxes streams into outgoing connections. The difference is
visible on the mux->init() call : in one case, an upper context is already
known (outgoing connection), and in the other case, the upper context is
not yet known (incoming connection) and will have to be allocated by the
mux. The session doesn't have to create the new streams anymore, as this
is performed by the mux itself.
This patch introduces this and creates a pass-through mux called
"mux_pt" which is used for all new connections and which only
calls the data layer's recv,send,wake() calls. One incoming stream
is immediately created when init() is called on the inbound direction.
There should not be any visible impact.
Note that the connection's mux is purposely not set until the session
is completed so that we don't accidentally run with the wrong mux. This
must not cause any issue as the xprt_done_cb function is always called
prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
|
|
|
#include <proto/mux_pt.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/queue.h>
|
2009-06-10 05:09:37 -04:00
|
|
|
#include <proto/port_range.h>
|
2008-01-13 12:40:14 -05:00
|
|
|
#include <proto/proto_tcp.h>
|
2013-12-10 18:52:19 -05:00
|
|
|
#include <proto/protocol.h>
|
2006-12-31 11:46:05 -05:00
|
|
|
#include <proto/proxy.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/server.h>
|
2016-06-21 10:27:34 -04:00
|
|
|
#include <proto/signal.h>
|
2011-03-10 08:03:36 -05:00
|
|
|
#include <proto/stream_interface.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/task.h>
|
2020-02-21 12:13:44 -05:00
|
|
|
#include <proto/vars.h>
|
2015-04-13 19:15:08 -04:00
|
|
|
#include <proto/log.h>
|
|
|
|
|
#include <proto/dns.h>
|
|
|
|
|
#include <proto/proto_udp.h>
|
2017-10-17 11:33:43 -04:00
|
|
|
#include <proto/ssl_sock.h>
|
2020-03-30 09:19:03 -04:00
|
|
|
#include <proto/sample.h>
|
2017-10-17 11:33:43 -04:00
|
|
|
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
static int httpchk_expect(struct server *s, int done);
|
2020-03-30 05:05:10 -04:00
|
|
|
static int tcpcheck_get_step_id(struct check *, struct tcpcheck_rule *);
|
|
|
|
|
static char *tcpcheck_get_step_comment(struct check *, struct tcpcheck_rule *);
|
2017-10-04 12:41:00 -04:00
|
|
|
static int tcpcheck_main(struct check *);
|
2018-07-17 12:49:38 -04:00
|
|
|
static void __event_srv_chk_w(struct conn_stream *cs);
|
2018-08-28 13:36:18 -04:00
|
|
|
static int wake_srv_chk(struct conn_stream *cs);
|
2018-08-09 07:06:55 -04:00
|
|
|
static void __event_srv_chk_r(struct conn_stream *cs);
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
|
2020-03-26 16:10:03 -04:00
|
|
|
static int srv_check_healthcheck_port(struct check *chk);
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
/* Global list to share all tcp-checks */
|
|
|
|
|
struct list tcpchecks_list = LIST_HEAD_INIT(tcpchecks_list);
|
|
|
|
|
|
|
|
|
|
|
2018-11-26 05:58:30 -05:00
|
|
|
DECLARE_STATIC_POOL(pool_head_email_alert, "email_alert", sizeof(struct email_alert));
|
|
|
|
|
DECLARE_STATIC_POOL(pool_head_tcpcheck_rule, "tcpcheck_rule", sizeof(struct tcpcheck_rule));
|
2017-10-23 09:45:20 -04:00
|
|
|
|
2020-02-14 11:42:54 -05:00
|
|
|
/* Dummy frontend used to create all checks sessions. */
|
|
|
|
|
static struct proxy checks_fe;
|
2017-10-23 09:45:20 -04:00
|
|
|
|
2012-03-18 18:24:41 -04:00
|
|
|
static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
|
|
|
|
|
[HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
|
2009-09-27 09:50:02 -04:00
|
|
|
[HCHK_STATUS_START] = { /* SPECIAL STATUS*/ },
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2014-05-20 14:56:30 -04:00
|
|
|
/* Below we have finished checks */
|
|
|
|
|
[HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
|
2009-12-15 16:31:24 -05:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
|
|
|
|
|
[HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
|
|
|
|
|
[HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
|
|
|
|
|
[HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
|
|
|
|
|
[HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
|
|
|
|
|
[HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2009-09-27 09:50:02 -04:00
|
|
|
[HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
[HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
|
|
|
|
|
[HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
|
|
|
|
|
[HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
[HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
|
|
|
|
|
[HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
|
2014-08-06 19:55:37 -04:00
|
|
|
[HCHK_STATUS_PROCOK] = { CHK_RES_PASSED, "PROCOK", "External check passed" },
|
2009-09-23 16:09:24 -04:00
|
|
|
};
|
|
|
|
|
|
2014-12-27 16:28:38 -05:00
|
|
|
const struct extcheck_env extcheck_envs[EXTCHK_SIZE] = {
|
|
|
|
|
[EXTCHK_PATH] = { "PATH", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_PROXY_NAME] = { "HAPROXY_PROXY_NAME", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_PROXY_ID] = { "HAPROXY_PROXY_ID", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_PROXY_ADDR] = { "HAPROXY_PROXY_ADDR", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_PROXY_PORT] = { "HAPROXY_PROXY_PORT", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_SERVER_NAME] = { "HAPROXY_SERVER_NAME", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_SERVER_ID] = { "HAPROXY_SERVER_ID", EXTCHK_SIZE_EVAL_INIT },
|
2020-04-26 03:50:31 -04:00
|
|
|
[EXTCHK_HAPROXY_SERVER_ADDR] = { "HAPROXY_SERVER_ADDR", EXTCHK_SIZE_ADDR },
|
|
|
|
|
[EXTCHK_HAPROXY_SERVER_PORT] = { "HAPROXY_SERVER_PORT", EXTCHK_SIZE_UINT },
|
2014-12-27 16:28:38 -05:00
|
|
|
[EXTCHK_HAPROXY_SERVER_MAXCONN] = { "HAPROXY_SERVER_MAXCONN", EXTCHK_SIZE_EVAL_INIT },
|
|
|
|
|
[EXTCHK_HAPROXY_SERVER_CURCONN] = { "HAPROXY_SERVER_CURCONN", EXTCHK_SIZE_ULONG },
|
|
|
|
|
};
|
|
|
|
|
|
2012-03-18 18:24:41 -04:00
|
|
|
static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
|
2009-12-15 16:31:24 -05:00
|
|
|
[HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
|
|
|
|
|
|
|
|
|
|
[HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
|
|
|
|
|
[HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
|
|
|
|
|
|
|
|
|
|
[HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
|
|
|
|
|
[HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
|
|
|
|
|
[HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
|
|
|
|
|
[HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
|
|
|
|
|
|
|
|
|
|
[HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
|
|
|
|
|
[HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
|
|
|
|
|
[HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
|
|
|
|
|
};
|
|
|
|
|
|
2019-12-27 06:03:27 -05:00
|
|
|
/* Filters an errno value: the transient codes EAGAIN, EINPROGRESS, EISCONN
 * and EALREADY are turned into 0, any other value of <err> is returned
 * unchanged.
 */
static inline int unclean_errno(int err)
{
	switch (err) {
	case EAGAIN:
	case EINPROGRESS:
	case EISCONN:
	case EALREADY:
		/* not a real error, can be ignored */
		return 0;
	default:
		return err;
	}
}
|
|
|
|
|
|
2009-09-23 16:09:24 -04:00
|
|
|
/*
|
|
|
|
|
* Convert check_status code to description
|
|
|
|
|
*/
|
|
|
|
|
const char *get_check_status_description(short check_status) {
|
|
|
|
|
|
|
|
|
|
const char *desc;
|
|
|
|
|
|
|
|
|
|
if (check_status < HCHK_STATUS_SIZE)
|
2009-09-27 09:50:02 -04:00
|
|
|
desc = check_statuses[check_status].desc;
|
2009-09-23 16:09:24 -04:00
|
|
|
else
|
|
|
|
|
desc = NULL;
|
|
|
|
|
|
|
|
|
|
if (desc && *desc)
|
|
|
|
|
return desc;
|
|
|
|
|
else
|
2009-09-27 09:50:02 -04:00
|
|
|
return check_statuses[HCHK_STATUS_UNKNOWN].desc;
|
2009-09-23 16:09:24 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Convert check_status code to short info
|
|
|
|
|
*/
|
|
|
|
|
const char *get_check_status_info(short check_status) {
|
|
|
|
|
|
|
|
|
|
const char *info;
|
|
|
|
|
|
|
|
|
|
if (check_status < HCHK_STATUS_SIZE)
|
2009-09-27 09:50:02 -04:00
|
|
|
info = check_statuses[check_status].info;
|
2009-09-23 16:09:24 -04:00
|
|
|
else
|
|
|
|
|
info = NULL;
|
|
|
|
|
|
|
|
|
|
if (info && *info)
|
|
|
|
|
return info;
|
|
|
|
|
else
|
2009-09-27 09:50:02 -04:00
|
|
|
return check_statuses[HCHK_STATUS_UNKNOWN].info;
|
2009-09-23 16:09:24 -04:00
|
|
|
}
|
|
|
|
|
|
2009-12-15 16:31:24 -05:00
|
|
|
const char *get_analyze_status(short analyze_status) {
|
|
|
|
|
|
|
|
|
|
const char *desc;
|
|
|
|
|
|
|
|
|
|
if (analyze_status < HANA_STATUS_SIZE)
|
|
|
|
|
desc = analyze_statuses[analyze_status].desc;
|
|
|
|
|
else
|
|
|
|
|
desc = NULL;
|
|
|
|
|
|
|
|
|
|
if (desc && *desc)
|
|
|
|
|
return desc;
|
|
|
|
|
else
|
|
|
|
|
return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-23 16:09:24 -04:00
|
|
|
/*
|
2013-02-23 01:35:38 -05:00
|
|
|
* Set check->status, update check->duration and fill check->result with
|
2014-05-20 08:55:13 -04:00
|
|
|
* an adequate CHK_RES_* value. The new check->health is computed based
|
|
|
|
|
* on the result.
|
2009-09-27 09:50:02 -04:00
|
|
|
*
|
|
|
|
|
* Show information in logs about failed health check if server is UP
|
|
|
|
|
* or succeeded health checks if server is DOWN.
|
2009-09-23 16:09:24 -04:00
|
|
|
*/
|
2013-02-23 01:35:38 -05:00
|
|
|
static void set_server_check_status(struct check *check, short status, const char *desc)
|
2012-10-29 11:51:55 -04:00
|
|
|
{
|
2013-02-23 01:35:38 -05:00
|
|
|
struct server *s = check->server;
|
2014-05-13 15:01:39 -04:00
|
|
|
short prev_status = check->status;
|
2014-05-20 08:55:13 -04:00
|
|
|
int report = 0;
|
2013-02-23 01:35:38 -05:00
|
|
|
|
2009-09-27 09:50:02 -04:00
|
|
|
if (status == HCHK_STATUS_START) {
|
2013-12-11 11:09:34 -05:00
|
|
|
check->result = CHK_RES_UNKNOWN; /* no result yet */
|
2013-02-23 01:35:38 -05:00
|
|
|
check->desc[0] = '\0';
|
|
|
|
|
check->start = now;
|
2009-09-27 09:50:02 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
if (!check->status)
|
2009-09-27 09:50:02 -04:00
|
|
|
return;
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2009-10-10 15:06:49 -04:00
|
|
|
if (desc && *desc) {
|
2013-02-23 01:35:38 -05:00
|
|
|
strncpy(check->desc, desc, HCHK_DESC_LEN-1);
|
|
|
|
|
check->desc[HCHK_DESC_LEN-1] = '\0';
|
2009-10-10 15:06:49 -04:00
|
|
|
} else
|
2013-02-23 01:35:38 -05:00
|
|
|
check->desc[0] = '\0';
|
2009-10-10 15:06:49 -04:00
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
check->status = status;
|
2009-09-27 09:50:02 -04:00
|
|
|
if (check_statuses[status].result)
|
2013-02-23 01:35:38 -05:00
|
|
|
check->result = check_statuses[status].result;
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2009-12-15 16:31:24 -05:00
|
|
|
if (status == HCHK_STATUS_HANA)
|
2013-02-23 01:35:38 -05:00
|
|
|
check->duration = -1;
|
|
|
|
|
else if (!tv_iszero(&check->start)) {
|
2009-09-27 09:50:02 -04:00
|
|
|
/* set_server_check_status() may be called more than once */
|
2013-02-23 01:35:38 -05:00
|
|
|
check->duration = tv_ms_elapsed(&check->start, &now);
|
|
|
|
|
tv_zero(&check->start);
|
2009-09-27 09:50:02 -04:00
|
|
|
}
|
|
|
|
|
|
2014-05-20 14:56:30 -04:00
|
|
|
/* no change is expected if no state change occurred */
|
|
|
|
|
if (check->result == CHK_RES_NEUTRAL)
|
|
|
|
|
return;
|
|
|
|
|
|
2019-01-11 12:43:04 -05:00
|
|
|
/* If the check was really just sending a mail, it won't have an
|
|
|
|
|
* associated server, so we're done now.
|
|
|
|
|
*/
|
|
|
|
|
if (!s)
|
|
|
|
|
return;
|
2014-05-20 08:55:13 -04:00
|
|
|
report = 0;
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
switch (check->result) {
|
|
|
|
|
case CHK_RES_FAILED:
|
2014-05-23 05:32:36 -04:00
|
|
|
/* Failure to connect to the agent as a secondary check should not
|
|
|
|
|
* cause the server to be marked down.
|
|
|
|
|
*/
|
|
|
|
|
if ((!(check->state & CHK_ST_AGENT) ||
|
2015-02-25 21:26:17 -05:00
|
|
|
(check->status >= HCHK_STATUS_L57DATA)) &&
|
BUG/MINOR: checks: Fix check->health computation for flapping servers
This patch fixes an old bug introduced in the commit 7b1d47ce ("MAJOR: checks:
move health checks changes to set_server_check_status()"). When a DOWN server is
flapping, everytime a check succeds, check->health is incremented. But when a
check fails, it is decremented only when it is higher than the rise value. So if
only one check succeds for a DOWN server, check->health will remain set to 1 for
all subsequent failing checks.
So, at first glance, it seems not that terrible because the server remains
DOWN. But it is reported in the transitional state "DOWN server, going up". And
it will remain in this state until it is UP again. And there is also an
insidious side effect. If a DOWN server is flapping time to time, It will end to
be considered UP after a uniq successful check, , regardless the rise threshold,
because check->health will be increased slowly and never decreased.
To fix the bug, we just need to reset check->health to 0 when a check fails for
a DOWN server. To do so, we just need to relax the condition to handle a failure
in the function set_server_check_status.
This patch must be backported to haproxy 1.5 and newer.
2018-05-02 06:12:45 -04:00
|
|
|
(check->health > 0)) {
|
2019-03-08 12:49:32 -05:00
|
|
|
_HA_ATOMIC_ADD(&s->counters.failed_checks, 1);
|
2014-05-20 08:55:13 -04:00
|
|
|
report = 1;
|
|
|
|
|
check->health--;
|
|
|
|
|
if (check->health < check->rise)
|
|
|
|
|
check->health = 0;
|
|
|
|
|
}
|
|
|
|
|
break;
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
case CHK_RES_PASSED:
|
|
|
|
|
case CHK_RES_CONDPASS: /* "condpass" cannot make the first step but it OK after a "passed" */
|
|
|
|
|
if ((check->health < check->rise + check->fall - 1) &&
|
|
|
|
|
(check->result == CHK_RES_PASSED || check->health > 0)) {
|
|
|
|
|
report = 1;
|
|
|
|
|
check->health++;
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
if (check->health >= check->rise)
|
|
|
|
|
check->health = check->rise + check->fall - 1; /* OK now */
|
|
|
|
|
}
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
/* clear consecutive_errors if observing is enabled */
|
|
|
|
|
if (s->onerror)
|
|
|
|
|
s->consecutive_errors = 0;
|
|
|
|
|
break;
|
2009-12-15 16:31:24 -05:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
if (s->proxy->options2 & PR_O2_LOGHCHKS &&
|
|
|
|
|
(status != prev_status || report)) {
|
|
|
|
|
chunk_printf(&trash,
|
2014-05-23 05:32:36 -04:00
|
|
|
"%s check for %sserver %s/%s %s%s",
|
|
|
|
|
(check->state & CHK_ST_AGENT) ? "Agent" : "Health",
|
2014-05-13 09:54:22 -04:00
|
|
|
s->flags & SRV_F_BACKUP ? "backup " : "",
|
2012-10-29 11:51:55 -04:00
|
|
|
s->proxy->id, s->id,
|
2013-12-11 11:09:34 -05:00
|
|
|
(check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
|
2014-05-20 08:55:13 -04:00
|
|
|
(check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2017-10-19 08:42:30 -04:00
|
|
|
srv_append_status(&trash, s, check, -1, 0);
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2012-10-29 11:51:55 -04:00
|
|
|
chunk_appendf(&trash, ", status: %d/%d %s",
|
2014-05-20 08:55:13 -04:00
|
|
|
(check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
|
|
|
|
|
(check->health >= check->rise) ? check->fall : check->rise,
|
|
|
|
|
(check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
|
2009-09-27 09:50:02 -04:00
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
ha_warning("%s.\n", trash.area);
|
|
|
|
|
send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
|
|
|
|
|
send_email_alert(s, LOG_INFO, "%s", trash.area);
|
2009-09-27 09:50:02 -04:00
|
|
|
}
|
2009-09-23 16:09:24 -04:00
|
|
|
}
|
|
|
|
|
|
2014-05-20 16:32:27 -04:00
|
|
|
/* Marks the check <check>'s server down if the current check is already failed
|
|
|
|
|
* and the server is not down yet nor in maintenance.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2014-05-20 16:32:27 -04:00
|
|
|
static void check_notify_failure(struct check *check)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2013-02-23 01:35:38 -05:00
|
|
|
struct server *s = check->server;
|
2011-06-21 01:34:58 -04:00
|
|
|
|
2014-05-20 08:55:13 -04:00
|
|
|
/* The agent secondary check should only cause a server to be marked
|
|
|
|
|
* as down if check->status is HCHK_STATUS_L7STS, which indicates
|
|
|
|
|
* that the agent returned "fail", "stopped" or "down".
|
|
|
|
|
* The implication here is that failure to connect to the agent
|
|
|
|
|
* as a secondary check should not cause the server to be marked
|
|
|
|
|
* down. */
|
|
|
|
|
if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
|
|
|
|
|
return;
|
|
|
|
|
|
2014-05-20 16:32:27 -04:00
|
|
|
if (check->health > 0)
|
|
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2014-05-20 16:32:27 -04:00
|
|
|
/* We only report a reason for the check if we did not do so previously */
|
2017-10-19 08:42:30 -04:00
|
|
|
srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2014-05-16 11:37:50 -04:00
|
|
|
/* Marks the check <check> as valid and tries to set its server up, provided
|
2014-05-21 04:30:54 -04:00
|
|
|
* it isn't in maintenance, it is not tracking a down server and other checks
|
|
|
|
|
* comply. The rule is simple : by default, a server is up, unless any of the
|
|
|
|
|
* following conditions is true :
|
|
|
|
|
* - health check failed (check->health < rise)
|
|
|
|
|
* - agent check failed (agent->health < rise)
|
|
|
|
|
* - the server tracks a down server (track && track->state == STOPPED)
|
|
|
|
|
* Note that if the server has a slowstart, it will switch to STARTING instead
|
|
|
|
|
* of RUNNING. Also, only the health checks support the nolb mode, so the
|
|
|
|
|
* agent's success may not take the server out of this mode.
|
2014-05-16 11:37:50 -04:00
|
|
|
*/
|
2014-05-21 04:30:54 -04:00
|
|
|
static void check_notify_success(struct check *check)
|
2014-05-16 11:37:50 -04:00
|
|
|
{
|
2013-02-23 01:35:38 -05:00
|
|
|
struct server *s = check->server;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->next_admin & SRV_ADMF_MAINT)
|
2014-05-16 11:37:50 -04:00
|
|
|
return;
|
2010-01-31 16:34:03 -05:00
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->track && s->track->next_state == SRV_ST_STOPPED)
|
2014-05-16 11:37:50 -04:00
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2014-05-21 04:30:54 -04:00
|
|
|
if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
|
|
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2014-05-21 04:30:54 -04:00
|
|
|
if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
|
|
|
|
|
return;
|
2014-05-16 11:37:50 -04:00
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
|
2014-05-21 04:30:54 -04:00
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2017-10-19 08:42:30 -04:00
|
|
|
srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
|
2008-02-17 19:26:35 -05:00
|
|
|
}
|
|
|
|
|
|
2014-05-21 07:57:23 -04:00
|
|
|
/* Marks the check <check> as valid and tries to set its server into stopping mode
|
|
|
|
|
* if it was running or starting, and provided it isn't in maintenance and other
|
|
|
|
|
* checks comply. The conditions for the server to be marked in stopping mode are
|
|
|
|
|
* the same as for it to be turned up. Also, only the health checks support the
|
|
|
|
|
* nolb mode.
|
2014-05-16 11:37:50 -04:00
|
|
|
*/
|
2014-05-21 07:57:23 -04:00
|
|
|
static void check_notify_stopping(struct check *check)
|
2014-05-16 11:37:50 -04:00
|
|
|
{
|
2013-02-23 01:35:38 -05:00
|
|
|
struct server *s = check->server;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->next_admin & SRV_ADMF_MAINT)
|
2014-05-16 11:37:50 -04:00
|
|
|
return;
|
|
|
|
|
|
2014-05-21 07:57:23 -04:00
|
|
|
if (check->state & CHK_ST_AGENT)
|
|
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->track && s->track->next_state == SRV_ST_STOPPED)
|
2014-05-21 07:57:23 -04:00
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2014-05-21 07:57:23 -04:00
|
|
|
if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
|
|
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2014-05-21 07:57:23 -04:00
|
|
|
if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
|
|
|
|
|
return;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2017-12-23 05:16:49 -05:00
|
|
|
srv_set_stopping(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL);
|
2008-02-17 19:26:35 -05:00
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2013-12-31 17:47:37 -05:00
|
|
|
/* note: use health_adjust() only, which first checks that the observe mode is
|
|
|
|
|
* enabled.
|
|
|
|
|
*/
|
|
|
|
|
void __health_adjust(struct server *s, short status)
|
2012-10-29 11:51:55 -04:00
|
|
|
{
|
2009-12-15 16:31:24 -05:00
|
|
|
int failed;
|
|
|
|
|
int expire;
|
|
|
|
|
|
|
|
|
|
if (s->observe >= HANA_OBS_SIZE)
|
|
|
|
|
return;
|
|
|
|
|
|
2013-01-23 18:37:39 -05:00
|
|
|
if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
|
2009-12-15 16:31:24 -05:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
switch (analyze_statuses[status].lr[s->observe - 1]) {
|
|
|
|
|
case 1:
|
|
|
|
|
failed = 1;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
failed = 0;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!failed) {
|
|
|
|
|
/* good: clear consecutive_errors */
|
|
|
|
|
s->consecutive_errors = 0;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-08 12:49:32 -05:00
|
|
|
_HA_ATOMIC_ADD(&s->consecutive_errors, 1);
|
2009-12-15 16:31:24 -05:00
|
|
|
|
|
|
|
|
if (s->consecutive_errors < s->consecutive_errors_limit)
|
|
|
|
|
return;
|
|
|
|
|
|
2012-10-29 11:51:55 -04:00
|
|
|
chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
|
|
|
|
|
s->consecutive_errors, get_analyze_status(status));
|
2009-12-15 16:31:24 -05:00
|
|
|
|
|
|
|
|
switch (s->onerror) {
|
|
|
|
|
case HANA_ONERR_FASTINTER:
|
|
|
|
|
/* force fastinter - nothing to do here as all modes force it */
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case HANA_ONERR_SUDDTH:
|
|
|
|
|
/* simulate a pre-fatal failed health check */
|
2013-11-24 20:46:38 -05:00
|
|
|
if (s->check.health > s->check.rise)
|
|
|
|
|
s->check.health = s->check.rise + 1;
|
2009-12-15 16:31:24 -05:00
|
|
|
|
|
|
|
|
/* no break - fall through */
|
|
|
|
|
|
|
|
|
|
case HANA_ONERR_FAILCHK:
|
|
|
|
|
/* simulate a failed health check */
|
2018-07-13 04:54:26 -04:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_HANA,
|
|
|
|
|
trash.area);
|
2014-05-20 16:32:27 -04:00
|
|
|
check_notify_failure(&s->check);
|
2009-12-15 16:31:24 -05:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case HANA_ONERR_MARKDWN:
|
|
|
|
|
/* mark server down */
|
2013-11-24 20:46:38 -05:00
|
|
|
s->check.health = s->check.rise;
|
2018-07-13 04:54:26 -04:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_HANA,
|
|
|
|
|
trash.area);
|
2014-05-20 16:32:27 -04:00
|
|
|
check_notify_failure(&s->check);
|
2009-12-15 16:31:24 -05:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
/* write a warning? */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s->consecutive_errors = 0;
|
2019-03-08 12:49:32 -05:00
|
|
|
_HA_ATOMIC_ADD(&s->counters.failed_hana, 1);
|
2009-12-15 16:31:24 -05:00
|
|
|
|
2013-02-22 20:16:43 -05:00
|
|
|
if (s->check.fastinter) {
|
|
|
|
|
expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
|
2013-09-21 05:05:00 -04:00
|
|
|
if (s->check.task->expire > expire) {
|
2012-09-28 09:01:02 -04:00
|
|
|
s->check.task->expire = expire;
|
2013-09-21 05:05:00 -04:00
|
|
|
/* requeue check task with new expire */
|
|
|
|
|
task_queue(s->check.task);
|
|
|
|
|
}
|
2009-12-15 16:31:24 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-14 09:04:54 -04:00
|
|
|
static int httpchk_build_status_header(struct server *s, char *buffer, int size)
|
2010-01-27 05:53:01 -05:00
|
|
|
{
|
|
|
|
|
int sv_state;
|
|
|
|
|
int ratio;
|
|
|
|
|
int hlen = 0;
|
2015-01-15 20:52:59 -05:00
|
|
|
char addr[46];
|
|
|
|
|
char port[6];
|
2010-01-27 05:53:01 -05:00
|
|
|
const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
|
|
|
|
|
"UP %d/%d", "UP",
|
|
|
|
|
"NOLB %d/%d", "NOLB",
|
|
|
|
|
"no check" };
|
|
|
|
|
|
|
|
|
|
memcpy(buffer + hlen, "X-Haproxy-Server-State: ", 24);
|
|
|
|
|
hlen += 24;
|
|
|
|
|
|
2013-12-11 14:36:34 -05:00
|
|
|
if (!(s->check.state & CHK_ST_ENABLED))
|
|
|
|
|
sv_state = 6;
|
2017-08-31 08:41:55 -04:00
|
|
|
else if (s->cur_state != SRV_ST_STOPPED) {
|
2013-11-24 20:46:38 -05:00
|
|
|
if (s->check.health == s->check.rise + s->check.fall - 1)
|
2010-01-27 05:53:01 -05:00
|
|
|
sv_state = 3; /* UP */
|
|
|
|
|
else
|
|
|
|
|
sv_state = 2; /* going down */
|
|
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->cur_state == SRV_ST_STOPPING)
|
2010-01-27 05:53:01 -05:00
|
|
|
sv_state += 2;
|
|
|
|
|
} else {
|
2013-02-24 03:23:38 -05:00
|
|
|
if (s->check.health)
|
2010-01-27 05:53:01 -05:00
|
|
|
sv_state = 1; /* going up */
|
|
|
|
|
else
|
|
|
|
|
sv_state = 0; /* DOWN */
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-14 09:04:54 -04:00
|
|
|
hlen += snprintf(buffer + hlen, size - hlen,
|
2010-01-27 05:53:01 -05:00
|
|
|
srv_hlt_st[sv_state],
|
2017-08-31 08:41:55 -04:00
|
|
|
(s->cur_state != SRV_ST_STOPPED) ? (s->check.health - s->check.rise + 1) : (s->check.health),
|
|
|
|
|
(s->cur_state != SRV_ST_STOPPED) ? (s->check.fall) : (s->check.rise));
|
2010-01-27 05:53:01 -05:00
|
|
|
|
2015-01-15 20:52:59 -05:00
|
|
|
addr_to_str(&s->addr, addr, sizeof(addr));
|
2017-01-06 11:41:29 -05:00
|
|
|
if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
|
|
|
|
|
snprintf(port, sizeof(port), "%u", s->svc_port);
|
|
|
|
|
else
|
|
|
|
|
*port = 0;
|
2015-01-15 20:52:59 -05:00
|
|
|
|
|
|
|
|
hlen += snprintf(buffer + hlen, size - hlen, "; address=%s; port=%s; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
|
|
|
|
|
addr, port, s->proxy->id, s->id,
|
2010-01-27 05:53:01 -05:00
|
|
|
global.node,
|
2017-08-31 08:41:55 -04:00
|
|
|
(s->cur_eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
|
2010-01-27 05:53:01 -05:00
|
|
|
(s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
|
|
|
|
|
s->cur_sess, s->proxy->beconn - s->proxy->nbpend,
|
|
|
|
|
s->nbpend);
|
|
|
|
|
|
2017-08-31 08:41:55 -04:00
|
|
|
if ((s->cur_state == SRV_ST_STARTING) &&
|
2010-01-27 05:53:01 -05:00
|
|
|
now.tv_sec < s->last_change + s->slowstart &&
|
|
|
|
|
now.tv_sec >= s->last_change) {
|
|
|
|
|
ratio = MAX(1, 100 * (now.tv_sec - s->last_change) / s->slowstart);
|
2014-04-14 09:04:54 -04:00
|
|
|
hlen += snprintf(buffer + hlen, size - hlen, "; throttle=%d%%", ratio);
|
2010-01-27 05:53:01 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
buffer[hlen++] = '\r';
|
|
|
|
|
buffer[hlen++] = '\n';
|
|
|
|
|
|
|
|
|
|
return hlen;
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-04 18:31:46 -05:00
|
|
|
/* Check the connection. If an error has already been reported or the socket is
|
|
|
|
|
* closed, keep errno intact as it is supposed to contain the valid error code.
|
|
|
|
|
* If no error is reported, check the socket's error queue using getsockopt().
|
|
|
|
|
* Warning, this must be done only once when returning from poll, and never
|
|
|
|
|
* after an I/O error was attempted, otherwise the error queue might contain
|
|
|
|
|
* inconsistent errors. If an error is detected, the CO_FL_ERROR is set on the
|
|
|
|
|
* socket. Returns non-zero if an error was reported, zero if everything is
|
|
|
|
|
* clean (including a properly closed socket).
|
|
|
|
|
*/
|
|
|
|
|
static int retrieve_errno_from_socket(struct connection *conn)
|
|
|
|
|
{
|
|
|
|
|
int skerr;
|
|
|
|
|
socklen_t lskerr = sizeof(skerr);
|
|
|
|
|
|
2019-12-27 06:03:27 -05:00
|
|
|
if (conn->flags & CO_FL_ERROR && (unclean_errno(errno) || !conn->ctrl))
|
2013-12-04 18:31:46 -05:00
|
|
|
return 1;
|
|
|
|
|
|
2014-01-23 07:50:42 -05:00
|
|
|
if (!conn_ctrl_ready(conn))
|
2013-12-04 18:31:46 -05:00
|
|
|
return 0;
|
|
|
|
|
|
2017-08-24 08:31:19 -04:00
|
|
|
if (getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
|
2013-12-04 18:31:46 -05:00
|
|
|
errno = skerr;
|
|
|
|
|
|
2019-12-27 06:03:27 -05:00
|
|
|
errno = unclean_errno(errno);
|
2013-12-04 18:31:46 -05:00
|
|
|
|
|
|
|
|
if (!errno) {
|
|
|
|
|
/* we could not retrieve an error, that does not mean there is
|
|
|
|
|
* none. Just don't change anything and only report the prior
|
|
|
|
|
* error if any.
|
|
|
|
|
*/
|
|
|
|
|
if (conn->flags & CO_FL_ERROR)
|
|
|
|
|
return 1;
|
|
|
|
|
else
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-04 05:17:05 -05:00
|
|
|
/* Try to collect as much information as possible on the connection status,
|
|
|
|
|
* and adjust the server status accordingly. It may make use of <errno_bck>
|
|
|
|
|
* if non-null when the caller is absolutely certain of its validity (eg:
|
|
|
|
|
* checked just after a syscall). If the caller doesn't have a valid errno,
|
|
|
|
|
* it can pass zero, and retrieve_errno_from_socket() will be called to try
|
|
|
|
|
* to extract errno from the socket. If no error is reported, it will consider
|
|
|
|
|
* the <expired> flag. This is intended to be used when a connection error was
|
|
|
|
|
* reported in conn->flags or when a timeout was reported in <expired>. The
|
|
|
|
|
* function takes care of not updating a server status which was already set.
|
|
|
|
|
* All situations where at least one of <expired> or CO_FL_ERROR are set
|
|
|
|
|
* produce a status.
|
|
|
|
|
*/
|
2017-10-04 08:47:29 -04:00
|
|
|
static void chk_report_conn_err(struct check *check, int errno_bck, int expired)
|
2013-12-04 05:17:05 -05:00
|
|
|
{
|
2017-09-13 12:30:23 -04:00
|
|
|
struct conn_stream *cs = check->cs;
|
|
|
|
|
struct connection *conn = cs_conn(cs);
|
2013-12-04 05:17:05 -05:00
|
|
|
const char *err_msg;
|
2018-07-13 05:56:34 -04:00
|
|
|
struct buffer *chk;
|
2014-10-02 08:51:02 -04:00
|
|
|
int step;
|
2015-05-01 02:03:04 -04:00
|
|
|
char *comment;
|
2013-12-04 05:17:05 -05:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
if (check->result != CHK_RES_UNKNOWN)
|
2013-12-04 05:17:05 -05:00
|
|
|
return;
|
|
|
|
|
|
2019-12-27 06:03:27 -05:00
|
|
|
errno = unclean_errno(errno_bck);
|
|
|
|
|
if (conn && errno)
|
2013-12-04 05:17:05 -05:00
|
|
|
retrieve_errno_from_socket(conn);
|
|
|
|
|
|
2017-10-16 09:17:17 -04:00
|
|
|
if (conn && !(conn->flags & CO_FL_ERROR) &&
|
|
|
|
|
!(cs->flags & CS_FL_ERROR) && !expired)
|
2013-12-04 05:17:05 -05:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* we'll try to build a meaningful error message depending on the
|
|
|
|
|
* context of the error possibly present in conn->err_code, and the
|
|
|
|
|
* socket error possibly collected above. This is useful to know the
|
|
|
|
|
* exact step of the L6 layer (eg: SSL handshake).
|
|
|
|
|
*/
|
2013-10-06 17:24:13 -04:00
|
|
|
chk = get_trash_chunk();
|
|
|
|
|
|
|
|
|
|
if (check->type == PR_O2_TCPCHK_CHK) {
|
2020-03-30 05:05:10 -04:00
|
|
|
step = tcpcheck_get_step_id(check, NULL);
|
2014-10-02 08:51:02 -04:00
|
|
|
if (!step)
|
|
|
|
|
chunk_printf(chk, " at initial connection step of tcp-check");
|
|
|
|
|
else {
|
|
|
|
|
chunk_printf(chk, " at step %d of tcp-check", step);
|
|
|
|
|
/* we were looking for a string */
|
2020-03-30 05:05:10 -04:00
|
|
|
if (check->current_step && check->current_step->action == TCPCHK_ACT_CONNECT) {
|
|
|
|
|
if (check->current_step->connect.port)
|
|
|
|
|
chunk_appendf(chk, " (connect port %d)" ,check->current_step->connect.port);
|
2014-10-02 08:51:02 -04:00
|
|
|
else
|
|
|
|
|
chunk_appendf(chk, " (connect)");
|
|
|
|
|
}
|
2020-03-30 05:05:10 -04:00
|
|
|
else if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
|
|
|
|
|
struct tcpcheck_expect *expect = &check->current_step->expect;
|
2020-02-07 09:37:17 -05:00
|
|
|
|
|
|
|
|
switch (expect->type) {
|
|
|
|
|
case TCPCHK_EXPECT_STRING:
|
|
|
|
|
chunk_appendf(chk, " (expect string '%s')", expect->string);
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_BINARY:
|
|
|
|
|
chunk_appendf(chk, " (expect binary '%s')", expect->string);
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_REGEX:
|
2014-10-02 08:51:02 -04:00
|
|
|
chunk_appendf(chk, " (expect regex)");
|
2020-02-07 09:37:17 -05:00
|
|
|
break;
|
2020-02-07 09:37:17 -05:00
|
|
|
case TCPCHK_EXPECT_REGEX_BINARY:
|
|
|
|
|
chunk_appendf(chk, " (expect binary regex)");
|
|
|
|
|
break;
|
2020-02-07 09:37:17 -05:00
|
|
|
case TCPCHK_EXPECT_UNDEF:
|
|
|
|
|
chunk_appendf(chk, " (undefined expect!)");
|
|
|
|
|
break;
|
|
|
|
|
}
|
2014-10-02 08:51:02 -04:00
|
|
|
}
|
2020-03-30 05:05:10 -04:00
|
|
|
else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
|
2014-10-02 08:51:02 -04:00
|
|
|
chunk_appendf(chk, " (send)");
|
|
|
|
|
}
|
2015-05-01 02:03:04 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
comment = tcpcheck_get_step_comment(check, NULL);
|
2015-05-01 02:03:04 -04:00
|
|
|
if (comment)
|
|
|
|
|
chunk_appendf(chk, " comment: '%s'", comment);
|
2013-10-06 17:24:13 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
if (conn && conn->err_code) {
|
2019-12-27 06:03:27 -05:00
|
|
|
if (unclean_errno(errno))
|
2018-07-13 04:54:26 -04:00
|
|
|
chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno),
|
|
|
|
|
chk->area);
|
2013-12-04 05:17:05 -05:00
|
|
|
else
|
2018-07-13 04:54:26 -04:00
|
|
|
chunk_printf(&trash, "%s%s", conn_err_code_str(conn),
|
|
|
|
|
chk->area);
|
|
|
|
|
err_msg = trash.area;
|
2013-12-04 05:17:05 -05:00
|
|
|
}
|
|
|
|
|
else {
|
2019-12-27 06:03:27 -05:00
|
|
|
if (unclean_errno(errno)) {
|
2018-07-13 04:54:26 -04:00
|
|
|
chunk_printf(&trash, "%s%s", strerror(errno),
|
|
|
|
|
chk->area);
|
|
|
|
|
err_msg = trash.area;
|
2013-12-04 05:17:05 -05:00
|
|
|
}
|
|
|
|
|
else {
|
2018-07-13 04:54:26 -04:00
|
|
|
err_msg = chk->area;
|
2013-12-04 05:17:05 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
if (check->state & CHK_ST_PORT_MISS) {
|
2016-06-13 08:15:41 -04:00
|
|
|
/* NOTE: this is reported after <fall> tries */
|
|
|
|
|
chunk_printf(chk, "No port available for the TCP connection");
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
|
|
|
|
|
}
|
|
|
|
|
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
if (!conn) {
|
|
|
|
|
/* connection allocation error before the connection was established */
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
|
|
|
|
|
}
|
MEDIUM: connection: remove CO_FL_CONNECTED and only rely on CO_FL_WAIT_*
Commit 477902bd2e ("MEDIUM: connections: Get ride of the xprt_done
callback.") broke the master CLI for a very obscure reason. It happens
that short requests immediately terminated by a shutdown are properly
received, CS_FL_EOS is correctly set, but in si_cs_recv(), we refrain
from setting CF_SHUTR on the channel because CO_FL_CONNECTED was not
yet set on the connection since we've not passed again through
conn_fd_handler() and it was not done in conn_complete_session(). While
commit a8a415d31a ("BUG/MEDIUM: connections: Set CO_FL_CONNECTED in
conn_complete_session()") fixed the issue, such accident may happen
again as the root cause is deeper and actually comes down to the fact
that CO_FL_CONNECTED is lazily set at various check points in the code
but not every time we drop one wait bit. It is not the first time we
face this situation.
Originally this flag was used to detect the transition between WAIT_*
and CONNECTED in order to call ->wake() from the FD handler. But since
at least 1.8-dev1 with commit 7bf3fa3c23 ("BUG/MAJOR: connection: update
CO_FL_CONNECTED before calling the data layer"), CO_FL_CONNECTED is
always synchronized against the two others before being checked. Moreover,
with the I/Os moved to tasklets, the decision to call the ->wake() function
is performed after the I/Os in si_cs_process() and equivalent, which don't
care about this transition either.
So in essence, checking for CO_FL_CONNECTED has become a lazy wait to
check for (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN), but that always
relies on someone else having synchronized it.
This patch addresses it once for all by killing this flag and only checking
the two others (for which a composite mask CO_FL_WAIT_L4L6 was added). This
revealed a number of inconsistencies that were purposely not addressed here
for the sake of bisectability:
- while most places do check both L4+L6 and HANDSHAKE at the same time,
some places like assign_server() or back_handle_st_con() and a few
sample fetches looking for proxy protocol do check for L4+L6 but
don't care about HANDSHAKE ; these ones will probably fail on TCP
request session rules if the handshake is not complete.
- some handshake handlers do validate that a connection is established
at L4 but didn't clear CO_FL_WAIT_L4_CONN
- the ->ctl method of mux_fcgi, mux_pt and mux_h1 only checks for L4+L6
before declaring the mux ready while the snd_buf function also checks
for the handshake's completion. Likely the former should validate the
handshake as well and we should get rid of these extra tests in snd_buf.
- raw_sock_from_buf() would directly set CO_FL_CONNECTED and would only
later clear CO_FL_WAIT_L4_CONN.
- xprt_handshake would set CO_FL_CONNECTED itself without actually
clearing CO_FL_WAIT_L4_CONN, which could apparently happen only if
waiting for a pure Rx handshake.
- most places in ssl_sock that were checking CO_FL_CONNECTED don't need
to include the L4 check as an L6 check is enough to decide whether to
wait for more info or not.
It also becomes obvious when reading the test in si_cs_recv() that caused
the failure mentioned above that once converted it doesn't make any sense
anymore: having CS_FL_EOS set while still waiting for L4 and L6 to complete
cannot happen since for CS_FL_EOS to be set, the other ones must have been
validated.
Some of these parts will still deserve further cleanup, and some of the
observations above may induce some backports of potential bug fixes once
totally analyzed in their context. The risk of breaking existing stuff
is too high to blindly backport everything.
2020-01-23 03:11:58 -05:00
|
|
|
else if (conn->flags & CO_FL_WAIT_L4_CONN) {
|
2013-12-04 05:17:05 -05:00
|
|
|
/* L4 not established (yet) */
|
2017-10-16 09:17:17 -04:00
|
|
|
if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)
|
2013-12-04 05:17:05 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
|
|
|
|
|
else if (expired)
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
|
2015-04-13 19:15:08 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* might be due to a server IP change.
|
|
|
|
|
* Let's trigger a DNS resolution if none are currently running.
|
|
|
|
|
*/
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
dns_trigger_resolution(check->server->dns_requester);
|
2015-04-13 19:15:08 -04:00
|
|
|
|
2013-12-04 05:17:05 -05:00
|
|
|
}
|
MEDIUM: connection: remove CO_FL_CONNECTED and only rely on CO_FL_WAIT_*
Commit 477902bd2e ("MEDIUM: connections: Get ride of the xprt_done
callback.") broke the master CLI for a very obscure reason. It happens
that short requests immediately terminated by a shutdown are properly
received, CS_FL_EOS is correctly set, but in si_cs_recv(), we refrain
from setting CF_SHUTR on the channel because CO_FL_CONNECTED was not
yet set on the connection since we've not passed again through
conn_fd_handler() and it was not done in conn_complete_session(). While
commit a8a415d31a ("BUG/MEDIUM: connections: Set CO_FL_CONNECTED in
conn_complete_session()") fixed the issue, such accident may happen
again as the root cause is deeper and actually comes down to the fact
that CO_FL_CONNECTED is lazily set at various check points in the code
but not every time we drop one wait bit. It is not the first time we
face this situation.
Originally this flag was used to detect the transition between WAIT_*
and CONNECTED in order to call ->wake() from the FD handler. But since
at least 1.8-dev1 with commit 7bf3fa3c23 ("BUG/MAJOR: connection: update
CO_FL_CONNECTED before calling the data layer"), CO_FL_CONNECTED is
always synchronized against the two others before being checked. Moreover,
with the I/Os moved to tasklets, the decision to call the ->wake() function
is performed after the I/Os in si_cs_process() and equivalent, which don't
care about this transition either.
So in essence, checking for CO_FL_CONNECTED has become a lazy wait to
check for (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN), but that always
relies on someone else having synchronized it.
This patch addresses it once for all by killing this flag and only checking
the two others (for which a composite mask CO_FL_WAIT_L4L6 was added). This
revealed a number of inconsistencies that were purposely not addressed here
for the sake of bisectability:
- while most places do check both L4+L6 and HANDSHAKE at the same time,
some places like assign_server() or back_handle_st_con() and a few
sample fetches looking for proxy protocol do check for L4+L6 but
don't care about HANDSHAKE ; these ones will probably fail on TCP
request session rules if the handshake is not complete.
- some handshake handlers do validate that a connection is established
at L4 but didn't clear CO_FL_WAIT_L4_CONN
- the ->ctl method of mux_fcgi, mux_pt and mux_h1 only checks for L4+L6
before declaring the mux ready while the snd_buf function also checks
for the handshake's completion. Likely the former should validate the
handshake as well and we should get rid of these extra tests in snd_buf.
- raw_sock_from_buf() would directly set CO_FL_CONNECTED and would only
later clear CO_FL_WAIT_L4_CONN.
- xprt_handshake would set CO_FL_CONNECTED itself without actually
clearing CO_FL_WAIT_L4_CONN, which could apparently happen only if
waiting for a pure Rx handshake.
- most places in ssl_sock that were checking CO_FL_CONNECTED don't need
to include the L4 check as an L6 check is enough to decide whether to
wait for more info or not.
It also becomes obvious when reading the test in si_cs_recv() that caused
the failure mentioned above that once converted it doesn't make any sense
anymore: having CS_FL_EOS set while still waiting for L4 and L6 to complete
cannot happen since for CS_FL_EOS to be set, the other ones must have been
validated.
Some of these parts will still deserve further cleanup, and some of the
observations above may induce some backports of potential bug fixes once
totally analyzed in their context. The risk of breaking existing stuff
is too high to blindly backport everything.
2020-01-23 03:11:58 -05:00
|
|
|
else if (conn->flags & CO_FL_WAIT_L6_CONN) {
|
2013-12-04 05:17:05 -05:00
|
|
|
/* L6 not established (yet) */
|
2017-10-16 09:17:17 -04:00
|
|
|
if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)
|
2013-12-04 05:17:05 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
|
|
|
|
|
else if (expired)
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
|
|
|
|
|
}
|
2017-10-16 09:17:17 -04:00
|
|
|
else if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR) {
|
2013-12-04 05:17:05 -05:00
|
|
|
/* I/O error after connection was established and before we could diagnose */
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
|
|
|
|
|
}
|
|
|
|
|
else if (expired) {
|
2020-04-01 05:04:52 -04:00
|
|
|
enum healthcheck_status tout = HCHK_STATUS_L7TOUT;
|
|
|
|
|
|
2013-12-04 05:17:05 -05:00
|
|
|
/* connection established but expired check */
|
|
|
|
|
if (check->type == PR_O2_SSL3_CHK)
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
|
2020-04-01 05:04:52 -04:00
|
|
|
else { /* HTTP, SMTP, ... */
|
|
|
|
|
if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT)
|
|
|
|
|
tout = check->current_step->expect.tout_status;
|
|
|
|
|
set_server_check_status(check, tout, err_msg);
|
|
|
|
|
}
|
2013-12-04 05:17:05 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-14 11:04:58 -04:00
|
|
|
/* This function checks if any I/O is wanted, and if so, attempts to do so */
|
|
|
|
|
static struct task *event_srv_chk_io(struct task *t, void *ctx, unsigned short state)
|
2018-07-17 12:49:38 -04:00
|
|
|
{
|
2018-09-12 09:15:12 -04:00
|
|
|
struct check *check = ctx;
|
|
|
|
|
struct conn_stream *cs = check->cs;
|
2019-01-11 12:43:04 -05:00
|
|
|
struct email_alertq *q = container_of(check, typeof(*q), check);
|
2019-07-09 11:28:51 -04:00
|
|
|
int ret = 0;
|
2018-08-28 13:36:18 -04:00
|
|
|
|
2018-12-19 07:59:17 -05:00
|
|
|
if (!(check->wait_list.events & SUB_RETRY_SEND))
|
2019-07-09 11:28:51 -04:00
|
|
|
ret = wake_srv_chk(cs);
|
|
|
|
|
if (ret == 0 && !(check->wait_list.events & SUB_RETRY_RECV)) {
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
|
|
|
|
|
else
|
|
|
|
|
HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2018-08-09 07:06:55 -04:00
|
|
|
__event_srv_chk_r(cs);
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
|
|
|
|
|
else
|
|
|
|
|
HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2018-08-09 07:06:55 -04:00
|
|
|
}
|
2018-07-17 12:49:38 -04:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* same as above but protected by the server lock.
|
2017-11-05 04:11:13 -05:00
|
|
|
*
|
|
|
|
|
* Please do NOT place any return statement in this function and only leave
|
2018-07-17 12:49:38 -04:00
|
|
|
* via the out label. NOTE THAT THIS FUNCTION DOESN'T LOCK, YOU PROBABLY WANT
|
|
|
|
|
* TO USE event_srv_chk_w() instead.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2018-07-17 12:49:38 -04:00
|
|
|
static void __event_srv_chk_w(struct conn_stream *cs)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2017-09-13 12:30:23 -04:00
|
|
|
struct connection *conn = cs->conn;
|
|
|
|
|
struct check *check = cs->data;
|
2013-02-23 01:35:38 -05:00
|
|
|
struct server *s = check->server;
|
|
|
|
|
struct task *t = check->task;
|
2012-09-28 08:40:02 -04:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
if (unlikely(check->result == CHK_RES_FAILED))
|
2013-12-04 05:17:05 -05:00
|
|
|
goto out_wakeup;
|
|
|
|
|
|
2013-12-04 18:31:46 -05:00
|
|
|
if (retrieve_errno_from_socket(conn)) {
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, errno, 0);
|
2013-12-04 18:31:46 -05:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
2010-01-02 16:03:01 -05:00
|
|
|
|
2013-12-04 19:53:08 -05:00
|
|
|
/* here, we know that the connection is established. That's enough for
|
|
|
|
|
* a pure TCP check.
|
|
|
|
|
*/
|
|
|
|
|
if (!check->type)
|
|
|
|
|
goto out_wakeup;
|
|
|
|
|
|
2017-10-04 05:58:22 -04:00
|
|
|
/* wake() will take care of calling tcpcheck_main() */
|
2017-11-05 04:11:13 -05:00
|
|
|
if (check->type == PR_O2_TCPCHK_CHK)
|
2018-07-17 12:49:38 -04:00
|
|
|
goto out;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (b_data(&check->bo)) {
|
2018-08-16 09:41:52 -04:00
|
|
|
cs->conn->mux->snd_buf(cs, &check->bo, b_data(&check->bo), 0);
|
2018-07-10 11:43:27 -04:00
|
|
|
b_realign_if_empty(&check->bo);
|
2017-10-16 09:17:17 -04:00
|
|
|
if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR) {
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, errno, 0);
|
2013-12-04 05:17:05 -05:00
|
|
|
goto out_wakeup;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2018-07-17 12:49:38 -04:00
|
|
|
if (b_data(&check->bo)) {
|
2018-12-19 07:59:17 -05:00
|
|
|
conn->mux->subscribe(cs, SUB_RETRY_SEND, &check->wait_list);
|
2018-07-17 12:49:38 -04:00
|
|
|
goto out;
|
|
|
|
|
}
|
2013-12-04 05:17:05 -05:00
|
|
|
}
|
2011-03-10 16:26:24 -05:00
|
|
|
|
2013-12-04 05:17:05 -05:00
|
|
|
/* full request sent, we allow up to <timeout.check> if nonzero for a response */
|
|
|
|
|
if (s->proxy->timeout.check) {
|
|
|
|
|
t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
|
|
|
|
|
task_queue(t);
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2018-10-10 09:46:36 -04:00
|
|
|
goto out;
|
2013-12-04 05:17:05 -05:00
|
|
|
|
2007-04-15 14:56:27 -04:00
|
|
|
out_wakeup:
|
2008-08-29 12:19:04 -04:00
|
|
|
task_wakeup(t, TASK_WOKEN_IO);
|
2018-07-17 12:49:38 -04:00
|
|
|
out:
|
|
|
|
|
return;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2006-07-09 10:42:34 -04:00
|
|
|
* This function is used only for server health-checks. It handles the server's
|
2010-10-18 09:58:36 -04:00
|
|
|
* reply to an HTTP request, SSL HELLO or MySQL client Auth. It calls
|
2013-02-23 01:35:38 -05:00
|
|
|
* set_server_check_status() to update check->status, check->duration
|
|
|
|
|
* and check->result.
|
2009-09-27 09:50:02 -04:00
|
|
|
|
|
|
|
|
* The set_server_check_status function is called with HCHK_STATUS_L7OKD if
|
|
|
|
|
* an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP server
|
|
|
|
|
* returns 2xx, HCHK_STATUS_L6OK if an SSL server returns at least 5 bytes in
|
|
|
|
|
* response to an SSL HELLO (the principle is that this is enough to
|
|
|
|
|
* distinguish between an SSL server and a pure TCP relay). All other cases will
|
|
|
|
|
* call it with a proper error status like HCHK_STATUS_L7STS, HCHK_STATUS_L6RSP,
|
|
|
|
|
* etc.
|
2017-11-05 04:11:13 -05:00
|
|
|
*
|
|
|
|
|
* Please do NOT place any return statement in this function and only leave
|
2018-08-09 07:06:55 -04:00
|
|
|
* via the out label.
|
|
|
|
|
*
|
|
|
|
|
* This must be called with the server lock held.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2018-08-09 07:06:55 -04:00
|
|
|
static void __event_srv_chk_r(struct conn_stream *cs)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2017-09-13 12:30:23 -04:00
|
|
|
struct connection *conn = cs->conn;
|
|
|
|
|
struct check *check = cs->data;
|
2013-02-23 01:35:38 -05:00
|
|
|
struct server *s = check->server;
|
|
|
|
|
struct task *t = check->task;
|
2009-10-10 15:06:49 -04:00
|
|
|
char *desc;
|
2010-03-17 16:52:07 -04:00
|
|
|
int done;
|
2010-09-29 12:17:05 -04:00
|
|
|
unsigned short msglen;
|
2007-04-15 14:56:27 -04:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
if (unlikely(check->result == CHK_RES_FAILED))
|
2013-12-04 18:31:46 -05:00
|
|
|
goto out_wakeup;
|
2007-04-15 14:56:27 -04:00
|
|
|
|
2017-10-04 05:58:22 -04:00
|
|
|
/* wake() will take care of calling tcpcheck_main() */
|
2017-11-05 04:11:13 -05:00
|
|
|
if (check->type == PR_O2_TCPCHK_CHK)
|
2018-08-09 07:06:55 -04:00
|
|
|
goto out;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2007-04-15 14:56:27 -04:00
|
|
|
/* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
|
|
|
|
|
* but the connection was closed on the remote end. Fortunately, recv still
|
|
|
|
|
* works correctly and we don't need to do the getsockopt() on linux.
|
|
|
|
|
*/
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
|
|
|
|
|
/* Set buffer to point to the end of the data already read, and check
|
|
|
|
|
* that there is free space remaining. If the buffer is full, proceed
|
|
|
|
|
* with running the checks without attempting another socket read.
|
|
|
|
|
*/
|
|
|
|
|
|
2010-03-17 16:52:07 -04:00
|
|
|
done = 0;
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
|
2018-08-16 09:30:32 -04:00
|
|
|
cs->conn->mux->rcv_buf(cs, &check->bi, b_size(&check->bi), 0);
|
2017-10-16 09:17:17 -04:00
|
|
|
if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH) || cs->flags & CS_FL_ERROR) {
|
2010-03-17 16:52:07 -04:00
|
|
|
done = 1;
|
2018-07-10 11:43:27 -04:00
|
|
|
if ((conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR) && !b_data(&check->bi)) {
|
2012-09-28 13:39:36 -04:00
|
|
|
/* Report network errors only if we got no other data. Otherwise
|
|
|
|
|
* we'll let the upper layers decide whether the response is OK
|
|
|
|
|
* or not. It is very common that an RST sent by the server is
|
|
|
|
|
* reported as an error just after the last data chunk.
|
|
|
|
|
*/
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, errno, 0);
|
2010-03-16 15:55:43 -04:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
BUG/MEDIUM: checks: make sure the connection is ready before trying to recv
As identified in issue #278, the backport of commit c594039225 ("BUG/MINOR:
checks: do not uselessly poll for reads before the connection is up")
introduced a regression in 2.0 when default checks are enabled (not
"option tcp-check"), but it did not affect 2.1.
What happens is that in 2.0 and earlier we have the fd cache which makes
a speculative call to the I/O functions after an attempt to connect, and
the __event_srv_chk_r() function was absolutely not designed to be called
while a connection attempt is still pending. Thus what happens is that the
test for success/failure expects the verdict to be final before waking up
the check task, and since the connection is not yet validated, it fails.
It will usually work over the loopback depending on scheduling, which is
why it doesn't fail in reg tests.
In 2.1 after the failed connect(), we subscribe to polling and usually come
back with a validated connection, so the function is not expected to be
called before it completes, except if it happens as a side effect of some
spurious wake calls, which should not have any effect on such a check.
The other check types are not impacted by this issue because they all
check for a minimum data length in the buffer, and wait for more data
until they are satisfied.
This patch fixes the issue by explicitly checking that the connection
is established before trying to read or to give a verdict. This way the
function becomes safe to call regardless of the connection status (even
if it's still totally ugly).
This fix must be backported to 2.0.
2019-09-24 04:43:03 -04:00
|
|
|
/* the rest of the code below expects the connection to be ready! */
|
2020-01-23 10:27:54 -05:00
|
|
|
if (conn->flags & CO_FL_WAIT_XPRT && !done)
|
BUG/MEDIUM: checks: make sure the connection is ready before trying to recv
As identified in issue #278, the backport of commit c594039225 ("BUG/MINOR:
checks: do not uselessly poll for reads before the connection is up")
introduced a regression in 2.0 when default checks are enabled (not
"option tcp-check"), but it did not affect 2.1.
What happens is that in 2.0 and earlier we have the fd cache which makes
a speculative call to the I/O functions after an attempt to connect, and
the __event_srv_chk_r() function was absolutely not designed to be called
while a connection attempt is still pending. Thus what happens is that the
test for success/failure expects the verdict to be final before waking up
the check task, and since the connection is not yet validated, it fails.
It will usually work over the loopback depending on scheduling, which is
why it doesn't fail in reg tests.
In 2.1 after the failed connect(), we subscribe to polling and usually come
back with a validated connection, so the function is not expected to be
called before it completes, except if it happens as a side effect of some
spurious wake calls, which should not have any effect on such a check.
The other check types are not impacted by this issue because they all
check for a minimum data length in the buffer, and wait for more data
until they are satisfied.
This patch fixes the issue by explicitly checking that the connection
is established before trying to read or to give a verdict. This way the
function becomes safe to call regardless of the connection status (even
if it's still totally ugly).
This fix must be backported to 2.0.
2019-09-24 04:43:03 -04:00
|
|
|
goto wait_more_data;
|
2013-12-04 05:17:05 -05:00
|
|
|
|
2010-03-17 16:52:07 -04:00
|
|
|
/* Intermediate or complete response received.
|
2018-07-10 11:43:27 -04:00
|
|
|
* Terminate string in b_head(&check->bi) buffer.
|
2010-03-17 16:52:07 -04:00
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
if (b_data(&check->bi) < b_size(&check->bi))
|
|
|
|
|
b_head(&check->bi)[b_data(&check->bi)] = '\0';
|
2010-03-17 16:52:07 -04:00
|
|
|
else {
|
2018-07-10 11:43:27 -04:00
|
|
|
b_head(&check->bi)[b_data(&check->bi) - 1] = '\0';
|
2010-03-17 16:52:07 -04:00
|
|
|
done = 1; /* buffer full, don't wait for more data */
|
|
|
|
|
}
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
|
|
|
|
|
/* Run the checks... */
|
2013-02-23 01:35:38 -05:00
|
|
|
switch (check->type) {
|
2011-08-06 11:05:02 -04:00
|
|
|
case PR_O2_HTTP_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < strlen("HTTP/1.0 000\r"))
|
2010-03-17 16:52:07 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
/* Check if the server speaks HTTP 1.X */
|
2018-07-10 11:43:27 -04:00
|
|
|
if ((b_data(&check->bi) < strlen("HTTP/1.0 000\r")) ||
|
|
|
|
|
(memcmp(b_head(&check->bi), "HTTP/1.", 7) != 0 ||
|
|
|
|
|
(*(b_head(&check->bi) + 12) != ' ' && *(b_head(&check->bi) + 12) != '\r')) ||
|
|
|
|
|
!isdigit((unsigned char) *(b_head(&check->bi) + 9)) || !isdigit((unsigned char) *(b_head(&check->bi) + 10)) ||
|
|
|
|
|
!isdigit((unsigned char) *(b_head(&check->bi) + 11))) {
|
|
|
|
|
cut_crlf(b_head(&check->bi));
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, b_head(&check->bi));
|
2009-10-10 15:06:49 -04:00
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
check->code = str2uic(b_head(&check->bi) + 9);
|
|
|
|
|
desc = ltrim(b_head(&check->bi) + 12, ' ');
|
2017-10-23 09:54:24 -04:00
|
|
|
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
if ((s->proxy->options & PR_O_DISABLE404) &&
|
2017-08-31 08:41:55 -04:00
|
|
|
(s->next_state != SRV_ST_STOPPED) && (check->code == 404)) {
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
/* 404 may be accepted as "stopping" only if the server was up */
|
|
|
|
|
cut_crlf(desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKCD, desc);
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
}
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
else if (s->proxy->options2 & PR_O2_EXP_TYPE) {
|
|
|
|
|
/* Run content verification check... We know we have at least 13 chars */
|
|
|
|
|
if (!httpchk_expect(s, done))
|
|
|
|
|
goto wait_more_data;
|
|
|
|
|
}
|
|
|
|
|
/* check the reply : HTTP/1.X 2xx and 3xx are OK */
|
2018-07-10 11:43:27 -04:00
|
|
|
else if (*(b_head(&check->bi) + 9) == '2' || *(b_head(&check->bi) + 9) == '3') {
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
cut_crlf(desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
}
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
else {
|
|
|
|
|
cut_crlf(desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
}
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PR_O2_SSL3_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < 5)
|
2010-03-17 16:52:07 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
/* Check for SSLv3 alert or handshake */
|
2018-07-10 11:43:27 -04:00
|
|
|
if ((b_data(&check->bi) >= 5) && (*b_head(&check->bi) == 0x15 || *b_head(&check->bi) == 0x16))
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
|
2009-09-27 09:50:02 -04:00
|
|
|
else
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L6RSP, NULL);
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PR_O2_SMTP_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < strlen("000\r"))
|
2010-03-17 16:52:07 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2019-08-06 10:26:31 -04:00
|
|
|
/* do not reset when closing, servers don't like this */
|
|
|
|
|
if (conn_ctrl_ready(cs->conn))
|
|
|
|
|
fdtab[cs->conn->handle.fd].linger_risk = 0;
|
|
|
|
|
|
2009-09-23 16:09:24 -04:00
|
|
|
/* Check if the server speaks SMTP */
|
2018-07-10 11:43:27 -04:00
|
|
|
if ((b_data(&check->bi) < strlen("000\r")) ||
|
|
|
|
|
(*(b_head(&check->bi) + 3) != ' ' && *(b_head(&check->bi) + 3) != '\r') ||
|
|
|
|
|
!isdigit((unsigned char) *b_head(&check->bi)) || !isdigit((unsigned char) *(b_head(&check->bi) + 1)) ||
|
|
|
|
|
!isdigit((unsigned char) *(b_head(&check->bi) + 2))) {
|
|
|
|
|
cut_crlf(b_head(&check->bi));
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, b_head(&check->bi));
|
2009-09-23 16:09:24 -04:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
check->code = str2uic(b_head(&check->bi));
|
2009-09-23 16:09:24 -04:00
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
desc = ltrim(b_head(&check->bi) + 3, ' ');
|
2009-10-10 15:06:49 -04:00
|
|
|
cut_crlf(desc);
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
/* Check for SMTP code 2xx (should be 250) */
|
2018-07-10 11:43:27 -04:00
|
|
|
if (*b_head(&check->bi) == '2')
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
|
2009-09-27 09:50:02 -04:00
|
|
|
else
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
2013-02-11 20:45:54 -05:00
|
|
|
case PR_O2_LB_AGENT_CHK: {
|
2013-12-09 14:51:51 -05:00
|
|
|
int status = HCHK_STATUS_CHECKED;
|
|
|
|
|
const char *hs = NULL; /* health status */
|
|
|
|
|
const char *as = NULL; /* admin status */
|
|
|
|
|
const char *ps = NULL; /* performance status */
|
2016-04-24 17:10:06 -04:00
|
|
|
const char *cs = NULL; /* maxconn */
|
2013-12-09 14:51:51 -05:00
|
|
|
const char *err = NULL; /* first error to report */
|
|
|
|
|
const char *wrn = NULL; /* first warning to report */
|
|
|
|
|
char *cmd, *p;
|
|
|
|
|
|
|
|
|
|
/* We're getting an agent check response. The agent could
|
|
|
|
|
* have been disabled in the mean time with a long check
|
|
|
|
|
* still pending. It is important that we ignore the whole
|
|
|
|
|
* response.
|
|
|
|
|
*/
|
|
|
|
|
if (!(check->server->agent.state & CHK_ST_ENABLED))
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* The agent supports strings made of a single line ended by the
|
|
|
|
|
* first CR ('\r') or LF ('\n'). This line is composed of words
|
|
|
|
|
* delimited by spaces (' '), tabs ('\t'), or commas (','). The
|
|
|
|
|
* line may optionally contain a description of a state change
|
|
|
|
|
* after a sharp ('#'), which is only considered if a health state
|
|
|
|
|
* is announced.
|
|
|
|
|
*
|
|
|
|
|
* Words may be composed of :
|
|
|
|
|
* - a numeric weight suffixed by the percent character ('%').
|
|
|
|
|
* - a health status among "up", "down", "stopped", and "fail".
|
|
|
|
|
* - an admin status among "ready", "drain", "maint".
* - a maxconn change request prefixed by "maxconn:".
|
|
|
|
|
*
|
|
|
|
|
* These words may appear in any order. If multiple words of the
|
|
|
|
|
* same category appear, the last one wins.
|
|
|
|
|
*/
|
2013-02-11 20:45:54 -05:00
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
p = b_head(&check->bi);
|
2013-12-11 15:40:11 -05:00
|
|
|
while (*p && *p != '\n' && *p != '\r')
|
|
|
|
|
p++;
|
|
|
|
|
|
|
|
|
|
if (!*p) {
|
|
|
|
|
if (!done)
|
|
|
|
|
goto wait_more_data;
|
2013-02-11 20:45:54 -05:00
|
|
|
|
2013-12-11 15:40:11 -05:00
|
|
|
/* at least inform the admin that the agent is mis-behaving */
|
|
|
|
|
set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-12-09 14:51:51 -05:00
|
|
|
|
2013-12-11 15:40:11 -05:00
|
|
|
*p = 0;
|
2018-07-10 11:43:27 -04:00
|
|
|
cmd = b_head(&check->bi);
|
2013-02-11 20:45:54 -05:00
|
|
|
|
2013-12-09 14:51:51 -05:00
|
|
|
while (*cmd) {
|
|
|
|
|
/* look for next word */
|
|
|
|
|
if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
|
|
|
|
|
cmd++;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2013-11-24 20:46:39 -05:00
|
|
|
|
2013-12-09 14:51:51 -05:00
|
|
|
if (*cmd == '#') {
|
|
|
|
|
/* this is the beginning of a health status description,
|
|
|
|
|
* skip the sharp and blanks.
|
|
|
|
|
*/
|
|
|
|
|
cmd++;
|
|
|
|
|
while (*cmd == '\t' || *cmd == ' ')
|
|
|
|
|
cmd++;
|
2013-11-24 20:46:39 -05:00
|
|
|
break;
|
2013-02-11 20:45:54 -05:00
|
|
|
}
|
2013-12-09 14:51:51 -05:00
|
|
|
|
|
|
|
|
/* find the end of the word so that we have a null-terminated
|
|
|
|
|
* word between <cmd> and <p>.
|
|
|
|
|
*/
|
|
|
|
|
p = cmd + 1;
|
|
|
|
|
while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
|
|
|
|
|
p++;
|
|
|
|
|
if (*p)
|
|
|
|
|
*p++ = 0;
|
|
|
|
|
|
|
|
|
|
/* first, health statuses */
|
|
|
|
|
if (strcasecmp(cmd, "up") == 0) {
|
|
|
|
|
check->health = check->rise + check->fall - 1;
|
2013-02-11 20:45:54 -05:00
|
|
|
status = HCHK_STATUS_L7OKD;
|
2013-12-09 14:51:51 -05:00
|
|
|
hs = cmd;
|
|
|
|
|
}
|
|
|
|
|
else if (strcasecmp(cmd, "down") == 0) {
|
|
|
|
|
check->health = 0;
|
|
|
|
|
status = HCHK_STATUS_L7STS;
|
|
|
|
|
hs = cmd;
|
|
|
|
|
}
|
|
|
|
|
else if (strcasecmp(cmd, "stopped") == 0) {
|
|
|
|
|
check->health = 0;
|
|
|
|
|
status = HCHK_STATUS_L7STS;
|
|
|
|
|
hs = cmd;
|
|
|
|
|
}
|
|
|
|
|
else if (strcasecmp(cmd, "fail") == 0) {
|
|
|
|
|
check->health = 0;
|
|
|
|
|
status = HCHK_STATUS_L7STS;
|
|
|
|
|
hs = cmd;
|
2013-02-11 20:45:54 -05:00
|
|
|
}
|
2013-12-09 14:51:51 -05:00
|
|
|
/* admin statuses */
|
|
|
|
|
else if (strcasecmp(cmd, "ready") == 0) {
|
|
|
|
|
as = cmd;
|
|
|
|
|
}
|
|
|
|
|
else if (strcasecmp(cmd, "drain") == 0) {
|
|
|
|
|
as = cmd;
|
|
|
|
|
}
|
|
|
|
|
else if (strcasecmp(cmd, "maint") == 0) {
|
|
|
|
|
as = cmd;
|
|
|
|
|
}
|
2016-04-24 17:10:06 -04:00
|
|
|
/* try to parse a weight here and keep the last one */
|
2013-12-09 14:51:51 -05:00
|
|
|
else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
|
|
|
|
|
ps = cmd;
|
|
|
|
|
}
|
2016-04-24 17:10:06 -04:00
|
|
|
/* try to parse a maxconn here */
|
|
|
|
|
else if (strncasecmp(cmd, "maxconn:", strlen("maxconn:")) == 0) {
|
|
|
|
|
cs = cmd;
|
|
|
|
|
}
|
2013-12-09 14:51:51 -05:00
|
|
|
else {
|
|
|
|
|
/* keep a copy of the first error */
|
|
|
|
|
if (!err)
|
|
|
|
|
err = cmd;
|
|
|
|
|
}
|
|
|
|
|
/* skip to next word */
|
|
|
|
|
cmd = p;
|
|
|
|
|
}
|
|
|
|
|
/* here, cmd points either to \0 or to the beginning of a
|
|
|
|
|
* description. Skip possible leading spaces.
|
|
|
|
|
*/
|
|
|
|
|
while (*cmd == ' ' || *cmd == '\n')
|
|
|
|
|
cmd++;
|
|
|
|
|
|
|
|
|
|
/* First, update the admin status so that we avoid sending other
|
|
|
|
|
* possibly useless warnings and can also update the health if
|
|
|
|
|
* present after going back up.
|
|
|
|
|
*/
|
|
|
|
|
if (as) {
|
|
|
|
|
if (strcasecmp(as, "drain") == 0)
|
|
|
|
|
srv_adm_set_drain(check->server);
|
|
|
|
|
else if (strcasecmp(as, "maint") == 0)
|
|
|
|
|
srv_adm_set_maint(check->server);
|
|
|
|
|
else
|
|
|
|
|
srv_adm_set_ready(check->server);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* now change weights */
|
|
|
|
|
if (ps) {
|
|
|
|
|
const char *msg;
|
|
|
|
|
|
|
|
|
|
msg = server_parse_weight_change_request(s, ps);
|
|
|
|
|
if (!wrn || !*wrn)
|
|
|
|
|
wrn = msg;
|
2013-02-11 20:45:54 -05:00
|
|
|
}
|
|
|
|
|
|
2016-04-24 17:10:06 -04:00
|
|
|
if (cs) {
|
|
|
|
|
const char *msg;
|
|
|
|
|
|
|
|
|
|
cs += strlen("maxconn:");
|
|
|
|
|
|
|
|
|
|
msg = server_parse_maxconn_change_request(s, cs);
|
|
|
|
|
if (!wrn || !*wrn)
|
|
|
|
|
wrn = msg;
|
|
|
|
|
}
|
|
|
|
|
|
2013-12-09 14:51:51 -05:00
|
|
|
/* and finally health status */
|
|
|
|
|
if (hs) {
|
|
|
|
|
/* We'll report some of the warnings and errors we have
|
|
|
|
|
* here. Down reports are critical, we leave them untouched.
|
|
|
|
|
* Lack of report, or report of 'UP' leaves the room for
|
|
|
|
|
* ERR first, then WARN.
|
2013-02-11 20:45:54 -05:00
|
|
|
*/
|
2013-12-09 14:51:51 -05:00
|
|
|
const char *msg = cmd;
|
2018-07-13 05:56:34 -04:00
|
|
|
struct buffer *t;
|
2013-12-09 14:51:51 -05:00
|
|
|
|
|
|
|
|
if (!*msg || status == HCHK_STATUS_L7OKD) {
|
|
|
|
|
if (err && *err)
|
|
|
|
|
msg = err;
|
|
|
|
|
else if (wrn && *wrn)
|
|
|
|
|
msg = wrn;
|
2013-02-11 20:45:54 -05:00
|
|
|
}
|
2013-12-09 14:51:51 -05:00
|
|
|
|
|
|
|
|
t = get_trash_chunk();
|
|
|
|
|
chunk_printf(t, "via agent : %s%s%s%s",
|
|
|
|
|
hs, *msg ? " (" : "",
|
|
|
|
|
msg, *msg ? ")" : "");
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
set_server_check_status(check, status, t->area);
|
2013-02-11 20:45:54 -05:00
|
|
|
}
|
2013-12-09 14:51:51 -05:00
|
|
|
else if (err && *err) {
|
|
|
|
|
/* No status change but we'd like to report something odd.
|
|
|
|
|
* Just report the current state and copy the message.
|
|
|
|
|
*/
|
|
|
|
|
chunk_printf(&trash, "agent reports an error : %s", err);
|
2018-07-13 04:54:26 -04:00
|
|
|
set_server_check_status(check, status/*check->status*/,
|
|
|
|
|
trash.area);
|
2013-02-11 20:45:54 -05:00
|
|
|
|
2013-12-09 14:51:51 -05:00
|
|
|
}
|
|
|
|
|
else if (wrn && *wrn) {
|
|
|
|
|
/* No status change but we'd like to report something odd.
|
|
|
|
|
* Just report the current state and copy the message.
|
|
|
|
|
*/
|
|
|
|
|
chunk_printf(&trash, "agent warns : %s", wrn);
|
2018-07-13 04:54:26 -04:00
|
|
|
set_server_check_status(check, status/*check->status*/,
|
|
|
|
|
trash.area);
|
2013-12-09 14:51:51 -05:00
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
set_server_check_status(check, status, NULL);
|
2013-02-11 20:45:54 -05:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2011-08-06 11:05:02 -04:00
|
|
|
case PR_O2_PGSQL_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < 9)
|
2011-01-04 09:14:13 -05:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2019-08-06 10:26:31 -04:00
|
|
|
/* do not reset when closing, servers don't like this */
|
|
|
|
|
if (conn_ctrl_ready(cs->conn))
|
|
|
|
|
fdtab[cs->conn->handle.fd].linger_risk = 0;
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (b_head(&check->bi)[0] == 'R') {
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, "PostgreSQL server is ok");
|
2011-01-04 09:14:13 -05:00
|
|
|
}
|
|
|
|
|
else {
|
2018-07-10 11:43:27 -04:00
|
|
|
if ((b_head(&check->bi)[0] == 'E') && (b_head(&check->bi)[5]!=0) && (b_head(&check->bi)[6]!=0))
|
|
|
|
|
desc = &b_head(&check->bi)[6];
|
2011-01-04 09:14:13 -05:00
|
|
|
else
|
|
|
|
|
desc = "PostgreSQL unknown error";
|
|
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
|
2011-01-04 09:14:13 -05:00
|
|
|
}
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PR_O2_REDIS_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < 7)
|
2011-08-05 10:23:48 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (strcmp(b_head(&check->bi), "+PONG\r\n") == 0) {
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, "Redis server is ok");
|
2011-08-05 10:23:48 -04:00
|
|
|
}
|
|
|
|
|
else {
|
2018-07-10 11:43:27 -04:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, b_head(&check->bi));
|
2011-08-05 10:23:48 -04:00
|
|
|
}
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PR_O2_MYSQL_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < 5)
|
2010-03-17 16:52:07 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2019-08-06 10:26:31 -04:00
|
|
|
/* do not reset when closing, servers don't like this */
|
|
|
|
|
if (conn_ctrl_ready(cs->conn))
|
|
|
|
|
fdtab[cs->conn->handle.fd].linger_risk = 0;
|
|
|
|
|
|
2010-10-18 09:58:36 -04:00
|
|
|
if (s->proxy->check_len == 0) { // old mode
|
2018-07-10 11:43:27 -04:00
|
|
|
if (*(b_head(&check->bi) + 4) != '\xff') {
|
2010-10-18 09:58:36 -04:00
|
|
|
/* We set the MySQL version in the description for informational purposes.
|
|
|
|
|
* FIXME : it could be useful to use the MySQL version for other purposes,
|
|
|
|
|
* such as marking old MySQL servers as down.
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
if (b_data(&check->bi) > 51) {
|
|
|
|
|
desc = ltrim(b_head(&check->bi) + 5, ' ');
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
|
2010-10-18 09:58:36 -04:00
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (!done)
|
|
|
|
|
goto wait_more_data;
|
2017-10-23 08:39:51 -04:00
|
|
|
|
2010-10-18 09:58:36 -04:00
|
|
|
/* it seems we have a OK packet but without a valid length,
|
|
|
|
|
* it must be a protocol error
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, b_head(&check->bi));
|
2010-10-18 09:58:36 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* An error message is attached in the Error packet */
|
2018-07-10 11:43:27 -04:00
|
|
|
desc = ltrim(b_head(&check->bi) + 7, ' ');
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
|
2010-10-18 09:58:36 -04:00
|
|
|
}
|
|
|
|
|
} else {
|
2018-07-10 11:43:27 -04:00
|
|
|
unsigned int first_packet_len = ((unsigned int) *b_head(&check->bi)) +
|
|
|
|
|
(((unsigned int) *(b_head(&check->bi) + 1)) << 8) +
|
|
|
|
|
(((unsigned int) *(b_head(&check->bi) + 2)) << 16);
|
2010-10-18 09:58:36 -04:00
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (b_data(&check->bi) == first_packet_len + 4) {
|
2010-10-18 09:58:36 -04:00
|
|
|
/* MySQL Error packet always begin with field_count = 0xff */
|
2018-07-10 11:43:27 -04:00
|
|
|
if (*(b_head(&check->bi) + 4) != '\xff') {
|
2010-10-18 09:58:36 -04:00
|
|
|
/* We have only one MySQL packet and it is a Handshake Initialization packet
|
|
|
|
|
* but we need to have a second packet to know if it is alright
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < first_packet_len + 5)
|
2010-10-18 09:58:36 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* We have only one packet and it is an Error packet,
|
|
|
|
|
* an error message is attached, so we can display it
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
desc = &b_head(&check->bi)[7];
|
2017-11-24 10:50:31 -05:00
|
|
|
//ha_warning("onlyoneERR: %s\n", desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
|
2010-10-18 09:58:36 -04:00
|
|
|
}
|
2018-07-10 11:43:27 -04:00
|
|
|
} else if (b_data(&check->bi) > first_packet_len + 4) {
|
|
|
|
|
unsigned int second_packet_len = ((unsigned int) *(b_head(&check->bi) + first_packet_len + 4)) +
|
|
|
|
|
(((unsigned int) *(b_head(&check->bi) + first_packet_len + 5)) << 8) +
|
|
|
|
|
(((unsigned int) *(b_head(&check->bi) + first_packet_len + 6)) << 16);
|
2010-10-18 09:58:36 -04:00
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (b_data(&check->bi) == first_packet_len + 4 + second_packet_len + 4 ) {
|
2010-10-18 09:58:36 -04:00
|
|
|
/* We have 2 packets and that's good */
|
|
|
|
|
/* Check if the second packet is a MySQL Error packet or not */
|
2018-07-10 11:43:27 -04:00
|
|
|
if (*(b_head(&check->bi) + first_packet_len + 8) != '\xff') {
|
2010-10-18 09:58:36 -04:00
|
|
|
/* No error packet */
|
|
|
|
|
/* We set the MySQL Version in description for information purpose */
|
2018-07-10 11:43:27 -04:00
|
|
|
desc = &b_head(&check->bi)[5];
|
2017-11-24 10:50:31 -05:00
|
|
|
//ha_warning("2packetOK: %s\n", desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, desc);
|
2010-10-18 09:58:36 -04:00
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* An error message is attached in the Error packet
|
|
|
|
|
* so we can display it ! :)
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
desc = &b_head(&check->bi)[first_packet_len+11];
|
2017-11-24 10:50:31 -05:00
|
|
|
//ha_warning("2packetERR: %s\n", desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, desc);
|
2010-10-18 09:58:36 -04:00
|
|
|
}
|
|
|
|
|
}
|
2010-01-12 03:25:13 -05:00
|
|
|
}
|
|
|
|
|
else {
|
2010-03-17 16:52:07 -04:00
|
|
|
if (!done)
|
|
|
|
|
goto wait_more_data;
|
2017-10-23 08:39:51 -04:00
|
|
|
|
2010-10-18 09:58:36 -04:00
|
|
|
/* it seems we have a Handshake Initialization packet but without a valid length,
|
2010-01-12 03:25:13 -05:00
|
|
|
* it must be a protocol error
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
desc = &b_head(&check->bi)[5];
|
2017-11-24 10:50:31 -05:00
|
|
|
//ha_warning("protoerr: %s\n", desc);
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, desc);
|
2010-01-12 03:25:13 -05:00
|
|
|
}
|
|
|
|
|
}
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PR_O2_LDAP_CHK:
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < 14)
|
2010-09-29 12:17:05 -04:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
|
|
|
|
/* Check if the server speaks LDAP (ASN.1/BER)
|
|
|
|
|
* http://en.wikipedia.org/wiki/Basic_Encoding_Rules
|
|
|
|
|
* http://tools.ietf.org/html/rfc4511
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* http://tools.ietf.org/html/rfc4511#section-4.1.1
|
|
|
|
|
* LDAPMessage: 0x30: SEQUENCE
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
if ((b_data(&check->bi) < 14) || (*(b_head(&check->bi)) != '\x30')) {
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
|
2010-09-29 12:17:05 -04:00
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* size of LDAPMessage */
|
2018-07-10 11:43:27 -04:00
|
|
|
msglen = (*(b_head(&check->bi) + 1) & 0x80) ? (*(b_head(&check->bi) + 1) & 0x7f) : 0;
|
2010-09-29 12:17:05 -04:00
|
|
|
|
|
|
|
|
/* http://tools.ietf.org/html/rfc4511#section-4.2.2
|
|
|
|
|
* messageID: 0x02 0x01 0x01: INTEGER 1
|
|
|
|
|
* protocolOp: 0x61: bindResponse
|
|
|
|
|
*/
|
|
|
|
|
if ((msglen > 2) ||
|
2018-07-10 11:43:27 -04:00
|
|
|
(memcmp(b_head(&check->bi) + 2 + msglen, "\x02\x01\x01\x61", 4) != 0)) {
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
|
2010-09-29 12:17:05 -04:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* size of bindResponse */
|
2018-07-10 11:43:27 -04:00
|
|
|
msglen += (*(b_head(&check->bi) + msglen + 6) & 0x80) ? (*(b_head(&check->bi) + msglen + 6) & 0x7f) : 0;
|
2010-09-29 12:17:05 -04:00
|
|
|
|
|
|
|
|
/* http://tools.ietf.org/html/rfc4511#section-4.1.9
|
|
|
|
|
* ldapResult: 0x0a 0x01: ENUMERATION
|
|
|
|
|
*/
|
|
|
|
|
if ((msglen > 4) ||
|
2018-07-10 11:43:27 -04:00
|
|
|
(memcmp(b_head(&check->bi) + 7 + msglen, "\x0a\x01", 2) != 0)) {
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, "Not LDAPv3 protocol");
|
2010-09-29 12:17:05 -04:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* http://tools.ietf.org/html/rfc4511#section-4.1.9
|
|
|
|
|
* resultCode
|
|
|
|
|
*/
|
2018-07-10 11:43:27 -04:00
|
|
|
check->code = *(b_head(&check->bi) + msglen + 9);
|
2013-02-23 01:35:38 -05:00
|
|
|
if (check->code) {
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, "See RFC: http://tools.ietf.org/html/rfc4511#section-4.1.9");
|
2010-09-29 12:17:05 -04:00
|
|
|
} else {
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, "Success");
|
2010-09-29 12:17:05 -04:00
|
|
|
}
|
|
|
|
|
}
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
|
2016-11-07 15:07:38 -05:00
|
|
|
case PR_O2_SPOP_CHK: {
|
|
|
|
|
unsigned int framesz;
|
|
|
|
|
char err[HCHK_DESC_LEN];
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < 4)
|
2016-11-07 15:07:38 -05:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
memcpy(&framesz, b_head(&check->bi), 4);
|
2016-11-07 15:07:38 -05:00
|
|
|
framesz = ntohl(framesz);
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!done && b_data(&check->bi) < (4+framesz))
|
2016-11-07 15:07:38 -05:00
|
|
|
goto wait_more_data;
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!spoe_handle_healthcheck_response(b_head(&check->bi)+4, framesz, err, HCHK_DESC_LEN-1))
|
2016-11-07 15:07:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, "SPOA server is ok");
|
|
|
|
|
else
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7STS, err);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2011-08-06 11:05:02 -04:00
|
|
|
default:
|
2019-09-05 12:43:22 -04:00
|
|
|
/* good connection is enough for pure TCP check */
|
2020-01-23 10:27:54 -05:00
|
|
|
if (!(conn->flags & CO_FL_WAIT_XPRT) && !check->type) {
|
2020-03-27 13:55:49 -04:00
|
|
|
if (check->use_ssl == 1)
|
2019-09-05 12:43:22 -04:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
|
|
|
|
|
else
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
|
|
|
|
|
}
|
2011-08-06 11:05:02 -04:00
|
|
|
break;
|
|
|
|
|
} /* switch */
|
2007-04-15 14:56:27 -04:00
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
out_wakeup:
|
2013-12-04 05:17:05 -05:00
|
|
|
/* collect possible new errors */
|
2017-10-16 09:17:17 -04:00
|
|
|
if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, 0, 0);
|
2006-06-25 20:48:02 -04:00
|
|
|
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
/* Reset the check buffer... */
|
2018-07-10 11:43:27 -04:00
|
|
|
*b_head(&check->bi) = '\0';
|
|
|
|
|
b_reset(&check->bi);
|
[MEDIUM] checks: support multi-packet health check responses
We are seeing both real servers repeatedly going on- and off-line with
a period of tens of seconds. Packet tracing, stracing, and adding
debug code to HAProxy itself has revealed that the real servers are
always responding correctly, but HAProxy is sometimes receiving only
part of the response.
It appears that the real servers are sending the test page as three
separate packets. HAProxy receives the contents of one, two, or three
packets, apparently randomly. Naturally, the health check only
succeeds when all three packets' data are seen by HAProxy. If HAProxy
and the real servers are modified to use a plain HTML page for the
health check, the response is in the form of a single packet and the
checks do not fail.
(...)
I've added buffer and length variables to struct server, and allocated
space with the rest of the server initialisation.
(...)
It seems to be working fine in my tests, and handles check responses
that are bigger than the buffer.
2010-03-16 11:50:46 -04:00
|
|
|
|
2017-03-08 14:06:20 -05:00
|
|
|
/* Close the connection... We still attempt to nicely close if,
|
|
|
|
|
* for instance, SSL needs to send a "close notify." Later, we perform
|
|
|
|
|
* a hard close and reset the connection if some data are pending,
|
|
|
|
|
* otherwise we end up with many TIME_WAITs and eat all the source port
|
|
|
|
|
* range quickly. To avoid sending RSTs all the time, we first try to
|
|
|
|
|
* drain pending data.
|
2012-11-23 03:18:20 -05:00
|
|
|
*/
|
2019-07-02 10:35:18 -04:00
|
|
|
/* Call cs_shutr() first, to add the CO_FL_SOCK_RD_SH flag on the
|
|
|
|
|
* connection, to make sure cs_shutw() will not lead to a shutdown()
|
|
|
|
|
* that would provoke TIME_WAITs.
|
|
|
|
|
*/
|
|
|
|
|
cs_shutr(cs, CS_SHR_DRAIN);
|
2017-10-05 09:25:48 -04:00
|
|
|
cs_shutw(cs, CS_SHW_NORMAL);
|
MEDIUM: protocol: implement a "drain" function in protocol layers
Since commit cfd97c6f was merged into 1.5-dev14 (BUG/MEDIUM: checks:
prevent TIME_WAITs from appearing also on timeouts), some valid health
checks sometimes used to show some TCP resets. For example, this HTTP
health check sent to a local server :
19:55:15.742818 IP 127.0.0.1.16568 > 127.0.0.1.8000: S 3355859679:3355859679(0) win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
19:55:15.742841 IP 127.0.0.1.8000 > 127.0.0.1.16568: S 1060952566:1060952566(0) ack 3355859680 win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
19:55:15.742863 IP 127.0.0.1.16568 > 127.0.0.1.8000: . ack 1 win 257
19:55:15.745402 IP 127.0.0.1.16568 > 127.0.0.1.8000: P 1:23(22) ack 1 win 257
19:55:15.745488 IP 127.0.0.1.8000 > 127.0.0.1.16568: FP 1:146(145) ack 23 win 257
19:55:15.747109 IP 127.0.0.1.16568 > 127.0.0.1.8000: R 23:23(0) ack 147 win 257
After some discussion with Chris Huang-Leaver, it appeared clear that
what we want is to only send the RST when we have no other choice, which
means when the server has not closed. So we still keep SYN/SYN-ACK/RST
for pure TCP checks, but don't want to see an RST emitted as above when
the server has already sent the FIN.
The solution against this consists in implementing a "drain" function at
the protocol layer, which, when defined, causes as much as possible of
the input socket buffer to be flushed to make recv() return zero so that
we know that the server's FIN was received and ACKed. On Linux, we can make
use of MSG_TRUNC on TCP sockets, which has the benefit of draining everything
at once without even copying data. On other platforms, we read up to one
buffer of data before the close. If recv() manages to get the final zero,
we don't disable lingering. Same for hard errors. Otherwise we do.
In practice, on HTTP health checks we generally find that the close was
pending and is returned upon first recv() call. The network trace becomes
cleaner :
19:55:23.650621 IP 127.0.0.1.16561 > 127.0.0.1.8000: S 3982804816:3982804816(0) win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
19:55:23.650644 IP 127.0.0.1.8000 > 127.0.0.1.16561: S 4082139313:4082139313(0) ack 3982804817 win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
19:55:23.650666 IP 127.0.0.1.16561 > 127.0.0.1.8000: . ack 1 win 257
19:55:23.651615 IP 127.0.0.1.16561 > 127.0.0.1.8000: P 1:23(22) ack 1 win 257
19:55:23.651696 IP 127.0.0.1.8000 > 127.0.0.1.16561: FP 1:146(145) ack 23 win 257
19:55:23.652628 IP 127.0.0.1.16561 > 127.0.0.1.8000: F 23:23(0) ack 147 win 257
19:55:23.652655 IP 127.0.0.1.8000 > 127.0.0.1.16561: . ack 24 win 257
This change should be backported to 1.4 which is where Chris encountered
this issue. The code is different, so probably the tcp_drain() function
will have to be put in the checks only.
2013-06-10 13:56:38 -04:00
|
|
|
|
2013-12-04 05:17:05 -05:00
|
|
|
/* OK, let's not stay here forever */
|
2013-12-11 11:09:34 -05:00
|
|
|
if (check->result == CHK_RES_FAILED)
|
2013-12-04 05:17:05 -05:00
|
|
|
conn->flags |= CO_FL_ERROR;
|
|
|
|
|
|
2008-08-29 12:19:04 -04:00
|
|
|
task_wakeup(t, TASK_WOKEN_IO);
|
2018-08-09 07:06:55 -04:00
|
|
|
out:
|
2012-08-17 17:53:56 -04:00
|
|
|
return;
|
2010-03-17 16:52:07 -04:00
|
|
|
|
|
|
|
|
wait_more_data:
|
2018-12-19 07:59:17 -05:00
|
|
|
cs->conn->mux->subscribe(cs, SUB_RETRY_RECV, &check->wait_list);
|
2018-08-09 07:06:55 -04:00
|
|
|
goto out;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2012-09-28 08:40:02 -04:00
|
|
|
/*
|
|
|
|
|
* This function is used only for server health-checks. It handles connection
|
|
|
|
|
* status updates including errors. If necessary, it wakes the check task up.
|
2017-10-04 12:41:00 -04:00
|
|
|
* It returns 0 on normal cases, <0 if at least one close() has happened on the
|
|
|
|
|
* connection (eg: reconnect).
|
2012-09-28 08:40:02 -04:00
|
|
|
*/
|
2017-09-13 12:30:23 -04:00
|
|
|
static int wake_srv_chk(struct conn_stream *cs)
|
2012-07-06 06:00:49 -04:00
|
|
|
{
|
2017-09-13 12:30:23 -04:00
|
|
|
struct connection *conn = cs->conn;
|
|
|
|
|
struct check *check = cs->data;
|
2019-01-11 12:43:04 -05:00
|
|
|
struct email_alertq *q = container_of(check, typeof(*q), check);
|
2017-10-04 12:41:00 -04:00
|
|
|
int ret = 0;
|
2012-07-06 06:00:49 -04:00
|
|
|
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
|
|
|
|
|
else
|
|
|
|
|
HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2017-11-02 09:35:27 -04:00
|
|
|
|
2017-10-04 05:58:22 -04:00
|
|
|
/* we may have to make progress on the TCP checks */
|
2017-10-04 12:41:00 -04:00
|
|
|
if (check->type == PR_O2_TCPCHK_CHK) {
|
|
|
|
|
ret = tcpcheck_main(check);
|
2017-09-13 12:30:23 -04:00
|
|
|
cs = check->cs;
|
2018-09-20 05:25:12 -04:00
|
|
|
conn = cs->conn;
|
2019-09-05 11:38:40 -04:00
|
|
|
} else {
|
|
|
|
|
if (!(check->wait_list.events & SUB_RETRY_SEND))
|
|
|
|
|
__event_srv_chk_w(cs);
|
|
|
|
|
if (!(check->wait_list.events & SUB_RETRY_RECV))
|
|
|
|
|
__event_srv_chk_r(cs);
|
|
|
|
|
}
|
2017-10-04 05:58:22 -04:00
|
|
|
|
2017-10-16 09:17:17 -04:00
|
|
|
if (unlikely(conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)) {
|
2013-12-03 09:42:33 -05:00
|
|
|
/* We may get error reports bypassing the I/O handlers, typically
|
|
|
|
|
* the case when sending a pure TCP check which fails, then the I/O
|
|
|
|
|
* handlers above are not called. This is completely handled by the
|
2013-12-04 05:17:05 -05:00
|
|
|
* main processing task so let's simply wake it up. If we get here,
|
|
|
|
|
* we expect errno to still be valid.
|
2013-12-03 09:42:33 -05:00
|
|
|
*/
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, errno, 0);
|
2013-12-04 20:36:25 -05:00
|
|
|
task_wakeup(check->task, TASK_WOKEN_IO);
|
|
|
|
|
}
|
2020-01-23 10:27:54 -05:00
|
|
|
else if (!(conn->flags & CO_FL_WAIT_XPRT) && !check->type) {
|
2014-02-05 12:31:24 -05:00
|
|
|
/* we may get here if only a connection probe was required : we
|
|
|
|
|
* don't have any data to send nor anything expected in response,
|
|
|
|
|
* so the completion of the connection establishment is enough.
|
|
|
|
|
*/
|
|
|
|
|
task_wakeup(check->task, TASK_WOKEN_IO);
|
|
|
|
|
}
|
2013-12-04 20:36:25 -05:00
|
|
|
|
2013-12-11 11:09:34 -05:00
|
|
|
if (check->result != CHK_RES_UNKNOWN) {
|
2019-01-21 08:15:50 -05:00
|
|
|
/* Check complete or aborted. If connection not yet closed do it
|
|
|
|
|
* now and wake the check task up to be sure the result is
|
|
|
|
|
* handled ASAP. */
|
2015-03-12 19:40:28 -04:00
|
|
|
conn_sock_drain(conn);
|
2017-10-05 12:52:17 -04:00
|
|
|
cs_close(cs);
|
2017-10-04 12:41:00 -04:00
|
|
|
ret = -1;
|
2019-07-02 11:42:22 -04:00
|
|
|
/* We may have been scheduled to run, and the
|
|
|
|
|
* I/O handler expects to have a cs, so remove
|
|
|
|
|
* the tasklet
|
|
|
|
|
*/
|
|
|
|
|
tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
|
2019-01-21 08:15:50 -05:00
|
|
|
task_wakeup(check->task, TASK_WOKEN_IO);
|
2013-12-04 20:36:25 -05:00
|
|
|
}
|
2017-10-04 12:41:00 -04:00
|
|
|
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
|
|
|
|
|
else
|
|
|
|
|
HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2017-11-02 09:35:27 -04:00
|
|
|
|
2017-10-04 12:41:00 -04:00
|
|
|
/* if a connection got replaced, we must absolutely prevent the connection
|
|
|
|
|
* handler from touching its fd, and perform the FD polling updates ourselves
|
|
|
|
|
*/
|
|
|
|
|
if (ret < 0)
|
|
|
|
|
conn_cond_update_polling(conn);
|
|
|
|
|
|
|
|
|
|
return ret;
|
2012-07-06 06:00:49 -04:00
|
|
|
}
|
|
|
|
|
|
2012-09-28 08:40:02 -04:00
|
|
|
struct data_cb check_conn_cb = {
|
|
|
|
|
.wake = wake_srv_chk,
|
2016-11-24 10:58:12 -05:00
|
|
|
.name = "CHCK",
|
2012-09-28 08:40:02 -04:00
|
|
|
};
|
|
|
|
|
|
2011-10-31 06:53:20 -04:00
|
|
|
/*
|
|
|
|
|
* updates the server's weight during a warmup stage. Once the final weight is
|
|
|
|
|
* reached, the task automatically stops. Note that any server status change
|
|
|
|
|
* must have updated s->last_change accordingly.
|
|
|
|
|
*/
|
2018-05-25 08:04:04 -04:00
|
|
|
static struct task *server_warmup(struct task *t, void *context, unsigned short state)
|
2011-10-31 06:53:20 -04:00
|
|
|
{
|
2018-05-25 08:04:04 -04:00
|
|
|
struct server *s = context;
|
2011-10-31 06:53:20 -04:00
|
|
|
|
|
|
|
|
/* by default, plan on stopping the task */
|
|
|
|
|
t->expire = TICK_ETERNITY;
|
2017-08-31 08:41:55 -04:00
|
|
|
if ((s->next_admin & SRV_ADMF_MAINT) ||
|
|
|
|
|
(s->next_state != SRV_ST_STARTING))
|
2011-10-31 06:53:20 -04:00
|
|
|
return t;
|
|
|
|
|
|
2019-05-05 00:54:22 -04:00
|
|
|
HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
|
|
|
|
|
|
2014-05-13 17:41:20 -04:00
|
|
|
/* recalculate the weights and update the state */
|
2018-08-02 05:48:52 -04:00
|
|
|
server_recalc_eweight(s, 1);
|
2011-10-31 06:53:20 -04:00
|
|
|
|
|
|
|
|
/* probably that we can refill this server with a bit more connections */
|
2014-05-16 05:48:10 -04:00
|
|
|
pendconn_grab_from_px(s);
|
2011-10-31 06:53:20 -04:00
|
|
|
|
2019-05-05 00:54:22 -04:00
|
|
|
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
|
|
|
|
|
|
2011-10-31 06:53:20 -04:00
|
|
|
/* get back there in 1 second or 1/20th of the slowstart interval,
|
|
|
|
|
* whichever is greater, resulting in small 5% steps.
|
|
|
|
|
*/
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->next_state == SRV_ST_STARTING)
|
2011-10-31 06:53:20 -04:00
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
|
|
|
|
|
return t;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-04 09:58:52 -04:00
|
|
|
/* returns the first NON-COMMENT tcp-check rule from list <list> or NULL if
|
|
|
|
|
* none was found.
|
|
|
|
|
*/
|
2020-03-30 14:34:34 -04:00
|
|
|
static struct tcpcheck_rule *get_first_tcpcheck_rule(struct tcpcheck_rules *rules)
|
2017-10-04 09:58:52 -04:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *r;
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry(r, rules->list, list) {
|
2020-02-21 12:14:59 -05:00
|
|
|
if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
|
2017-10-04 09:58:52 -04:00
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-14 11:47:08 -05:00
|
|
|
/* returns the NON-COMMENT tcp-check rule from list <list> following <start> or
|
|
|
|
|
* NULL if non was found. If <start> is NULL, it relies on
|
|
|
|
|
* get_first_tcpcheck_rule().
|
|
|
|
|
*/
|
2020-03-30 14:34:34 -04:00
|
|
|
static struct tcpcheck_rule *get_next_tcpcheck_rule(struct tcpcheck_rules *rules, struct tcpcheck_rule *start)
|
2020-02-14 11:47:08 -05:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *r;
|
|
|
|
|
|
|
|
|
|
if (!start)
|
2020-03-30 14:34:34 -04:00
|
|
|
return get_first_tcpcheck_rule(rules);
|
2020-02-14 11:47:08 -05:00
|
|
|
|
|
|
|
|
r = LIST_NEXT(&start->list, typeof(r), list);
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry_from(r, rules->list, list) {
|
2020-02-21 12:14:59 -05:00
|
|
|
if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
|
2020-02-14 11:47:08 -05:00
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-13 03:18:16 -04:00
|
|
|
/*
|
2014-06-19 23:30:16 -04:00
|
|
|
* establish a server health-check that makes use of a connection.
|
2014-06-13 03:18:16 -04:00
|
|
|
*
|
|
|
|
|
* It can return one of :
|
2015-04-02 19:14:29 -04:00
|
|
|
* - SF_ERR_NONE if everything's OK and tcpcheck_main() was not called
|
|
|
|
|
* - SF_ERR_UP if if everything's OK and tcpcheck_main() was called
|
|
|
|
|
* - SF_ERR_SRVTO if there are no more servers
|
|
|
|
|
* - SF_ERR_SRVCL if the connection was refused by the server
|
|
|
|
|
* - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
|
|
|
|
|
* - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
|
|
|
|
|
* - SF_ERR_INTERNAL for any other purely internal errors
|
2016-06-13 08:15:41 -04:00
|
|
|
* - SF_ERR_CHK_PORT if no port could be found to run a health check on an AF_INET* socket
|
2016-11-28 20:15:19 -05:00
|
|
|
* Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
|
2014-06-13 03:18:16 -04:00
|
|
|
* Note that we try to prevent the network stack from sending the ACK during the
|
|
|
|
|
* connect() when a pure TCP check is used (without PROXY protocol).
|
|
|
|
|
*/
|
2014-06-19 23:30:16 -04:00
|
|
|
static int connect_conn_chk(struct task *t)
|
2014-06-13 03:18:16 -04:00
|
|
|
{
|
|
|
|
|
struct check *check = t->context;
|
|
|
|
|
struct server *s = check->server;
|
2017-09-13 12:30:23 -04:00
|
|
|
struct conn_stream *cs = check->cs;
|
|
|
|
|
struct connection *conn = cs_conn(cs);
|
2014-06-13 03:18:16 -04:00
|
|
|
struct protocol *proto;
|
|
|
|
|
int ret;
|
2019-05-06 12:32:29 -04:00
|
|
|
int connflags = 0;
|
2014-06-13 03:18:16 -04:00
|
|
|
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
/* we cannot have a connection here */
|
|
|
|
|
if (conn)
|
|
|
|
|
return SF_ERR_INTERNAL;
|
|
|
|
|
|
2014-06-13 03:18:16 -04:00
|
|
|
/* prepare the check buffer.
|
|
|
|
|
* This should not be used if check is the secondary agent check
|
|
|
|
|
* of a server as s->proxy->check_req will relate to the
|
|
|
|
|
* configuration of the primary check. Similarly, tcp-check uses
|
|
|
|
|
* its own strings.
|
|
|
|
|
*/
|
|
|
|
|
if (check->type && check->type != PR_O2_TCPCHK_CHK && !(check->state & CHK_ST_AGENT)) {
|
2018-07-10 11:43:27 -04:00
|
|
|
b_putblk(&check->bo, s->proxy->check_req, s->proxy->check_len);
|
2014-06-13 03:18:16 -04:00
|
|
|
|
|
|
|
|
/* we want to check if this host replies to HTTP or SSLv3 requests
|
|
|
|
|
* so we'll send the request, and won't wake the checker up now.
|
|
|
|
|
*/
|
|
|
|
|
if ((check->type) == PR_O2_SSL3_CHK) {
|
|
|
|
|
/* SSL requires that we put Unix time in the request */
|
|
|
|
|
int gmt_time = htonl(date.tv_sec);
|
2018-07-10 11:43:27 -04:00
|
|
|
memcpy(b_head(&check->bo) + 11, &gmt_time, 4);
|
2014-06-13 03:18:16 -04:00
|
|
|
}
|
|
|
|
|
else if ((check->type) == PR_O2_HTTP_CHK) {
|
2015-01-29 18:07:07 -05:00
|
|
|
/* prevent HTTP keep-alive when "http-check expect" is used */
|
|
|
|
|
if (s->proxy->options2 & PR_O2_EXP_TYPE)
|
2018-07-10 11:43:27 -04:00
|
|
|
b_putist(&check->bo, ist("Connection: close\r\n"));
|
2020-04-09 02:44:06 -04:00
|
|
|
|
|
|
|
|
/* If there is a body, add its content-length */
|
|
|
|
|
if (s->proxy->check_body_len)
|
|
|
|
|
chunk_appendf(&check->bo, "Content-Length: %s\r\n", ultoa(s->proxy->check_body_len));
|
|
|
|
|
|
|
|
|
|
/* Add configured headers */
|
|
|
|
|
if (s->proxy->check_hdrs)
|
|
|
|
|
b_putblk(&check->bo, s->proxy->check_hdrs, s->proxy->check_hdrs_len);
|
|
|
|
|
|
|
|
|
|
/* Add send-state header */
|
|
|
|
|
if (s->proxy->options2 & PR_O2_CHK_SNDST)
|
|
|
|
|
b_putblk(&check->bo, trash.area,
|
|
|
|
|
httpchk_build_status_header(s, trash.area, trash.size));
|
|
|
|
|
|
|
|
|
|
/* end-of-header */
|
2018-07-10 11:43:27 -04:00
|
|
|
b_putist(&check->bo, ist("\r\n"));
|
2020-04-09 02:44:06 -04:00
|
|
|
|
|
|
|
|
/* Add the body */
|
|
|
|
|
if (s->proxy->check_body)
|
|
|
|
|
b_putblk(&check->bo, s->proxy->check_body, s->proxy->check_body_len);
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
*b_tail(&check->bo) = '\0'; /* to make gdb output easier to read */
|
2014-06-13 03:18:16 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-21 21:19:05 -04:00
|
|
|
if ((check->type & PR_O2_LB_AGENT_CHK) && check->send_string_len) {
|
2018-07-10 11:43:27 -04:00
|
|
|
b_putblk(&check->bo, check->send_string, check->send_string_len);
|
2015-10-21 21:19:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-10-04 10:21:19 -04:00
|
|
|
/* for tcp-checks, the initial connection setup is handled separately as
|
|
|
|
|
* it may be sent to a specific port and not to the server's.
|
|
|
|
|
*/
|
2020-03-26 12:38:49 -04:00
|
|
|
if (check->type == PR_O2_TCPCHK_CHK) {
|
|
|
|
|
/* tcpcheck initialisation */
|
2020-03-30 05:05:10 -04:00
|
|
|
check->current_step = NULL;
|
2017-10-04 10:21:19 -04:00
|
|
|
tcpcheck_main(check);
|
|
|
|
|
return SF_ERR_UP;
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-13 03:18:16 -04:00
|
|
|
/* prepare a new connection */
|
2017-09-13 12:30:23 -04:00
|
|
|
cs = check->cs = cs_new(NULL);
|
|
|
|
|
if (!check->cs)
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
return SF_ERR_RESOURCE;
|
2017-09-13 12:30:23 -04:00
|
|
|
conn = cs->conn;
|
2018-09-12 09:15:12 -04:00
|
|
|
/* Maybe there were an older connection we were waiting on */
|
2018-12-19 07:59:17 -05:00
|
|
|
check->wait_list.events = 0;
|
2019-09-20 11:18:35 -04:00
|
|
|
tasklet_set_tid(check->wait_list.tasklet, tid);
|
|
|
|
|
|
2014-06-13 03:18:16 -04:00
|
|
|
|
2019-07-17 13:04:47 -04:00
|
|
|
if (!sockaddr_alloc(&conn->dst))
|
|
|
|
|
return SF_ERR_RESOURCE;
|
|
|
|
|
|
2015-01-29 21:22:56 -05:00
|
|
|
if (is_addr(&check->addr)) {
|
2014-06-13 03:18:16 -04:00
|
|
|
/* we'll connect to the check addr specified on the server */
|
2019-07-17 10:54:52 -04:00
|
|
|
*conn->dst = check->addr;
|
2014-06-13 03:18:16 -04:00
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* we'll connect to the addr on the server */
|
2019-07-17 10:54:52 -04:00
|
|
|
*conn->dst = s->addr;
|
2014-06-13 03:18:16 -04:00
|
|
|
}
|
|
|
|
|
|
2019-05-22 07:44:48 -04:00
|
|
|
if (s->check.via_socks4 && (s->flags & SRV_F_SOCKS4_PROXY)) {
|
|
|
|
|
conn->send_proxy_ofs = 1;
|
|
|
|
|
conn->flags |= CO_FL_SOCKS4;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-17 10:54:52 -04:00
|
|
|
proto = protocol_by_family(conn->dst->ss_family);
|
2017-12-01 16:04:05 -05:00
|
|
|
conn->target = &s->obj_type;
|
|
|
|
|
|
2019-07-17 10:54:52 -04:00
|
|
|
if ((conn->dst->ss_family == AF_INET) || (conn->dst->ss_family == AF_INET6)) {
|
2016-06-13 08:15:41 -04:00
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
i = srv_check_healthcheck_port(check);
|
2017-12-01 16:04:05 -05:00
|
|
|
if (i == 0)
|
2016-06-13 08:15:41 -04:00
|
|
|
return SF_ERR_CHK_PORT;
|
|
|
|
|
|
2019-07-17 10:54:52 -04:00
|
|
|
set_host_port(conn->dst, i);
|
2014-06-13 03:18:16 -04:00
|
|
|
}
|
|
|
|
|
|
2015-01-14 05:31:49 -05:00
|
|
|
/* no client address */
|
|
|
|
|
|
2018-09-06 05:45:30 -04:00
|
|
|
conn_prepare(conn, proto, check->xprt);
|
2019-01-29 09:47:43 -05:00
|
|
|
if (conn_install_mux(conn, &mux_pt_ops, cs, s->proxy, NULL) < 0)
|
|
|
|
|
return SF_ERR_RESOURCE;
|
2018-09-06 05:45:30 -04:00
|
|
|
cs_attach(cs, check, &check_conn_cb);
|
|
|
|
|
|
2020-03-26 12:38:49 -04:00
|
|
|
/* only plain tcp check supports quick ACK */
|
|
|
|
|
connflags |= (check->type ? CONNECT_HAS_DATA : CONNECT_DELACK_ALWAYS);
|
2014-06-13 03:18:16 -04:00
|
|
|
|
2015-04-02 19:14:29 -04:00
|
|
|
ret = SF_ERR_INTERNAL;
|
2017-08-04 12:39:01 -04:00
|
|
|
if (proto && proto->connect)
|
2019-05-06 12:32:29 -04:00
|
|
|
ret = proto->connect(conn, connflags);
|
2017-11-02 10:45:00 -04:00
|
|
|
|
|
|
|
|
|
2017-10-17 11:33:43 -04:00
|
|
|
#ifdef USE_OPENSSL
|
2019-01-29 10:37:52 -05:00
|
|
|
if (ret == SF_ERR_NONE) {
|
|
|
|
|
if (s->check.sni)
|
|
|
|
|
ssl_sock_set_servername(conn, s->check.sni);
|
|
|
|
|
if (s->check.alpn_str)
|
|
|
|
|
ssl_sock_set_alpn(conn, (unsigned char *)s->check.alpn_str,
|
|
|
|
|
s->check.alpn_len);
|
|
|
|
|
}
|
2017-10-17 11:33:43 -04:00
|
|
|
#endif
|
2017-05-06 02:45:28 -04:00
|
|
|
if (s->check.send_proxy && !(check->state & CHK_ST_AGENT)) {
|
2014-06-13 03:18:16 -04:00
|
|
|
conn->send_proxy_ofs = 1;
|
|
|
|
|
conn->flags |= CO_FL_SEND_PROXY;
|
2019-12-30 09:13:42 -05:00
|
|
|
}
|
|
|
|
|
if (conn->flags & (CO_FL_SEND_PROXY | CO_FL_SOCKS4) &&
|
|
|
|
|
conn_ctrl_ready(conn)) {
|
2019-05-27 06:09:19 -04:00
|
|
|
if (xprt_add_hs(conn) < 0)
|
|
|
|
|
ret = SF_ERR_RESOURCE;
|
2014-06-13 03:18:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-19 23:30:16 -04:00
|
|
|
/* List of pid_list elements; entries are inserted by pid_list_add(), removed
 * by pid_list_del() and looked up by pid in pid_list_expire().
 */
static struct list pid_list = LIST_HEAD_INIT(pid_list);
|
2017-11-24 11:34:44 -05:00
|
|
|
/* Pool the pid_list elements are allocated from; created by init_pid_list(),
 * where a non-NULL value also means the initialization was already done.
 */
static struct pool_head *pool_head_pid_list;
|
2018-11-25 14:12:18 -05:00
|
|
|
/* Spinlock taken around every insertion, removal and walk of pid_list. */
__decl_spinlock(pid_list_lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
void block_sigchld(void)
|
|
|
|
|
{
|
|
|
|
|
sigset_t set;
|
|
|
|
|
sigemptyset(&set);
|
|
|
|
|
sigaddset(&set, SIGCHLD);
|
2018-06-07 05:23:40 -04:00
|
|
|
assert(ha_sigmask(SIG_BLOCK, &set, NULL) == 0);
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void unblock_sigchld(void)
|
|
|
|
|
{
|
|
|
|
|
sigset_t set;
|
|
|
|
|
sigemptyset(&set);
|
2016-06-21 11:29:46 -04:00
|
|
|
sigaddset(&set, SIGCHLD);
|
2018-06-07 05:23:40 -04:00
|
|
|
assert(ha_sigmask(SIG_UNBLOCK, &set, NULL) == 0);
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct pid_list *pid_list_add(pid_t pid, struct task *t)
|
|
|
|
|
{
|
|
|
|
|
struct pid_list *elem;
|
|
|
|
|
struct check *check = t->context;
|
|
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
elem = pool_alloc(pool_head_pid_list);
|
2014-06-19 23:30:16 -04:00
|
|
|
if (!elem)
|
|
|
|
|
return NULL;
|
|
|
|
|
elem->pid = pid;
|
|
|
|
|
elem->t = t;
|
|
|
|
|
elem->exited = 0;
|
|
|
|
|
check->curpid = elem;
|
|
|
|
|
LIST_INIT(&elem->list);
|
2017-10-20 09:40:23 -04:00
|
|
|
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
LIST_ADD(&pid_list, &elem->list);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
|
2017-10-20 09:40:23 -04:00
|
|
|
|
2014-06-19 23:30:16 -04:00
|
|
|
return elem;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void pid_list_del(struct pid_list *elem)
|
|
|
|
|
{
|
|
|
|
|
struct check *check;
|
|
|
|
|
|
|
|
|
|
if (!elem)
|
|
|
|
|
return;
|
|
|
|
|
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
LIST_DEL(&elem->list);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
|
2017-10-20 09:40:23 -04:00
|
|
|
|
2014-06-19 23:30:16 -04:00
|
|
|
if (!elem->exited)
|
|
|
|
|
kill(elem->pid, SIGTERM);
|
|
|
|
|
|
|
|
|
|
check = elem->t->context;
|
|
|
|
|
check->curpid = NULL;
|
2017-11-24 11:34:44 -05:00
|
|
|
pool_free(pool_head_pid_list, elem);
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Called from inside SIGCHLD handler, SIGCHLD is blocked */
|
|
|
|
|
static void pid_list_expire(pid_t pid, int status)
|
|
|
|
|
{
|
|
|
|
|
struct pid_list *elem;
|
|
|
|
|
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
list_for_each_entry(elem, &pid_list, list) {
|
|
|
|
|
if (elem->pid == pid) {
|
|
|
|
|
elem->t->expire = now_ms;
|
|
|
|
|
elem->status = status;
|
|
|
|
|
elem->exited = 1;
|
2014-08-06 19:55:39 -04:00
|
|
|
task_wakeup(elem->t, TASK_WOKEN_IO);
|
2017-10-20 09:40:23 -04:00
|
|
|
break;
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
}
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
|
2016-06-21 10:27:34 -04:00
|
|
|
/*
 * SIGCHLD callback (registered by init_pid_list()). Collects, without
 * blocking, every child currently reported by waitpid(0, ..., WNOHANG) and
 * hands each (pid, status) pair to pid_list_expire(). <sh> is not used.
 */
static void sigchld_handler(struct sig_handler *sh)
{
	int wstatus;
	pid_t child;

	for (;;) {
		child = waitpid(0, &wstatus, WNOHANG);
		if (child <= 0)
			break; /* nothing more to reap, or error */
		pid_list_expire(child, wstatus);
	}
}
|
|
|
|
|
|
2016-06-21 10:27:34 -04:00
|
|
|
static int init_pid_list(void)
|
|
|
|
|
{
|
2017-11-24 11:34:44 -05:00
|
|
|
if (pool_head_pid_list != NULL)
|
2014-06-19 23:30:16 -04:00
|
|
|
/* Nothing to do */
|
|
|
|
|
return 0;
|
|
|
|
|
|
2016-06-21 10:27:34 -04:00
|
|
|
if (!signal_register_fct(SIGCHLD, sigchld_handler, SIGCHLD)) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Failed to set signal handler for external health checks: %s. Aborting.\n",
|
|
|
|
|
strerror(errno));
|
2014-06-19 23:30:16 -04:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
pool_head_pid_list = create_pool("pid_list", sizeof(struct pid_list), MEM_F_SHARED);
|
|
|
|
|
if (pool_head_pid_list == NULL) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Failed to allocate memory pool for external health checks: %s. Aborting.\n",
|
|
|
|
|
strerror(errno));
|
2014-06-19 23:30:16 -04:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-27 16:28:38 -05:00
|
|
|
/* helper macro to set an environment variable and jump to a specific label on
 * failure. It calls extchk_setenv(check, envidx, value) and performs
 * "goto fail" when that call returns non-zero. The body is wrapped in
 * do { } while (0) so that the macro followed by a semicolon forms exactly one
 * statement and can safely be used as the body of an if/else.
 */
#define EXTCHK_SETENV(check, envidx, value, fail) do { if (extchk_setenv(check, envidx, value)) goto fail; } while (0)
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the external command more generic, this patch
also provides the values in environment variables. This makes it possible to
provide more information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
|
|
|
|
|
/*
|
2014-12-27 16:28:38 -05:00
|
|
|
* helper function to allocate enough memory to store an environment variable.
|
|
|
|
|
* It will also check that the environment variable is updatable, and silently
|
|
|
|
|
* fail if not.
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
*/
|
2014-12-27 16:28:38 -05:00
|
|
|
static int extchk_setenv(struct check *check, int idx, const char *value)
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
{
|
|
|
|
|
int len, ret;
|
2014-12-27 16:28:38 -05:00
|
|
|
char *envname;
|
|
|
|
|
int vmaxlen;
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
|
2014-12-27 16:28:38 -05:00
|
|
|
if (idx < 0 || idx >= EXTCHK_SIZE) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Illegal environment variable index %d. Aborting.\n", idx);
|
2014-12-27 16:28:38 -05:00
|
|
|
return 1;
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
}
|
2014-12-27 16:28:38 -05:00
|
|
|
|
|
|
|
|
envname = extcheck_envs[idx].name;
|
|
|
|
|
vmaxlen = extcheck_envs[idx].vmaxlen;
|
|
|
|
|
|
|
|
|
|
/* Check if the environment variable is already set, and silently reject
|
|
|
|
|
* the update if this one is not updatable. */
|
|
|
|
|
if ((vmaxlen == EXTCHK_SIZE_EVAL_INIT) && (check->envp[idx]))
|
|
|
|
|
return 0;
|
|
|
|
|
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
/* Instead of sending NOT_USED, sending an empty value is preferable */
|
|
|
|
|
if (strcmp(value, "NOT_USED") == 0) {
|
|
|
|
|
value = "";
|
|
|
|
|
}
|
2014-12-27 16:28:38 -05:00
|
|
|
|
|
|
|
|
len = strlen(envname) + 1;
|
|
|
|
|
if (vmaxlen == EXTCHK_SIZE_EVAL_INIT)
|
|
|
|
|
len += strlen(value);
|
|
|
|
|
else
|
|
|
|
|
len += vmaxlen;
|
|
|
|
|
|
|
|
|
|
if (!check->envp[idx])
|
|
|
|
|
check->envp[idx] = malloc(len + 1);
|
|
|
|
|
|
|
|
|
|
if (!check->envp[idx]) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Failed to allocate memory for the environment variable '%s'. Aborting.\n", envname);
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
return 1;
|
|
|
|
|
}
|
2014-12-27 16:28:38 -05:00
|
|
|
ret = snprintf(check->envp[idx], len + 1, "%s=%s", envname, value);
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
if (ret < 0) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Failed to store the environment variable '%s'. Reason : %s. Aborting.\n", envname, strerror(errno));
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
return 1;
|
|
|
|
|
}
|
2014-12-27 16:28:38 -05:00
|
|
|
else if (ret > len) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Environment variable '%s' was truncated. Aborting.\n", envname);
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
static int prepare_external_check(struct check *check)
|
|
|
|
|
{
|
|
|
|
|
struct server *s = check->server;
|
|
|
|
|
struct proxy *px = s->proxy;
|
|
|
|
|
struct listener *listener = NULL, *l;
|
|
|
|
|
int i;
|
|
|
|
|
const char *path = px->check_path ? px->check_path : DEF_CHECK_PATH;
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
char buf[256];
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
list_for_each_entry(l, &px->conf.listeners, by_fe)
|
|
|
|
|
/* Use the first INET, INET6 or UNIX listener */
|
|
|
|
|
if (l->addr.ss_family == AF_INET ||
|
|
|
|
|
l->addr.ss_family == AF_INET6 ||
|
|
|
|
|
l->addr.ss_family == AF_UNIX) {
|
|
|
|
|
listener = l;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
check->curpid = NULL;
|
2014-12-27 16:28:38 -05:00
|
|
|
check->envp = calloc((EXTCHK_SIZE + 1), sizeof(char *));
|
|
|
|
|
if (!check->envp) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Failed to allocate memory for environment variables. Aborting\n");
|
2014-12-27 16:28:38 -05:00
|
|
|
goto err;
|
|
|
|
|
}
|
2014-06-19 23:30:16 -04:00
|
|
|
|
2014-12-27 16:28:38 -05:00
|
|
|
check->argv = calloc(6, sizeof(char *));
|
|
|
|
|
if (!check->argv) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
|
2014-06-19 23:30:16 -04:00
|
|
|
goto err;
|
2014-12-27 16:28:38 -05:00
|
|
|
}
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
check->argv[0] = px->check_command;
|
|
|
|
|
|
2014-12-02 15:21:35 -05:00
|
|
|
if (!listener) {
|
|
|
|
|
check->argv[1] = strdup("NOT_USED");
|
|
|
|
|
check->argv[2] = strdup("NOT_USED");
|
|
|
|
|
}
|
|
|
|
|
else if (listener->addr.ss_family == AF_INET ||
|
2014-06-19 23:30:16 -04:00
|
|
|
listener->addr.ss_family == AF_INET6) {
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
addr_to_str(&listener->addr, buf, sizeof(buf));
|
|
|
|
|
check->argv[1] = strdup(buf);
|
|
|
|
|
port_to_str(&listener->addr, buf, sizeof(buf));
|
|
|
|
|
check->argv[2] = strdup(buf);
|
2014-12-02 15:21:35 -05:00
|
|
|
}
|
|
|
|
|
else if (listener->addr.ss_family == AF_UNIX) {
|
2014-06-19 23:30:16 -04:00
|
|
|
const struct sockaddr_un *un;
|
|
|
|
|
|
|
|
|
|
un = (struct sockaddr_un *)&listener->addr;
|
|
|
|
|
check->argv[1] = strdup(un->sun_path);
|
|
|
|
|
check->argv[2] = strdup("NOT_USED");
|
2014-12-02 15:21:35 -05:00
|
|
|
}
|
|
|
|
|
else {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Starting [%s:%s] check: unsupported address family.\n", px->id, s->id);
|
2014-06-19 23:30:16 -04:00
|
|
|
goto err;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-26 03:50:31 -04:00
|
|
|
if (!check->argv[1] || !check->argv[2]) {
|
|
|
|
|
ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
|
|
|
|
|
goto err;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
check->argv[3] = calloc(EXTCHK_SIZE_ADDR, sizeof(*check->argv[3]));
|
|
|
|
|
check->argv[4] = calloc(EXTCHK_SIZE_UINT, sizeof(*check->argv[4]));
|
|
|
|
|
if (!check->argv[3] || !check->argv[4]) {
|
|
|
|
|
ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
|
|
|
|
|
goto err;
|
|
|
|
|
}
|
2017-01-06 11:41:29 -05:00
|
|
|
|
2020-04-26 03:50:31 -04:00
|
|
|
addr_to_str(&s->addr, check->argv[3], EXTCHK_SIZE_ADDR);
|
2017-01-06 11:41:29 -05:00
|
|
|
if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
|
2020-04-26 03:50:31 -04:00
|
|
|
snprintf(check->argv[4], EXTCHK_SIZE_UINT, "%u", s->svc_port);
|
2014-06-19 23:30:16 -04:00
|
|
|
|
2014-12-27 16:28:38 -05:00
|
|
|
for (i = 0; i < 5; i++) {
|
|
|
|
|
if (!check->argv[i]) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
|
2014-06-19 23:30:16 -04:00
|
|
|
goto err;
|
2014-12-27 16:28:38 -05:00
|
|
|
}
|
|
|
|
|
}
|
2014-06-19 23:30:16 -04:00
|
|
|
|
2014-12-27 16:28:38 -05:00
|
|
|
EXTCHK_SETENV(check, EXTCHK_PATH, path, err);
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
/* Add proxy environment variables */
|
2014-12-27 16:28:38 -05:00
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_NAME, px->id, err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ID, ultoa_r(px->uuid, buf, sizeof(buf)), err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ADDR, check->argv[1], err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_PORT, check->argv[2], err);
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
/* Add server environment variables */
|
2014-12-27 16:28:38 -05:00
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_NAME, s->id, err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ID, ultoa_r(s->puid, buf, sizeof(buf)), err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_MAXCONN, ultoa_r(s->maxconn, buf, sizeof(buf)), err);
|
|
|
|
|
EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), err);
|
|
|
|
|
|
|
|
|
|
/* Ensure that we don't leave any hole in check->envp */
|
|
|
|
|
for (i = 0; i < EXTCHK_SIZE; i++)
|
|
|
|
|
if (!check->envp[i])
|
|
|
|
|
EXTCHK_SETENV(check, i, "", err);
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
|
2014-08-06 19:55:38 -04:00
|
|
|
return 1;
|
2014-06-19 23:30:16 -04:00
|
|
|
err:
|
|
|
|
|
if (check->envp) {
|
2014-12-27 16:28:38 -05:00
|
|
|
for (i = 0; i < EXTCHK_SIZE; i++)
|
MEDIUM: checks: provide environment variables to the external checks
The external command accepted 4 arguments, some with the value "NOT_USED" when
not applicable. In order to make the exernal command more generic, this patch
also provides the values in environment variables. This allows to provide more
information.
Currently, the supported environment variables are :
PATH, as previously provided.
HAPROXY_PROXY_NAME, the backend name
HAPROXY_PROXY_ID, the backend id
HAPROXY_PROXY_ADDR, the first bind address if available (or empty)
HAPROXY_PROXY_PORT, the first bind port if available (or empty)
HAPROXY_SERVER_NAME, the server name
HAPROXY_SERVER_ID, the server id
HAPROXY_SERVER_ADDR, the server address
HAPROXY_SERVER_PORT, the server port if available (or empty)
HAPROXY_SERVER_MAXCONN, the server max connections
HAPROXY_SERVER_CURCONN, the current number of connections on the server
2014-12-02 15:21:36 -05:00
|
|
|
free(check->envp[i]);
|
2014-06-19 23:30:16 -04:00
|
|
|
free(check->envp);
|
|
|
|
|
check->envp = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (check->argv) {
|
|
|
|
|
for (i = 1; i < 5; i++)
|
|
|
|
|
free(check->argv[i]);
|
|
|
|
|
free(check->argv);
|
|
|
|
|
check->argv = NULL;
|
|
|
|
|
}
|
2014-08-06 19:55:38 -04:00
|
|
|
return 0;
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
/*
|
2014-06-19 23:30:16 -04:00
|
|
|
* establish a server health-check that makes use of a process.
|
|
|
|
|
*
|
|
|
|
|
* It can return one of :
|
2015-04-02 19:14:29 -04:00
|
|
|
* - SF_ERR_NONE if everything's OK
|
|
|
|
|
* - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
|
2016-11-28 20:15:19 -05:00
|
|
|
* Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
|
2014-06-19 23:30:16 -04:00
|
|
|
*
|
|
|
|
|
* Blocks and then unblocks SIGCHLD
|
|
|
|
|
*/
|
|
|
|
|
static int connect_proc_chk(struct task *t)
|
|
|
|
|
{
|
2014-12-27 16:28:38 -05:00
|
|
|
char buf[256];
|
2014-06-19 23:30:16 -04:00
|
|
|
struct check *check = t->context;
|
|
|
|
|
struct server *s = check->server;
|
|
|
|
|
struct proxy *px = s->proxy;
|
|
|
|
|
int status;
|
|
|
|
|
pid_t pid;
|
|
|
|
|
|
2015-04-02 19:14:29 -04:00
|
|
|
status = SF_ERR_RESOURCE;
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
block_sigchld();
|
|
|
|
|
|
|
|
|
|
pid = fork();
|
|
|
|
|
if (pid < 0) {
|
MEDIUM: init: prevent process and thread creation at runtime
Some concerns are regularly raised about the risk to inherit some Lua
files which make use of a fork (e.g. via os.execute()) as well as
whether or not some of bugs we fix might or not be exploitable to run
some code. Given that haproxy is event-driven, any foreground activity
completely stops processing and is easy to detect, but background
activity is a different story. A Lua script could very well discretely
fork a sub-process connecting to a remote location and taking commands,
and some injected code could also try to hide its activity by creating
a process or a thread without blocking the rest of the processing. While
such activities should be extremely limited when run in an empty chroot
without any permission, it would be better to get a higher assurance
they cannot happen.
This patch introduces something very simple: it limits the number of
processes and threads to zero in the workers after the last thread was
created. By doing so, it effectively instructs the system to fail on
any fork() or clone() syscall. Thus any undesired activity has to happen
in the foreground and is way easier to detect.
This will obviously break external checks (whose concept is already
totally insecure), and for this reason a new option
"insecure-fork-wanted" was added to disable this protection, and it
is suggested in the fork() error report from the checks. It is
obviously recommended not to use it and to reconsider the reasons
leading to it being enabled in the first place.
If for any reason we fail to disable forks, we still start because it
could be imaginable that some operating systems refuse to set this
limit to zero, but in this case we emit a warning, that may or may not
be reported since we're after the fork point. Ideally over the long
term it should be conditioned by strict-limits and cause a hard fail.
2019-12-03 01:07:36 -05:00
|
|
|
ha_alert("Failed to fork process for external health check%s: %s. Aborting.\n",
|
|
|
|
|
(global.tune.options & GTUNE_INSECURE_FORK) ?
|
|
|
|
|
"" : " (likely caused by missing 'insecure-fork-wanted')",
|
2017-11-24 10:50:31 -05:00
|
|
|
strerror(errno));
|
2014-06-19 23:30:16 -04:00
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
if (pid == 0) {
|
|
|
|
|
/* Child */
|
|
|
|
|
extern char **environ;
|
2019-03-01 05:15:10 -05:00
|
|
|
struct rlimit limit;
|
BUG/MEDIUM: external-checks: close all FDs right after the fork()
Lukas Erlacher reported an interesting problem : since we don't close
FDs after the fork() upon external checks, any script executed that
writes data on stdout/stderr will possibly send its data to wrong
places, very likely an existing connection.
After some analysis, the problem is even wider. It's not enough to
just close stdin/stdout/stderr, as all sockets are passed to the
sub-process, and as long as they're not closed, they are usable for
whatever mistake can be done. Furthermore with epoll, such FDs will
continue to be reported after a close() as the underlying file is
not closed yet.
CLOEXEC would be an acceptable workaround except that 1) it adds an
extra syscall on the fast path, and 2) we have no control over FDs
created by external libs (eg: openssl using /dev/crypto, libc using
/dev/random, lua using anything else), so in the end we still need
to close them all.
On some BSD systems there's a closefrom() syscall which could be
very useful for this.
Based on an insightful idea from Simon Horman, we don't close 0/1/2
when we're in verbose mode since they're properly connected to
stdin/stdout/stderr and can become quite useful during debugging
sessions to detect some script output errors or execve() failures.
This fix must be backported to 1.6.
2016-06-21 09:32:29 -04:00
|
|
|
int fd;
|
|
|
|
|
|
|
|
|
|
/* close all FDs. Keep stdin/stdout/stderr in verbose mode */
|
|
|
|
|
fd = (global.mode & (MODE_QUIET|MODE_VERBOSE)) == MODE_QUIET ? 0 : 3;
|
|
|
|
|
|
2019-02-21 16:22:06 -05:00
|
|
|
my_closefrom(fd);
|
BUG/MEDIUM: external-checks: close all FDs right after the fork()
Lukas Erlacher reported an interesting problem : since we don't close
FDs after the fork() upon external checks, any script executed that
writes data on stdout/stderr will possibly send its data to wrong
places, very likely an existing connection.
After some analysis, the problem is even wider. It's not enough to
just close stdin/stdout/stderr, as all sockets are passed to the
sub-process, and as long as they're not closed, they are usable for
whatever mistake can be done. Furthermore with epoll, such FDs will
continue to be reported after a close() as the underlying file is
not closed yet.
CLOEXEC would be an acceptable workaround except that 1) it adds an
extra syscall on the fast path, and 2) we have no control over FDs
created by external libs (eg: openssl using /dev/crypto, libc using
/dev/random, lua using anything else), so in the end we still need
to close them all.
On some BSD systems there's a closefrom() syscall which could be
very useful for this.
Based on an insightful idea from Simon Horman, we don't close 0/1/2
when we're in verbose mode since they're properly connected to
stdin/stdout/stderr and can become quite useful during debugging
sessions to detect some script output errors or execve() failures.
This fix must be backported to 1.6.
2016-06-21 09:32:29 -04:00
|
|
|
|
2019-03-01 05:15:10 -05:00
|
|
|
/* restore the initial FD limits */
|
|
|
|
|
limit.rlim_cur = rlim_fd_cur_at_boot;
|
|
|
|
|
limit.rlim_max = rlim_fd_max_at_boot;
|
|
|
|
|
if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
|
|
|
|
|
getrlimit(RLIMIT_NOFILE, &limit);
|
|
|
|
|
ha_warning("External check: failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
|
|
|
|
|
rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
|
|
|
|
|
(unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-19 23:30:16 -04:00
|
|
|
environ = check->envp;
|
2020-04-26 03:50:31 -04:00
|
|
|
|
|
|
|
|
/* Update some environment variables and command args: curconn, server addr and server port */
|
2014-12-27 16:28:38 -05:00
|
|
|
extchk_setenv(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)));
|
2020-04-26 03:50:31 -04:00
|
|
|
|
|
|
|
|
addr_to_str(&s->addr, check->argv[3], EXTCHK_SIZE_ADDR);
|
|
|
|
|
extchk_setenv(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3]);
|
|
|
|
|
|
|
|
|
|
*check->argv[4] = 0;
|
|
|
|
|
if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
|
|
|
|
|
snprintf(check->argv[4], EXTCHK_SIZE_UINT, "%u", s->svc_port);
|
|
|
|
|
extchk_setenv(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4]);
|
|
|
|
|
|
2019-07-01 01:51:29 -04:00
|
|
|
haproxy_unblock_signals();
|
2014-06-19 23:30:16 -04:00
|
|
|
execvp(px->check_command, check->argv);
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Failed to exec process for external health check: %s. Aborting.\n",
|
|
|
|
|
strerror(errno));
|
2014-06-19 23:30:16 -04:00
|
|
|
exit(-1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parent */
|
|
|
|
|
if (check->result == CHK_RES_UNKNOWN) {
|
|
|
|
|
if (pid_list_add(pid, t) != NULL) {
|
|
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
|
|
|
|
|
|
|
|
|
|
if (px->timeout.check && px->timeout.connect) {
|
|
|
|
|
int t_con = tick_add(now_ms, px->timeout.connect);
|
|
|
|
|
t->expire = tick_first(t->expire, t_con);
|
|
|
|
|
}
|
2015-04-02 19:14:29 -04:00
|
|
|
status = SF_ERR_NONE;
|
2014-06-19 23:30:16 -04:00
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
|
|
|
|
|
}
|
|
|
|
|
kill(pid, SIGTERM); /* process creation error */
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
|
|
|
|
|
|
|
|
|
|
out:
|
|
|
|
|
unblock_sigchld();
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2017-10-04 09:07:02 -04:00
|
|
|
* manages a server health-check that uses an external process. Returns
|
2006-06-25 20:48:02 -04:00
|
|
|
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
2017-11-05 04:11:13 -05:00
|
|
|
*
|
|
|
|
|
* Please do NOT place any return statement in this function and only leave
|
|
|
|
|
* via the out_unlock label.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2018-05-25 08:04:04 -04:00
|
|
|
static struct task *process_chk_proc(struct task *t, void *context, unsigned short state)
|
2014-06-19 23:30:16 -04:00
|
|
|
{
|
2018-05-25 08:04:04 -04:00
|
|
|
struct check *check = context;
|
2014-06-19 23:30:16 -04:00
|
|
|
struct server *s = check->server;
|
|
|
|
|
int rv;
|
|
|
|
|
int ret;
|
|
|
|
|
int expired = tick_is_expired(t->expire, now_ms);
|
|
|
|
|
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
if (!(check->state & CHK_ST_INPROGRESS)) {
|
|
|
|
|
/* no check currently running */
|
2017-11-05 04:11:13 -05:00
|
|
|
if (!expired) /* woke up too early */
|
|
|
|
|
goto out_unlock;
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
/* we don't send any health-checks when the proxy is
|
|
|
|
|
* stopped, the server should not be checked or the check
|
|
|
|
|
* is disabled.
|
|
|
|
|
*/
|
|
|
|
|
if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
|
|
|
|
|
s->proxy->state == PR_STSTOPPED)
|
|
|
|
|
goto reschedule;
|
|
|
|
|
|
|
|
|
|
/* we'll initiate a new check */
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_START, NULL);
|
|
|
|
|
|
|
|
|
|
check->state |= CHK_ST_INPROGRESS;
|
|
|
|
|
|
2015-01-29 21:22:53 -05:00
|
|
|
ret = connect_proc_chk(t);
|
2017-10-04 09:19:26 -04:00
|
|
|
if (ret == SF_ERR_NONE) {
|
2017-10-04 09:07:02 -04:00
|
|
|
/* the process was forked, we allow up to min(inter,
|
|
|
|
|
* timeout.connect) for it to report its status, but
|
|
|
|
|
* only when timeout.check is set as it may be to short
|
|
|
|
|
* for a full check otherwise.
|
2014-06-19 23:30:16 -04:00
|
|
|
*/
|
|
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
|
|
|
|
|
|
|
|
|
|
if (s->proxy->timeout.check && s->proxy->timeout.connect) {
|
|
|
|
|
int t_con = tick_add(now_ms, s->proxy->timeout.connect);
|
|
|
|
|
t->expire = tick_first(t->expire, t_con);
|
|
|
|
|
}
|
2017-10-20 09:41:18 -04:00
|
|
|
task_set_affinity(t, tid_bit);
|
2014-06-19 23:30:16 -04:00
|
|
|
goto reschedule;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-04 09:07:02 -04:00
|
|
|
/* here, we failed to start the check */
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
check->state &= ~CHK_ST_INPROGRESS;
|
|
|
|
|
check_notify_failure(check);
|
|
|
|
|
|
|
|
|
|
/* we allow up to min(inter, timeout.connect) for a connection
|
|
|
|
|
* to establish but only when timeout.check is set
|
|
|
|
|
* as it may be to short for a full check otherwise
|
|
|
|
|
*/
|
|
|
|
|
while (tick_is_expired(t->expire, now_ms)) {
|
|
|
|
|
int t_con;
|
|
|
|
|
|
|
|
|
|
t_con = tick_add(t->expire, s->proxy->timeout.connect);
|
|
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
|
|
|
|
|
|
|
|
|
|
if (s->proxy->timeout.check)
|
|
|
|
|
t->expire = tick_first(t->expire, t_con);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* there was a test running.
|
|
|
|
|
* First, let's check whether there was an uncaught error,
|
|
|
|
|
* which can happen on connect timeout or error.
|
|
|
|
|
*/
|
|
|
|
|
if (check->result == CHK_RES_UNKNOWN) {
|
|
|
|
|
/* good connection is enough for pure TCP check */
|
|
|
|
|
struct pid_list *elem = check->curpid;
|
|
|
|
|
int status = HCHK_STATUS_UNKNOWN;
|
|
|
|
|
|
|
|
|
|
if (elem->exited) {
|
|
|
|
|
status = elem->status; /* Save in case the process exits between use below */
|
|
|
|
|
if (!WIFEXITED(status))
|
|
|
|
|
check->code = -1;
|
|
|
|
|
else
|
|
|
|
|
check->code = WEXITSTATUS(status);
|
|
|
|
|
if (!WIFEXITED(status) || WEXITSTATUS(status))
|
|
|
|
|
status = HCHK_STATUS_PROCERR;
|
|
|
|
|
else
|
|
|
|
|
status = HCHK_STATUS_PROCOK;
|
|
|
|
|
} else if (expired) {
|
|
|
|
|
status = HCHK_STATUS_PROCTOUT;
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_warning("kill %d\n", (int)elem->pid);
|
2014-06-19 23:30:16 -04:00
|
|
|
kill(elem->pid, SIGTERM);
|
|
|
|
|
}
|
|
|
|
|
set_server_check_status(check, status, NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (check->result == CHK_RES_FAILED) {
|
|
|
|
|
/* a failure or timeout detected */
|
|
|
|
|
check_notify_failure(check);
|
|
|
|
|
}
|
|
|
|
|
else if (check->result == CHK_RES_CONDPASS) {
|
|
|
|
|
/* check is OK but asks for stopping mode */
|
|
|
|
|
check_notify_stopping(check);
|
|
|
|
|
}
|
|
|
|
|
else if (check->result == CHK_RES_PASSED) {
|
|
|
|
|
/* a success was detected */
|
|
|
|
|
check_notify_success(check);
|
|
|
|
|
}
|
2019-09-03 12:55:02 -04:00
|
|
|
task_set_affinity(t, 1);
|
2014-06-19 23:30:16 -04:00
|
|
|
check->state &= ~CHK_ST_INPROGRESS;
|
|
|
|
|
|
|
|
|
|
pid_list_del(check->curpid);
|
|
|
|
|
|
|
|
|
|
rv = 0;
|
|
|
|
|
if (global.spread_checks > 0) {
|
|
|
|
|
rv = srv_getinter(check) * global.spread_checks / 100;
|
2020-03-08 12:53:53 -04:00
|
|
|
rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reschedule:
|
|
|
|
|
while (tick_is_expired(t->expire, now_ms))
|
|
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
|
2017-11-05 04:11:13 -05:00
|
|
|
|
|
|
|
|
out_unlock:
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
|
2014-06-19 23:30:16 -04:00
|
|
|
return t;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* manages a server health-check that uses a connection. Returns
|
|
|
|
|
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
2017-11-05 04:11:13 -05:00
|
|
|
*
|
|
|
|
|
* Please do NOT place any return statement in this function and only leave
|
|
|
|
|
* via the out_unlock label.
|
2014-06-19 23:30:16 -04:00
|
|
|
*/
|
2018-05-25 08:04:04 -04:00
|
|
|
static struct task *process_chk_conn(struct task *t, void *context, unsigned short state)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2018-05-25 08:04:04 -04:00
|
|
|
struct check *check = context;
|
2019-01-11 12:17:17 -05:00
|
|
|
struct proxy *proxy = check->proxy;
|
2017-09-13 12:30:23 -04:00
|
|
|
struct conn_stream *cs = check->cs;
|
|
|
|
|
struct connection *conn = cs_conn(cs);
|
2007-10-14 17:40:01 -04:00
|
|
|
int rv;
|
2012-09-28 08:40:02 -04:00
|
|
|
int ret;
|
2012-11-23 08:02:10 -05:00
|
|
|
int expired = tick_is_expired(t->expire, now_ms);
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
|
2013-12-11 13:41:16 -05:00
|
|
|
if (!(check->state & CHK_ST_INPROGRESS)) {
|
2012-11-23 06:47:05 -05:00
|
|
|
/* no check currently running */
|
2017-11-05 04:11:13 -05:00
|
|
|
if (!expired) /* woke up too early */
|
|
|
|
|
goto out_unlock;
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2013-11-24 20:46:39 -05:00
|
|
|
/* we don't send any health-checks when the proxy is
|
|
|
|
|
* stopped, the server should not be checked or the check
|
|
|
|
|
* is disabled.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2013-12-11 15:26:24 -05:00
|
|
|
if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
|
2019-01-11 12:17:17 -05:00
|
|
|
proxy->state == PR_STSTOPPED)
|
2012-11-23 06:47:05 -05:00
|
|
|
goto reschedule;
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
/* we'll initiate a new check */
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_START, NULL);
|
2012-09-28 09:28:30 -04:00
|
|
|
|
2013-12-11 13:41:16 -05:00
|
|
|
check->state |= CHK_ST_INPROGRESS;
|
2018-07-10 11:43:27 -04:00
|
|
|
b_reset(&check->bi);
|
|
|
|
|
b_reset(&check->bo);
|
2012-09-28 09:28:30 -04:00
|
|
|
|
2019-11-29 10:18:51 -05:00
|
|
|
task_set_affinity(t, tid_bit);
|
2015-01-29 21:22:53 -05:00
|
|
|
ret = connect_conn_chk(t);
|
2017-09-13 12:30:23 -04:00
|
|
|
cs = check->cs;
|
|
|
|
|
conn = cs_conn(cs);
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
|
2012-09-28 08:40:02 -04:00
|
|
|
switch (ret) {
|
2015-04-02 19:14:29 -04:00
|
|
|
case SF_ERR_UP:
|
2017-11-05 04:11:13 -05:00
|
|
|
goto out_unlock;
|
|
|
|
|
|
2015-04-02 19:14:29 -04:00
|
|
|
case SF_ERR_NONE:
|
2012-09-28 08:40:02 -04:00
|
|
|
/* we allow up to min(inter, timeout.connect) for a connection
|
|
|
|
|
* to establish but only when timeout.check is set
|
|
|
|
|
* as it may be to short for a full check otherwise
|
|
|
|
|
*/
|
2013-02-23 01:35:38 -05:00
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
|
2019-01-11 12:17:17 -05:00
|
|
|
if (proxy->timeout.check && proxy->timeout.connect) {
|
|
|
|
|
int t_con = tick_add(now_ms, proxy->timeout.connect);
|
2012-09-28 08:40:02 -04:00
|
|
|
t->expire = tick_first(t->expire, t_con);
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2013-12-04 19:53:08 -05:00
|
|
|
|
2019-09-05 11:51:30 -04:00
|
|
|
if (check->type) {
|
|
|
|
|
/* send the request if we have one. We avoid receiving
|
|
|
|
|
* if not connected, unless we didn't subscribe for
|
|
|
|
|
* sending since otherwise we won't be woken up.
|
|
|
|
|
*/
|
|
|
|
|
__event_srv_chk_w(cs);
|
2020-01-23 10:27:54 -05:00
|
|
|
if (!(conn->flags & CO_FL_WAIT_XPRT) ||
|
2019-09-05 11:38:40 -04:00
|
|
|
!(check->wait_list.events & SUB_RETRY_SEND))
|
|
|
|
|
__event_srv_chk_r(cs);
|
2019-09-05 11:51:30 -04:00
|
|
|
}
|
2013-12-04 19:53:08 -05:00
|
|
|
|
2012-11-23 06:47:05 -05:00
|
|
|
goto reschedule;
|
|
|
|
|
|
2015-04-02 19:14:29 -04:00
|
|
|
case SF_ERR_SRVTO: /* ETIMEDOUT */
|
|
|
|
|
case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
if (conn)
|
|
|
|
|
conn->flags |= CO_FL_ERROR;
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, errno, 0);
|
2012-11-23 06:47:05 -05:00
|
|
|
break;
|
2016-06-13 08:15:41 -04:00
|
|
|
/* should share same code than cases below */
|
|
|
|
|
case SF_ERR_CHK_PORT:
|
|
|
|
|
check->state |= CHK_ST_PORT_MISS;
|
2015-04-02 19:14:29 -04:00
|
|
|
case SF_ERR_PRXCOND:
|
|
|
|
|
case SF_ERR_RESOURCE:
|
|
|
|
|
case SF_ERR_INTERNAL:
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
if (conn)
|
|
|
|
|
conn->flags |= CO_FL_ERROR;
|
|
|
|
|
chk_report_conn_err(check, conn ? 0 : ENOMEM, 0);
|
2012-11-23 06:47:05 -05:00
|
|
|
break;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2012-11-23 06:47:05 -05:00
|
|
|
/* here, we have seen a synchronous error, no fd was allocated */
|
2019-11-29 10:18:51 -05:00
|
|
|
task_set_affinity(t, MAX_THREADS_MASK);
|
2017-09-13 12:30:23 -04:00
|
|
|
if (cs) {
|
2019-05-31 13:20:36 -04:00
|
|
|
if (check->wait_list.events)
|
|
|
|
|
cs->conn->xprt->unsubscribe(cs->conn,
|
|
|
|
|
cs->conn->xprt_ctx,
|
|
|
|
|
check->wait_list.events,
|
|
|
|
|
&check->wait_list);
|
|
|
|
|
/* We may have been scheduled to run, and the
|
|
|
|
|
* I/O handler expects to have a cs, so remove
|
|
|
|
|
* the tasklet
|
|
|
|
|
*/
|
2019-06-14 08:47:49 -04:00
|
|
|
tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
|
2017-09-13 12:30:23 -04:00
|
|
|
cs_destroy(cs);
|
|
|
|
|
cs = check->cs = NULL;
|
|
|
|
|
conn = NULL;
|
2017-10-24 13:03:30 -04:00
|
|
|
}
|
2012-11-23 02:51:32 -05:00
|
|
|
|
2013-12-11 13:41:16 -05:00
|
|
|
check->state &= ~CHK_ST_INPROGRESS;
|
2014-05-20 16:32:27 -04:00
|
|
|
check_notify_failure(check);
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2008-01-20 19:54:06 -05:00
|
|
|
/* we allow up to min(inter, timeout.connect) for a connection
|
|
|
|
|
* to establish but only when timeout.check is set
|
|
|
|
|
* as it may be to short for a full check otherwise
|
|
|
|
|
*/
|
2008-07-06 18:09:58 -04:00
|
|
|
while (tick_is_expired(t->expire, now_ms)) {
|
|
|
|
|
int t_con;
|
2008-01-20 19:54:06 -05:00
|
|
|
|
2019-01-11 12:17:17 -05:00
|
|
|
t_con = tick_add(t->expire, proxy->timeout.connect);
|
2013-02-23 01:35:38 -05:00
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
|
2019-01-11 12:17:17 -05:00
|
|
|
if (proxy->timeout.check)
|
2008-07-06 18:09:58 -04:00
|
|
|
t->expire = tick_first(t->expire, t_con);
|
2008-01-20 19:54:06 -05:00
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
else {
|
2012-09-28 13:39:36 -04:00
|
|
|
/* there was a test running.
|
|
|
|
|
* First, let's check whether there was an uncaught error,
|
|
|
|
|
* which can happen on connect timeout or error.
|
|
|
|
|
*/
|
2014-06-19 23:29:47 -04:00
|
|
|
if (check->result == CHK_RES_UNKNOWN) {
|
2013-12-04 05:17:05 -05:00
|
|
|
/* good connection is enough for pure TCP check */
|
2020-01-23 10:27:54 -05:00
|
|
|
if (!(conn->flags & CO_FL_WAIT_XPRT) && !check->type) {
|
2020-03-27 13:55:49 -04:00
|
|
|
if (check->use_ssl == 1)
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L6OK, NULL);
|
2012-09-28 13:39:36 -04:00
|
|
|
else
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(check, HCHK_STATUS_L4OK, NULL);
|
2012-11-23 08:02:10 -05:00
|
|
|
}
|
2017-10-16 09:17:17 -04:00
|
|
|
else if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) {
|
2017-10-04 08:47:29 -04:00
|
|
|
chk_report_conn_err(check, 0, expired);
|
2012-09-28 13:39:36 -04:00
|
|
|
}
|
2012-11-23 08:43:49 -05:00
|
|
|
else
|
2017-11-05 04:11:13 -05:00
|
|
|
goto out_unlock; /* timeout not reached, wait again */
|
2012-09-28 13:39:36 -04:00
|
|
|
}
|
|
|
|
|
|
2012-11-23 08:43:49 -05:00
|
|
|
/* check complete or aborted */
|
2020-02-14 11:42:54 -05:00
|
|
|
|
|
|
|
|
check->current_step = NULL;
|
|
|
|
|
if (check->sess != NULL) {
|
|
|
|
|
session_free(check->sess);
|
|
|
|
|
check->sess = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
if (conn && conn->xprt) {
|
2013-02-12 09:23:12 -05:00
|
|
|
/* The check was aborted and the connection was not yet closed.
|
|
|
|
|
* This can happen upon timeout, or when an external event such
|
|
|
|
|
* as a failed response coupled with "observe layer7" caused the
|
|
|
|
|
* server state to be suddenly changed.
|
|
|
|
|
*/
|
2015-03-12 19:40:28 -04:00
|
|
|
conn_sock_drain(conn);
|
2017-10-05 12:52:17 -04:00
|
|
|
cs_close(cs);
|
2013-02-12 09:23:12 -05:00
|
|
|
}
|
|
|
|
|
|
2017-10-08 05:10:19 -04:00
|
|
|
if (cs) {
|
2019-05-31 13:20:36 -04:00
|
|
|
if (check->wait_list.events)
|
|
|
|
|
cs->conn->xprt->unsubscribe(cs->conn,
|
|
|
|
|
cs->conn->xprt_ctx,
|
|
|
|
|
check->wait_list.events,
|
|
|
|
|
&check->wait_list);
|
|
|
|
|
/* We may have been scheduled to run, and the
|
2019-06-14 08:47:49 -04:00
|
|
|
* I/O handler expects to have a cs, so remove
|
|
|
|
|
* the tasklet
|
|
|
|
|
*/
|
|
|
|
|
tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
|
2017-09-13 12:30:23 -04:00
|
|
|
cs_destroy(cs);
|
|
|
|
|
cs = check->cs = NULL;
|
|
|
|
|
conn = NULL;
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
}
|
|
|
|
|
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server) {
|
|
|
|
|
if (check->result == CHK_RES_FAILED) {
|
|
|
|
|
/* a failure or timeout detected */
|
|
|
|
|
check_notify_failure(check);
|
|
|
|
|
}
|
|
|
|
|
else if (check->result == CHK_RES_CONDPASS) {
|
|
|
|
|
/* check is OK but asks for stopping mode */
|
|
|
|
|
check_notify_stopping(check);
|
|
|
|
|
}
|
|
|
|
|
else if (check->result == CHK_RES_PASSED) {
|
|
|
|
|
/* a success was detected */
|
|
|
|
|
check_notify_success(check);
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2017-10-20 09:41:18 -04:00
|
|
|
task_set_affinity(t, MAX_THREADS_MASK);
|
2013-12-11 13:41:16 -05:00
|
|
|
check->state &= ~CHK_ST_INPROGRESS;
|
2007-10-14 17:40:01 -04:00
|
|
|
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server) {
|
|
|
|
|
rv = 0;
|
|
|
|
|
if (global.spread_checks > 0) {
|
|
|
|
|
rv = srv_getinter(check) * global.spread_checks / 100;
|
2020-03-08 12:53:53 -04:00
|
|
|
rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
|
2019-01-11 12:43:04 -05:00
|
|
|
}
|
|
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
|
2012-11-23 06:47:05 -05:00
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2012-11-23 06:47:05 -05:00
|
|
|
|
|
|
|
|
reschedule:
|
|
|
|
|
while (tick_is_expired(t->expire, now_ms))
|
2013-02-23 01:35:38 -05:00
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
|
2017-11-05 04:11:13 -05:00
|
|
|
out_unlock:
|
2019-01-11 12:43:04 -05:00
|
|
|
if (check->server)
|
|
|
|
|
HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
|
2009-03-08 04:38:41 -04:00
|
|
|
return t;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2014-06-19 23:30:16 -04:00
|
|
|
/*
|
|
|
|
|
* manages a server health-check. Returns
|
|
|
|
|
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
|
|
|
|
*/
|
2018-05-25 08:04:04 -04:00
|
|
|
static struct task *process_chk(struct task *t, void *context, unsigned short state)
|
2014-06-19 23:30:16 -04:00
|
|
|
{
|
2018-05-25 08:04:04 -04:00
|
|
|
struct check *check = context;
|
2014-06-19 23:30:16 -04:00
|
|
|
|
|
|
|
|
if (check->type == PR_O2_EXT_CHK)
|
2018-05-25 08:04:04 -04:00
|
|
|
return process_chk_proc(t, context, state);
|
|
|
|
|
return process_chk_conn(t, context, state);
|
2015-04-13 19:15:08 -04:00
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2013-11-24 20:46:32 -05:00
|
|
|
static int start_check_task(struct check *check, int mininter,
|
|
|
|
|
int nbcheck, int srvpos)
|
|
|
|
|
{
|
|
|
|
|
struct task *t;
|
2019-09-03 12:55:02 -04:00
|
|
|
unsigned long thread_mask = MAX_THREADS_MASK;
|
|
|
|
|
|
|
|
|
|
if (check->type == PR_O2_EXT_CHK)
|
|
|
|
|
thread_mask = 1;
|
|
|
|
|
|
2013-11-24 20:46:32 -05:00
|
|
|
/* task for the check */
|
2019-09-03 12:55:02 -04:00
|
|
|
if ((t = task_new(thread_mask)) == NULL) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Starting [%s:%s] check: out of memory.\n",
|
|
|
|
|
check->server->proxy->id, check->server->id);
|
2013-11-24 20:46:32 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
check->task = t;
|
|
|
|
|
t->process = process_chk;
|
|
|
|
|
t->context = check;
|
|
|
|
|
|
2014-04-25 04:46:47 -04:00
|
|
|
if (mininter < srv_getinter(check))
|
|
|
|
|
mininter = srv_getinter(check);
|
|
|
|
|
|
|
|
|
|
if (global.max_spread_checks && mininter > global.max_spread_checks)
|
|
|
|
|
mininter = global.max_spread_checks;
|
|
|
|
|
|
2013-11-24 20:46:32 -05:00
|
|
|
/* check this every ms */
|
2014-04-25 04:46:47 -04:00
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
|
2013-11-24 20:46:32 -05:00
|
|
|
check->start = now;
|
|
|
|
|
task_queue(t);
|
|
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2007-10-14 17:40:01 -04:00
|
|
|
/*
|
|
|
|
|
* Start health-check.
|
2016-12-21 14:04:48 -05:00
|
|
|
* Returns 0 if OK, ERR_FATAL on error, and prints the error in this case.
|
2007-10-14 17:40:01 -04:00
|
|
|
*/
|
2016-12-21 14:04:48 -05:00
|
|
|
static int start_checks()
|
|
|
|
|
{
|
2007-10-14 17:40:01 -04:00
|
|
|
|
|
|
|
|
struct proxy *px;
|
|
|
|
|
struct server *s;
|
|
|
|
|
struct task *t;
|
2013-02-23 01:35:38 -05:00
|
|
|
int nbcheck=0, mininter=0, srvpos=0;
|
2007-10-14 17:40:01 -04:00
|
|
|
|
2020-02-14 11:42:54 -05:00
|
|
|
/* 0- init the dummy frontend used to create all checks sessions */
|
|
|
|
|
init_new_proxy(&checks_fe);
|
|
|
|
|
checks_fe.cap = PR_CAP_FE | PR_CAP_BE;
|
|
|
|
|
checks_fe.mode = PR_MODE_TCP;
|
|
|
|
|
checks_fe.maxconn = 0;
|
|
|
|
|
checks_fe.conn_retries = CONN_RETRIES;
|
|
|
|
|
checks_fe.options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
|
|
|
|
|
checks_fe.timeout.client = TICK_ETERNITY;
|
|
|
|
|
|
2007-10-14 17:05:39 -04:00
|
|
|
/* 1- count the checkers to run simultaneously.
|
|
|
|
|
* We also determine the minimum interval among all of those which
|
|
|
|
|
* have an interval larger than SRV_CHK_INTER_THRES. This interval
|
|
|
|
|
* will be used to spread their start-up date. Those which have
|
2012-08-25 00:18:33 -04:00
|
|
|
* a shorter interval will start independently and will not dictate
|
2007-10-14 17:05:39 -04:00
|
|
|
* too short an interval for all others.
|
|
|
|
|
*/
|
2017-11-24 10:54:05 -05:00
|
|
|
for (px = proxies_list; px; px = px->next) {
|
2007-10-14 17:40:01 -04:00
|
|
|
for (s = px->srv; s; s = s->next) {
|
2013-11-21 05:50:50 -05:00
|
|
|
if (s->slowstart) {
|
2017-09-27 08:59:38 -04:00
|
|
|
if ((t = task_new(MAX_THREADS_MASK)) == NULL) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
|
2016-12-21 14:04:48 -05:00
|
|
|
return ERR_ALERT | ERR_FATAL;
|
2013-11-21 05:50:50 -05:00
|
|
|
}
|
|
|
|
|
/* We need a warmup task that will be called when the server
|
|
|
|
|
* state switches from down to up.
|
|
|
|
|
*/
|
|
|
|
|
s->warmup = t;
|
|
|
|
|
t->process = server_warmup;
|
|
|
|
|
t->context = s;
|
2015-09-17 16:53:59 -04:00
|
|
|
/* server can be in this state only because of */
|
2017-08-31 08:41:55 -04:00
|
|
|
if (s->next_state == SRV_ST_STARTING)
|
2015-09-17 16:53:59 -04:00
|
|
|
task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, (now.tv_sec - s->last_change)) / 20)));
|
2013-11-21 05:50:50 -05:00
|
|
|
}
|
|
|
|
|
|
2013-12-11 15:10:14 -05:00
|
|
|
if (s->check.state & CHK_ST_CONFIGURED) {
|
|
|
|
|
nbcheck++;
|
|
|
|
|
if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
|
|
|
|
|
(!mininter || mininter > srv_getinter(&s->check)))
|
|
|
|
|
mininter = srv_getinter(&s->check);
|
|
|
|
|
}
|
2013-12-11 14:41:18 -05:00
|
|
|
|
2013-12-11 15:10:14 -05:00
|
|
|
if (s->agent.state & CHK_ST_CONFIGURED) {
|
|
|
|
|
nbcheck++;
|
|
|
|
|
if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
|
|
|
|
|
(!mininter || mininter > srv_getinter(&s->agent)))
|
|
|
|
|
mininter = srv_getinter(&s->agent);
|
|
|
|
|
}
|
2007-10-14 17:40:01 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
if (!nbcheck)
|
2007-10-14 17:40:01 -04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
srand((unsigned)time(NULL));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* 2- start them as far as possible from each others. For this, we will
|
|
|
|
|
* start them after their interval set to the min interval divided by
|
|
|
|
|
* the number of servers, weighted by the server's position in the list.
|
|
|
|
|
*/
|
2017-11-24 10:54:05 -05:00
|
|
|
for (px = proxies_list; px; px = px->next) {
|
2014-06-19 23:30:16 -04:00
|
|
|
if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
|
|
|
|
|
if (init_pid_list()) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Starting [%s] check: out of memory.\n", px->id);
|
2016-12-21 14:04:48 -05:00
|
|
|
return ERR_ALERT | ERR_FATAL;
|
2014-06-19 23:30:16 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2007-10-14 17:40:01 -04:00
|
|
|
for (s = px->srv; s; s = s->next) {
|
2013-11-24 20:46:36 -05:00
|
|
|
/* A task for the main check */
|
2013-12-11 14:36:34 -05:00
|
|
|
if (s->check.state & CHK_ST_CONFIGURED) {
|
2014-08-06 19:55:38 -04:00
|
|
|
if (s->check.type == PR_O2_EXT_CHK) {
|
|
|
|
|
if (!prepare_external_check(&s->check))
|
2016-12-21 14:04:48 -05:00
|
|
|
return ERR_ALERT | ERR_FATAL;
|
2014-08-06 19:55:38 -04:00
|
|
|
}
|
2013-11-24 20:46:36 -05:00
|
|
|
if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
|
2016-12-21 14:04:48 -05:00
|
|
|
return ERR_ALERT | ERR_FATAL;
|
2013-11-24 20:46:36 -05:00
|
|
|
srvpos++;
|
|
|
|
|
}
|
2007-10-14 17:40:01 -04:00
|
|
|
|
2013-11-24 20:46:36 -05:00
|
|
|
/* A task for a auxiliary agent check */
|
2013-12-11 14:36:34 -05:00
|
|
|
if (s->agent.state & CHK_ST_CONFIGURED) {
|
2013-11-24 20:46:36 -05:00
|
|
|
if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
|
2016-12-21 14:04:48 -05:00
|
|
|
return ERR_ALERT | ERR_FATAL;
|
2013-11-24 20:46:36 -05:00
|
|
|
}
|
|
|
|
|
srvpos++;
|
|
|
|
|
}
|
2007-10-14 17:40:01 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bonté (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
/*
|
2012-09-28 09:01:02 -04:00
|
|
|
* Perform content verification check on data in s->check.buffer buffer.
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
* The buffer MUST be terminated by a null byte before calling this function.
|
|
|
|
|
* Sets server status appropriately. The caller is responsible for ensuring
|
|
|
|
|
* that the buffer contains at least 13 characters. If <done> is zero, we may
|
|
|
|
|
* return 0 to indicate that data is required to decide of a match.
|
|
|
|
|
*/
|
|
|
|
|
static int httpchk_expect(struct server *s, int done)
|
|
|
|
|
{
|
2017-10-29 15:14:08 -04:00
|
|
|
static THREAD_LOCAL char status_msg[] = "HTTP status check returned code <000>";
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
char status_code[] = "000";
|
|
|
|
|
char *contentptr;
|
|
|
|
|
int crlf;
|
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
|
|
switch (s->proxy->options2 & PR_O2_EXP_TYPE) {
|
|
|
|
|
case PR_O2_EXP_STS:
|
|
|
|
|
case PR_O2_EXP_RSTS:
|
2018-07-10 11:43:27 -04:00
|
|
|
memcpy(status_code, b_head(&s->check.bi) + 9, 3);
|
|
|
|
|
memcpy(status_msg + strlen(status_msg) - 4, b_head(&s->check.bi) + 9, 3);
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
|
|
|
|
|
if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STS)
|
|
|
|
|
ret = strncmp(s->proxy->expect_str, status_code, 3) == 0;
|
|
|
|
|
else
|
2014-06-18 05:35:54 -04:00
|
|
|
ret = regex_exec(s->proxy->expect_regex, status_code);
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
|
|
|
|
|
/* we necessarily have the response, so there are no partial failures */
|
|
|
|
|
if (s->proxy->options2 & PR_O2_EXP_INV)
|
|
|
|
|
ret = !ret;
|
|
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, ret ? HCHK_STATUS_L7OKD : HCHK_STATUS_L7STS, status_msg);
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PR_O2_EXP_STR:
|
|
|
|
|
case PR_O2_EXP_RSTR:
|
|
|
|
|
/* very simple response parser: ignore CR and only count consecutive LFs,
|
|
|
|
|
* stop with contentptr pointing to first char after the double CRLF or
|
|
|
|
|
* to '\0' if crlf < 2.
|
|
|
|
|
*/
|
|
|
|
|
crlf = 0;
|
2018-07-10 11:43:27 -04:00
|
|
|
for (contentptr = b_head(&s->check.bi); *contentptr; contentptr++) {
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
if (crlf >= 2)
|
|
|
|
|
break;
|
|
|
|
|
if (*contentptr == '\r')
|
|
|
|
|
continue;
|
|
|
|
|
else if (*contentptr == '\n')
|
|
|
|
|
crlf++;
|
|
|
|
|
else
|
|
|
|
|
crlf = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check that response contains a body... */
|
|
|
|
|
if (crlf < 2) {
|
|
|
|
|
if (!done)
|
|
|
|
|
return 0;
|
|
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
"HTTP content check could not find a response body");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check that response body is not empty... */
|
|
|
|
|
if (*contentptr == '\0') {
|
2011-04-13 03:32:41 -04:00
|
|
|
if (!done)
|
|
|
|
|
return 0;
|
|
|
|
|
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
"HTTP content check found empty response body");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check the response content against the supplied string
|
|
|
|
|
* or regex... */
|
|
|
|
|
if ((s->proxy->options2 & PR_O2_EXP_TYPE) == PR_O2_EXP_STR)
|
|
|
|
|
ret = strstr(contentptr, s->proxy->expect_str) != NULL;
|
|
|
|
|
else
|
2014-06-18 05:35:54 -04:00
|
|
|
ret = regex_exec(s->proxy->expect_regex, contentptr);
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
|
|
|
|
|
/* if we don't match, we may need to wait more */
|
|
|
|
|
if (!ret && !done)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if (ret) {
|
|
|
|
|
/* content matched */
|
|
|
|
|
if (s->proxy->options2 & PR_O2_EXP_INV)
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
"HTTP check matched unwanted content");
|
|
|
|
|
else
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
"HTTP content check matched");
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (s->proxy->options2 & PR_O2_EXP_INV)
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_L7OKD,
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
"HTTP check did not match unwanted content");
|
|
|
|
|
else
|
2013-02-23 01:35:38 -05:00
|
|
|
set_server_check_status(&s->check, HCHK_STATUS_L7RSP,
|
[MEDIUM] checks: add support for HTTP contents lookup
This patch adds the "http-check expect [r]{string,status}" statements
which enable health checks based on whether the response status or body
to an HTTP request contains a string or matches a regex.
This probably is one of the oldest patches that remained unmerged. Over
the time, several people have contributed to it, among which FinalBSD
(first and second implementations), Nick Chalk (port to 1.4), Anze
Skerlavaj (tests and fixes), Cyril Bont (general fixes), and of course
myself for the final fixes and doc during integration.
Some people already use an old version of this patch which has several
issues, among which the inability to search for a plain string that is
not at the beginning of the data, and the inability to look for response
contents that are provided in a second and subsequent recv() calls. But
since some configs are already deployed, it was quite important to ensure
a 100% compatible behaviour on the working cases.
Thus, that patch fixes the issues while maintaining config compatibility
with already deployed versions.
(cherry picked from commit b507c43a3ce9a8e8e4b770e52e4edc20cba4c37f)
2010-03-16 13:46:54 -04:00
|
|
|
"HTTP content check did not match");
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-06 17:24:13 -04:00
|
|
|
/*
|
|
|
|
|
* return the id of a step in a send/expect session
|
|
|
|
|
*/
|
2020-03-30 05:05:10 -04:00
|
|
|
static int tcpcheck_get_step_id(struct check *check, struct tcpcheck_rule *rule)
|
2013-10-06 17:24:13 -04:00
|
|
|
{
|
2020-03-30 05:05:10 -04:00
|
|
|
if (!rule)
|
|
|
|
|
rule = check->current_step;
|
2014-10-02 08:51:02 -04:00
|
|
|
|
2020-03-24 08:31:19 -04:00
|
|
|
/* no last started step => first step */
|
2020-03-30 05:05:10 -04:00
|
|
|
if (!rule)
|
2013-10-06 17:24:13 -04:00
|
|
|
return 1;
|
|
|
|
|
|
2020-03-26 12:38:49 -04:00
|
|
|
/* last step is the first implicit connect */
|
2020-03-30 05:05:10 -04:00
|
|
|
if (rule->index == 0 &&
|
|
|
|
|
rule->action == TCPCHK_ACT_CONNECT &&
|
|
|
|
|
(rule->connect.options & TCPCHK_OPT_DEFAULT_CONNECT))
|
2020-03-26 12:38:49 -04:00
|
|
|
return 0;
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
return rule->index + 1;
|
2013-10-06 17:24:13 -04:00
|
|
|
}
|
|
|
|
|
|
2015-05-01 02:03:04 -04:00
|
|
|
/*
|
2020-03-24 08:31:19 -04:00
|
|
|
* return the latest known comment for the current rule, the comment attached to
|
|
|
|
|
* it or the COMMENT rule immediately preceedding the expect rule chain, if any.
|
|
|
|
|
* returns NULL if no comment found.
|
2015-05-01 02:03:04 -04:00
|
|
|
*/
|
2020-03-30 05:05:10 -04:00
|
|
|
static char *tcpcheck_get_step_comment(struct check *check, struct tcpcheck_rule *rule)
|
2015-05-01 02:03:04 -04:00
|
|
|
{
|
2020-03-24 08:31:19 -04:00
|
|
|
struct tcpcheck_rule *cur;
|
2015-05-01 02:03:04 -04:00
|
|
|
char *ret = NULL;
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
if (!rule)
|
|
|
|
|
rule = check->current_step;
|
2015-05-01 02:03:04 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
if (rule->comment) {
|
|
|
|
|
ret = rule->comment;
|
2020-03-24 08:31:19 -04:00
|
|
|
goto return_comment;
|
|
|
|
|
}
|
2015-05-01 02:03:04 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
rule = LIST_PREV(&rule->list, typeof(cur), list);
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry_from_rev(rule, check->tcpcheck_rules->list, list) {
|
2020-03-30 05:05:10 -04:00
|
|
|
if (rule->action == TCPCHK_ACT_COMMENT) {
|
|
|
|
|
ret = rule->comment;
|
2020-03-24 08:31:19 -04:00
|
|
|
break;
|
|
|
|
|
}
|
2020-03-30 05:05:10 -04:00
|
|
|
else if (rule->action != TCPCHK_ACT_EXPECT)
|
2020-03-24 08:31:19 -04:00
|
|
|
break;
|
2015-05-01 02:03:04 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return_comment:
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Result of the evaluation of one tcp-check rule */
enum tcpcheck_eval_ret {
	TCPCHK_EVAL_WAIT     = 0, /* the evaluation must wait */
	TCPCHK_EVAL_STOP     = 1, /* the check must be stopped */
	TCPCHK_EVAL_CONTINUE = 2, /* the next rule may be evaluated */
};
|
|
|
|
|
|
|
|
|
|
/* Evaluate a TCPCHK_ACT_CONNECT rule. It returns TCPCHK_EVAL_CONTINUE to
 * evaluate the next rule, TCPCHK_EVAL_WAIT to wait and TCPCHK_EVAL_STOP to
 * stop the check. */
|
|
|
|
|
static enum tcpcheck_eval_ret tcpcheck_eval_connect(struct check *check, struct tcpcheck_rule *rule)
|
2013-10-06 17:24:13 -04:00
|
|
|
{
|
2020-03-30 05:05:10 -04:00
|
|
|
enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
|
|
|
|
|
struct tcpcheck_connect *connect = &rule->connect;
|
2019-01-11 12:17:17 -05:00
|
|
|
struct proxy *proxy = check->proxy;
|
2020-03-30 05:05:10 -04:00
|
|
|
struct server *s = check->server;
|
2013-10-06 17:24:13 -04:00
|
|
|
struct task *t = check->task;
|
2020-03-30 05:05:10 -04:00
|
|
|
struct conn_stream *cs;
|
|
|
|
|
struct connection *conn = NULL;
|
|
|
|
|
struct protocol *proto;
|
|
|
|
|
struct xprt_ops *xprt;
|
2020-02-07 09:37:17 -05:00
|
|
|
char *comment;
|
2020-03-31 02:15:58 -04:00
|
|
|
int status, port;
|
2014-10-02 08:30:14 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* For a connect action we'll create a new connection. We may also have
|
|
|
|
|
* to kill a previous one. But we don't want to leave *without* a
|
|
|
|
|
* connection if we came here from the connection layer, hence with a
|
|
|
|
|
* connection. Thus we'll proceed in the following order :
|
|
|
|
|
* 1: close but not release previous connection (handled by the caller)
|
|
|
|
|
* 2: try to get a new connection
|
|
|
|
|
* 3: release and replace the old one on success
|
2013-12-10 18:52:19 -05:00
|
|
|
*/
|
2015-05-13 09:39:48 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* 2- prepare new connection */
|
|
|
|
|
cs = cs_new(NULL);
|
|
|
|
|
if (!cs) {
|
|
|
|
|
chunk_printf(&trash, "TCPCHK error allocating connection at step %d",
|
|
|
|
|
tcpcheck_get_step_id(check, rule));
|
|
|
|
|
comment = tcpcheck_get_step_comment(check, rule);
|
|
|
|
|
if (comment)
|
|
|
|
|
chunk_appendf(&trash, " comment: '%s'", comment);
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
2017-11-28 04:06:29 -05:00
|
|
|
goto out;
|
2013-10-06 17:24:13 -04:00
|
|
|
}
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* 3- release and replace the old one on success */
|
|
|
|
|
if (check->cs) {
|
|
|
|
|
if (check->wait_list.events)
|
|
|
|
|
cs->conn->xprt->unsubscribe(cs->conn, cs->conn->xprt_ctx,
|
|
|
|
|
check->wait_list.events, &check->wait_list);
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* We may have been scheduled to run, and the I/O handler
|
|
|
|
|
* expects to have a cs, so remove the tasklet
|
2013-12-06 10:54:31 -05:00
|
|
|
*/
|
2020-03-30 05:05:10 -04:00
|
|
|
tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
|
|
|
|
|
cs_destroy(check->cs);
|
|
|
|
|
}
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
tasklet_set_tid(check->wait_list.tasklet, tid);
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
check->cs = cs;
|
|
|
|
|
conn = cs->conn;
|
MEDIUM: checks: do not allocate a permanent connection anymore
Health check currently cheat, they allocate a connection upon startup and never
release it, it's only recycled. The problem with doing this is that this code
is preventing the connection code from evolving towards multiplexing.
This code ensures that it's safe for the checks to run without a connection
all the time. Given that the code heavily relies on CO_FL_ERROR to signal
check errors, it is not trivial but in practice this is the principle adopted
here :
- the connection is not allocated anymore on startup
- new checks are not supposed to have a connection, so an attempt is made
to allocate this connection in the check task's context. If it fails,
the check is aborted on a resource error, and the rare code on this path
verifying the connection was adjusted to check for its existence (in
practice, avoid to close it)
- returning checks necessarily have a valid connection (which may possibly
be closed).
- a "tcp-check connect" rule tries to allocate a new connection before
releasing the previous one (but after closing it), so that if it fails,
it still keeps the previous connection in a closed state. This ensures
a connection is always valid here
Now it works well on all tested cases (regular and TCP checks, even with
multiple reconnections), including when the connection is forced to NULL or
randomly allocated.
2017-10-04 12:05:01 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Maybe there were an older connection we were waiting on */
|
|
|
|
|
check->wait_list.events = 0;
|
|
|
|
|
conn->target = s ? &s->obj_type : &proxy->obj_type;
|
2019-09-20 11:18:35 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* no client address */
|
|
|
|
|
if (!sockaddr_alloc(&conn->dst)) {
|
|
|
|
|
status = SF_ERR_RESOURCE;
|
|
|
|
|
goto fail_check;
|
|
|
|
|
}
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-31 02:15:58 -04:00
|
|
|
/* connect to the connect rule addr if specified, otherwise the check
|
|
|
|
|
* addr if specified on the server. otherwise, use the server addr
|
2020-03-30 05:05:10 -04:00
|
|
|
*/
|
2020-03-31 02:15:58 -04:00
|
|
|
*conn->dst = (is_addr(&connect->addr)
|
|
|
|
|
? connect->addr
|
|
|
|
|
: (is_addr(&check->addr) ? check->addr : s->addr));
|
2020-03-30 05:05:10 -04:00
|
|
|
proto = protocol_by_family(conn->dst->ss_family);
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-31 02:15:58 -04:00
|
|
|
port = 0;
|
|
|
|
|
if (!port && connect->port)
|
|
|
|
|
port = connect->port;
|
2020-03-30 09:19:03 -04:00
|
|
|
if (!port && connect->port_expr) {
|
|
|
|
|
struct sample *smp;
|
|
|
|
|
|
|
|
|
|
smp = sample_fetch_as_type(check->proxy, check->sess, NULL,
|
|
|
|
|
SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
|
|
|
|
|
connect->port_expr, SMP_T_SINT);
|
|
|
|
|
if (smp)
|
|
|
|
|
port = smp->data.u.sint;
|
|
|
|
|
}
|
2020-03-31 02:15:58 -04:00
|
|
|
if (!port && is_inet_addr(&connect->addr))
|
|
|
|
|
port = get_host_port(&connect->addr);
|
|
|
|
|
if (!port && check->port)
|
|
|
|
|
port = check->port;
|
|
|
|
|
if (!port && is_inet_addr(&check->addr))
|
|
|
|
|
port = get_host_port(&check->addr);
|
|
|
|
|
if (!port)
|
|
|
|
|
port = s->svc_port;
|
|
|
|
|
set_host_port(conn->dst, port);
|
2019-07-17 13:04:47 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
xprt = ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT)
|
|
|
|
|
? check->xprt
|
|
|
|
|
: ((connect->options & TCPCHK_OPT_SSL) ? xprt_get(XPRT_SSL) : xprt_get(XPRT_RAW)));
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
conn_prepare(conn, proto, xprt);
|
2020-02-14 11:42:54 -05:00
|
|
|
if (conn_install_mux(conn, &mux_pt_ops, cs, proxy, check->sess) < 0) {
|
2020-03-30 05:05:10 -04:00
|
|
|
status = SF_ERR_RESOURCE;
|
|
|
|
|
goto fail_check;
|
|
|
|
|
}
|
|
|
|
|
cs_attach(cs, check, &check_conn_cb);
|
2020-03-26 12:38:49 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
status = SF_ERR_INTERNAL;
|
|
|
|
|
if (proto && proto->connect) {
|
|
|
|
|
struct tcpcheck_rule *next;
|
|
|
|
|
int flags = CONNECT_HAS_DATA;
|
2018-09-06 05:45:30 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
next = get_next_tcpcheck_rule(check->tcpcheck_rules, rule);
|
|
|
|
|
if (!next || next->action != TCPCHK_ACT_EXPECT)
|
|
|
|
|
flags |= CONNECT_DELACK_ALWAYS;
|
|
|
|
|
status = proto->connect(conn, flags);
|
|
|
|
|
}
|
2019-07-17 12:48:07 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
if (connect->options & TCPCHK_OPT_DEFAULT_CONNECT) {
|
|
|
|
|
#ifdef USE_OPENSSL
|
|
|
|
|
if (status == SF_ERR_NONE) {
|
|
|
|
|
if (s->check.sni)
|
|
|
|
|
ssl_sock_set_servername(conn, s->check.sni);
|
|
|
|
|
if (s->check.alpn_str)
|
|
|
|
|
ssl_sock_set_alpn(conn, (unsigned char *)s->check.alpn_str,
|
|
|
|
|
s->check.alpn_len);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
if (s->check.via_socks4 && (s->flags & SRV_F_SOCKS4_PROXY)) {
|
|
|
|
|
conn->send_proxy_ofs = 1;
|
|
|
|
|
conn->flags |= CO_FL_SOCKS4;
|
|
|
|
|
}
|
|
|
|
|
if (s->check.send_proxy && !(check->state & CHK_ST_AGENT)) {
|
|
|
|
|
conn->send_proxy_ofs = 1;
|
|
|
|
|
conn->flags |= CO_FL_SEND_PROXY;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
2020-03-30 07:00:05 -04:00
|
|
|
#ifdef USE_OPENSSL
|
|
|
|
|
if (status == SF_ERR_NONE) {
|
|
|
|
|
if (connect->sni)
|
|
|
|
|
ssl_sock_set_servername(conn, connect->sni);
|
2020-03-30 07:16:44 -04:00
|
|
|
if (connect->alpn)
|
|
|
|
|
ssl_sock_set_alpn(conn, (unsigned char *)connect->alpn,
|
|
|
|
|
connect->alpn_len);
|
2020-03-30 07:00:05 -04:00
|
|
|
}
|
|
|
|
|
#endif
|
2020-03-30 07:07:02 -04:00
|
|
|
if ((connect->options & TCPCHK_OPT_SOCKS4) && (s->flags & SRV_F_SOCKS4_PROXY)) {
|
|
|
|
|
conn->send_proxy_ofs = 1;
|
|
|
|
|
conn->flags |= CO_FL_SOCKS4;
|
|
|
|
|
}
|
2020-03-30 05:05:10 -04:00
|
|
|
if (connect->options & TCPCHK_OPT_SEND_PROXY) {
|
|
|
|
|
conn->send_proxy_ofs = 1;
|
|
|
|
|
conn->flags |= CO_FL_SEND_PROXY;
|
|
|
|
|
}
|
|
|
|
|
if (conn_ctrl_ready(conn) && (connect->options & TCPCHK_OPT_LINGER)) {
|
|
|
|
|
/* Some servers don't like reset on close */
|
|
|
|
|
fdtab[cs->conn->handle.fd].linger_risk = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
if (conn_ctrl_ready(conn) && (conn->flags & (CO_FL_SEND_PROXY | CO_FL_SOCKS4))) {
|
|
|
|
|
if (xprt_add_hs(conn) < 0)
|
|
|
|
|
status = SF_ERR_RESOURCE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fail_check:
|
|
|
|
|
/* It can return one of :
|
|
|
|
|
* - SF_ERR_NONE if everything's OK
|
|
|
|
|
* - SF_ERR_SRVTO if there are no more servers
|
|
|
|
|
* - SF_ERR_SRVCL if the connection was refused by the server
|
|
|
|
|
* - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
|
|
|
|
|
* - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
|
|
|
|
|
* - SF_ERR_INTERNAL for any other purely internal errors
|
|
|
|
|
* Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
|
|
|
|
|
* Note that we try to prevent the network stack from sending the ACK during the
|
|
|
|
|
* connect() when a pure TCP check is used (without PROXY protocol).
|
|
|
|
|
*/
|
|
|
|
|
switch (status) {
|
|
|
|
|
case SF_ERR_NONE:
|
|
|
|
|
/* we allow up to min(inter, timeout.connect) for a connection
|
|
|
|
|
* to establish but only when timeout.check is set as it may be
|
|
|
|
|
* to short for a full check otherwise
|
|
|
|
|
*/
|
|
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
|
2020-02-28 05:04:21 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
if (proxy->timeout.check && proxy->timeout.connect) {
|
|
|
|
|
int t_con = tick_add(now_ms, proxy->timeout.connect);
|
|
|
|
|
t->expire = tick_first(t->expire, t_con);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case SF_ERR_SRVTO: /* ETIMEDOUT */
|
|
|
|
|
case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
|
|
|
|
|
chunk_printf(&trash, "TCPCHK error establishing connection at step %d: %s",
|
|
|
|
|
tcpcheck_get_step_id(check, rule), strerror(errno));
|
|
|
|
|
comment = tcpcheck_get_step_comment(check, rule);
|
|
|
|
|
if (comment)
|
|
|
|
|
chunk_appendf(&trash, " comment: '%s'", comment);
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L4CON, trash.area);
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
case SF_ERR_PRXCOND:
|
|
|
|
|
case SF_ERR_RESOURCE:
|
|
|
|
|
case SF_ERR_INTERNAL:
|
|
|
|
|
chunk_printf(&trash, "TCPCHK error establishing connection at step %d",
|
|
|
|
|
tcpcheck_get_step_id(check, rule));
|
|
|
|
|
comment = tcpcheck_get_step_comment(check, rule);
|
|
|
|
|
if (comment)
|
|
|
|
|
chunk_appendf(&trash, " comment: '%s'", comment);
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2020-03-26 12:38:49 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* don't do anything until the connection is established */
|
|
|
|
|
if (conn->flags & CO_FL_WAIT_XPRT) {
|
|
|
|
|
ret = TCPCHK_EVAL_WAIT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2020-03-26 12:38:49 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
out:
|
|
|
|
|
if (conn && check->result == CHK_RES_FAILED)
|
|
|
|
|
conn->flags |= CO_FL_ERROR;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Evaluate a TCPCHK_ACT_SEND rule. It returns 1 to evaluate the next rule, 0
|
|
|
|
|
* to wait and -1 to stop the check. */
|
|
|
|
|
static enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_rule *rule)
|
|
|
|
|
{
|
|
|
|
|
enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
|
|
|
|
|
struct tcpcheck_send *send = &rule->send;
|
|
|
|
|
struct conn_stream *cs = check->cs;
|
|
|
|
|
struct connection *conn = cs_conn(cs);
|
2020-03-30 13:52:29 -04:00
|
|
|
struct buffer *tmp = NULL;
|
2020-02-07 09:37:17 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* reset the read & write buffer */
|
|
|
|
|
b_reset(&check->bi);
|
|
|
|
|
b_reset(&check->bo);
|
|
|
|
|
|
|
|
|
|
switch (send->type) {
|
|
|
|
|
case TCPCHK_SEND_STRING:
|
|
|
|
|
case TCPCHK_SEND_BINARY:
|
2020-03-30 13:52:29 -04:00
|
|
|
if (istlen(send->data) >= b_size(&check->bo)) {
|
|
|
|
|
chunk_printf(&trash, "tcp-check send : string too large (%u) for buffer size (%u) at step %d",
|
|
|
|
|
(unsigned int)istlen(send->data), (unsigned int)b_size(&check->bo),
|
|
|
|
|
tcpcheck_get_step_id(check, rule));
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
b_putist(&check->bo, send->data);
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_STRING_LF:
|
|
|
|
|
check->bo.data = sess_build_logline(check->sess, NULL, b_orig(&check->bo), b_size(&check->bo), &rule->send.fmt);
|
|
|
|
|
if (!b_data(&check->bo))
|
|
|
|
|
goto out;
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_BINARY_LF:
|
|
|
|
|
tmp = alloc_trash_chunk();
|
|
|
|
|
if (!tmp)
|
|
|
|
|
goto error_lf;
|
|
|
|
|
tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &rule->send.fmt);
|
|
|
|
|
if (!b_data(tmp))
|
|
|
|
|
goto out;
|
|
|
|
|
tmp->area[tmp->data] = '\0';
|
|
|
|
|
b_set_data(&check->bo, b_size(&check->bo));
|
|
|
|
|
if (parse_binary(b_orig(tmp), &check->bo.area, (int *)&check->bo.data, NULL) == 0)
|
|
|
|
|
goto error_lf;
|
2020-03-30 05:05:10 -04:00
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_UNDEF:
|
|
|
|
|
/* Should never happen. */
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
};
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
if (conn->mux->snd_buf(cs, &check->bo, b_data(&check->bo), 0) <= 0) {
|
|
|
|
|
ret = TCPCHK_EVAL_WAIT;
|
|
|
|
|
if ((conn->flags & CO_FL_ERROR) || (cs->flags & CS_FL_ERROR))
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
if (b_data(&check->bo)) {
|
|
|
|
|
cs->conn->mux->subscribe(cs, SUB_RETRY_SEND, &check->wait_list);
|
|
|
|
|
ret = TCPCHK_EVAL_WAIT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2015-05-13 06:08:21 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
out:
|
2020-03-30 13:52:29 -04:00
|
|
|
free_trash_chunk(tmp);
|
2020-03-30 05:05:10 -04:00
|
|
|
return ret;
|
2020-03-30 13:52:29 -04:00
|
|
|
|
|
|
|
|
error_lf:
|
|
|
|
|
chunk_printf(&trash, "tcp-check send : failed to build log-format string at step %d",
|
|
|
|
|
tcpcheck_get_step_id(check, rule));
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
}
|
2019-07-15 04:57:51 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Evaluate a TCPCHK_ACT_EXPECT rule. It returns 1 to evaluate the next rule, 0
|
2020-02-24 11:34:11 -05:00
|
|
|
* to wait and -1 to stop the check.
|
2020-03-30 05:05:10 -04:00
|
|
|
*/
|
|
|
|
|
static enum tcpcheck_eval_ret tcpcheck_eval_expect(struct check *check, struct tcpcheck_rule *rule, int last_read)
|
|
|
|
|
{
|
|
|
|
|
enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
|
|
|
|
|
struct tcpcheck_expect *expect = &check->current_step->expect;
|
|
|
|
|
char *comment, *diag;
|
|
|
|
|
int match;
|
2020-02-21 12:41:28 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* The current expect might need more data than the previous one, check again
|
|
|
|
|
* that the minimum amount data required to match is respected.
|
|
|
|
|
*/
|
|
|
|
|
if (!last_read) {
|
|
|
|
|
if ((expect->type == TCPCHK_EXPECT_STRING || expect->type == TCPCHK_EXPECT_BINARY) &&
|
|
|
|
|
(b_data(&check->bi) < expect->length)) {
|
|
|
|
|
ret = TCPCHK_EVAL_WAIT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
if (expect->min_recv > 0 && (b_data(&check->bi) < expect->min_recv)) {
|
|
|
|
|
ret = TCPCHK_EVAL_WAIT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Make GCC happy ; initialize match to a failure state. */
|
|
|
|
|
match = expect->inverse;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
switch (expect->type) {
|
|
|
|
|
case TCPCHK_EXPECT_STRING:
|
|
|
|
|
case TCPCHK_EXPECT_BINARY:
|
|
|
|
|
match = my_memmem(b_head(&check->bi), b_data(&check->bi), expect->string, expect->length) != NULL;
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_REGEX:
|
|
|
|
|
if (expect->with_capture)
|
|
|
|
|
match = regex_exec_match2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1),
|
|
|
|
|
MAX_MATCH, pmatch, 0);
|
|
|
|
|
else
|
|
|
|
|
match = regex_exec2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1));
|
|
|
|
|
break;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
case TCPCHK_EXPECT_REGEX_BINARY:
|
|
|
|
|
chunk_reset(&trash);
|
|
|
|
|
dump_binary(&trash, b_head(&check->bi), b_data(&check->bi));
|
|
|
|
|
if (expect->with_capture)
|
|
|
|
|
match = regex_exec_match2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1),
|
|
|
|
|
MAX_MATCH, pmatch, 0);
|
|
|
|
|
else
|
|
|
|
|
match = regex_exec2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1));
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_UNDEF:
|
|
|
|
|
/* Should never happen. */
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2013-12-06 10:54:31 -05:00
|
|
|
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Wait for more data on mismatch only if no minimum is defined (-1),
|
|
|
|
|
* otherwise the absence of match is already conclusive.
|
|
|
|
|
*/
|
|
|
|
|
if (!match && !last_read && (expect->min_recv == -1)) {
|
|
|
|
|
ret = TCPCHK_EVAL_WAIT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Result as expected, next rule. */
|
|
|
|
|
if (match ^ expect->inverse)
|
|
|
|
|
goto out;
|
2020-02-26 10:19:40 -05:00
|
|
|
|
2013-12-10 18:52:19 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* From this point on, we matched something we did not want, this is an error state. */
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
diag = match ? "matched unwanted content" : "did not match content";
|
|
|
|
|
switch (expect->type) {
|
|
|
|
|
case TCPCHK_EXPECT_STRING:
|
|
|
|
|
chunk_printf(&trash, "TCPCHK %s '%s' at step %d",
|
|
|
|
|
diag, expect->string, tcpcheck_get_step_id(check, rule));
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_BINARY:
|
|
|
|
|
chunk_printf(&trash, "TCPCHK %s (binary) at step %d",
|
|
|
|
|
diag, tcpcheck_get_step_id(check, rule));
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_REGEX:
|
|
|
|
|
chunk_printf(&trash, "TCPCHK %s (regex) at step %d",
|
|
|
|
|
diag, tcpcheck_get_step_id(check, rule));
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_REGEX_BINARY:
|
|
|
|
|
chunk_printf(&trash, "TCPCHK %s (binary regex) at step %d",
|
|
|
|
|
diag, tcpcheck_get_step_id(check, rule));
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* If references to the matched text were made, divide the
|
|
|
|
|
* offsets by 2 to match offset of the original response buffer.
|
|
|
|
|
*/
|
|
|
|
|
if (expect->with_capture) {
|
|
|
|
|
int i;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
for (i = 1; i < MAX_MATCH && pmatch[i].rm_so != -1; i++) {
|
|
|
|
|
pmatch[i].rm_so /= 2; /* at first matched char. */
|
|
|
|
|
pmatch[i].rm_eo /= 2; /* at last matched char. */
|
2013-10-06 17:24:13 -04:00
|
|
|
}
|
2020-03-30 05:05:10 -04:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_UNDEF:
|
|
|
|
|
/* Should never happen. */
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
comment = tcpcheck_get_step_comment(check, rule);
|
|
|
|
|
if (comment) {
|
|
|
|
|
if (expect->with_capture) {
|
|
|
|
|
ret = exp_replace(b_tail(&trash), b_room(&trash), b_head(&check->bi), comment, pmatch);
|
|
|
|
|
if (ret > 0) /* ignore comment if too large */
|
|
|
|
|
trash.data += ret;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
chunk_appendf(&trash, " comment: '%s'", comment);
|
|
|
|
|
}
|
2020-04-01 05:04:52 -04:00
|
|
|
set_server_check_status(check, expect->err_status, trash.area);
|
2020-03-30 05:05:10 -04:00
|
|
|
ret = TCPCHK_EVAL_STOP;
|
2020-02-07 09:37:17 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
out:
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2020-02-26 09:59:22 -05:00
|
|
|
|
2020-02-24 11:34:11 -05:00
|
|
|
/* Evaluate a TCPCHK_ACT_ACTION_KW rule. It returns 1 to evaluate the next rule, 0
|
|
|
|
|
* to wait and -1 to stop the check.
|
|
|
|
|
*/
|
|
|
|
|
static enum tcpcheck_eval_ret tcpcheck_eval_action_kw(struct check *check, struct tcpcheck_rule *rule)
|
|
|
|
|
{
|
|
|
|
|
enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
|
|
|
|
|
struct act_rule *act_rule;
|
|
|
|
|
enum act_return act_ret;
|
|
|
|
|
|
|
|
|
|
act_rule =rule->action_kw.rule;
|
|
|
|
|
act_ret = act_rule->action_ptr(act_rule, check->proxy, check->sess, NULL, 0);
|
|
|
|
|
if (act_ret != ACT_RET_CONT) {
|
|
|
|
|
chunk_printf(&trash, "TCPCHK ACTION unexpected result at step %d\n",
|
|
|
|
|
tcpcheck_get_step_id(check, rule));
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
|
|
|
|
|
ret = TCPCHK_EVAL_STOP;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* proceed with next steps for the TCP checks <check>. Note that this is called
|
|
|
|
|
* both from the connection's wake() callback and from the check scheduling task.
|
|
|
|
|
* It returns 0 on normal cases, or <0 if a close() has happened on an existing
|
|
|
|
|
* connection, presenting the risk of an fd replacement.
|
|
|
|
|
*
|
|
|
|
|
* Please do NOT place any return statement in this function and only leave
|
|
|
|
|
* via the out_end_tcpcheck label after setting retcode.
|
|
|
|
|
*/
|
|
|
|
|
static int tcpcheck_main(struct check *check)
|
|
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *rule;
|
|
|
|
|
struct conn_stream *cs = check->cs;
|
|
|
|
|
struct connection *conn = cs_conn(cs);
|
|
|
|
|
int must_read = 1, last_read = 0;
|
|
|
|
|
int ret, retcode = 0;
|
2020-02-07 09:37:17 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* here, we know that the check is complete or that it failed */
|
|
|
|
|
if (check->result != CHK_RES_UNKNOWN)
|
|
|
|
|
goto out_end_tcpcheck;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* 1- check for connection error, if any */
|
|
|
|
|
if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR))
|
|
|
|
|
goto out_end_tcpcheck;
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* 2- check if we are waiting for the connection establishment. It only
|
|
|
|
|
* happens during TCPCHK_ACT_CONNECT. */
|
|
|
|
|
if (conn && (conn->flags & CO_FL_WAIT_XPRT))
|
|
|
|
|
goto out;
|
2020-02-26 09:59:22 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* 3- check for pending outgoing data. It only happens during TCPCHK_ACT_SEND. */
|
|
|
|
|
if (conn && b_data(&check->bo)) {
|
|
|
|
|
ret = conn->mux->snd_buf(cs, &check->bo, b_data(&check->bo), 0);
|
|
|
|
|
if (ret <= 0) {
|
|
|
|
|
if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR))
|
|
|
|
|
goto out_end_tcpcheck;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
if (b_data(&check->bo)) {
|
|
|
|
|
cs->conn->mux->subscribe(cs, SUB_RETRY_SEND, &check->wait_list);
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-05-01 02:03:04 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* Now evaluate the tcp-check rules */
|
2015-05-01 02:03:04 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* If check->current_step is defined, we are in resume condition. For
|
|
|
|
|
* TCPCHK_ACT_CONNECT and TCPCHK_ACT_SEND rules, we must go to the next
|
|
|
|
|
* rule before resuming the evaluation. For TCPCHK_ACT_EXPECT, we
|
|
|
|
|
* re-evaluate the current rule. Others cannot yield.
|
|
|
|
|
*/
|
|
|
|
|
if (check->current_step) {
|
|
|
|
|
if (check->current_step->action == TCPCHK_ACT_CONNECT ||
|
|
|
|
|
check->current_step->action == TCPCHK_ACT_SEND)
|
|
|
|
|
rule = LIST_NEXT(&check->current_step->list, typeof(rule), list);
|
|
|
|
|
else
|
|
|
|
|
rule = check->current_step;
|
|
|
|
|
}
|
2020-02-14 11:42:54 -05:00
|
|
|
else {
|
|
|
|
|
/* First evaluation, create a session */
|
2020-02-21 12:13:44 -05:00
|
|
|
check->sess = session_new(&checks_fe, NULL, (check->server ? &check->server->obj_type : NULL));
|
2020-02-14 11:42:54 -05:00
|
|
|
if (!check->sess) {
|
|
|
|
|
chunk_printf(&trash, "TCPCHK error allocating check session");
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
|
|
|
|
|
goto out_end_tcpcheck;
|
|
|
|
|
}
|
2020-02-21 12:13:44 -05:00
|
|
|
vars_init(&check->vars, SCOPE_CHECK);
|
2020-03-30 14:34:34 -04:00
|
|
|
rule = LIST_NEXT(check->tcpcheck_rules->list, typeof(rule), list);
|
2020-02-14 11:42:54 -05:00
|
|
|
}
|
2015-05-13 06:08:21 -04:00
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry_from(rule, check->tcpcheck_rules->list, list) {
|
2020-03-30 05:05:10 -04:00
|
|
|
enum tcpcheck_eval_ret eval_ret;
|
2020-02-07 09:37:17 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
switch (rule->action) {
|
|
|
|
|
case TCPCHK_ACT_CONNECT:
|
|
|
|
|
check->current_step = rule;
|
2020-02-07 09:37:17 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* close but not release yet previous connection */
|
|
|
|
|
if (check->cs) {
|
|
|
|
|
cs_close(check->cs);
|
|
|
|
|
retcode = -1; /* do not reuse the fd in the caller! */
|
|
|
|
|
}
|
|
|
|
|
eval_ret = tcpcheck_eval_connect(check, rule);
|
|
|
|
|
must_read = 1; last_read = 0;
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_ACT_SEND:
|
|
|
|
|
check->current_step = rule;
|
|
|
|
|
eval_ret = tcpcheck_eval_send(check, rule);
|
|
|
|
|
must_read = 1;
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_ACT_EXPECT:
|
|
|
|
|
check->current_step = rule;
|
|
|
|
|
if (must_read) {
|
|
|
|
|
if (check->proxy->timeout.check)
|
|
|
|
|
check->task->expire = tick_add_ifset(now_ms, check->proxy->timeout.check);
|
|
|
|
|
|
|
|
|
|
/* If we already subscribed, then we tried to received and
|
|
|
|
|
* failed, so there's no point trying again.
|
2020-02-07 09:37:17 -05:00
|
|
|
*/
|
2020-03-30 05:05:10 -04:00
|
|
|
if (check->wait_list.events & SUB_RETRY_RECV)
|
|
|
|
|
goto out;
|
|
|
|
|
if (conn->mux->rcv_buf(cs, &check->bi, b_size(&check->bi), 0) <= 0) {
|
|
|
|
|
if (conn->flags & (CO_FL_ERROR|CO_FL_SOCK_RD_SH) || cs->flags & CS_FL_ERROR) {
|
|
|
|
|
last_read = 1;
|
|
|
|
|
if ((conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR) && !b_data(&check->bi)) {
|
|
|
|
|
/* Report network errors only if we got no other data. Otherwise
|
|
|
|
|
* we'll let the upper layers decide whether the response is OK
|
|
|
|
|
* or not. It is very common that an RST sent by the server is
|
|
|
|
|
* reported as an error just after the last data chunk.
|
|
|
|
|
*/
|
|
|
|
|
goto out_end_tcpcheck;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
conn->mux->subscribe(cs, SUB_RETRY_RECV, &check->wait_list);
|
|
|
|
|
goto out;
|
2020-02-07 09:37:17 -05:00
|
|
|
}
|
|
|
|
|
}
|
2020-02-26 09:59:22 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* buffer full, don't wait for more data */
|
|
|
|
|
if (b_full(&check->bi))
|
|
|
|
|
last_read = 1;
|
|
|
|
|
|
|
|
|
|
/* Check that response body is not empty... */
|
|
|
|
|
if (!b_data(&check->bi)) {
|
|
|
|
|
char *comment;
|
|
|
|
|
|
|
|
|
|
if (!last_read)
|
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
|
|
/* empty response */
|
|
|
|
|
chunk_printf(&trash, "TCPCHK got an empty response at step %d",
|
|
|
|
|
tcpcheck_get_step_id(check, rule));
|
|
|
|
|
comment = tcpcheck_get_step_comment(check, rule);
|
|
|
|
|
if (comment)
|
|
|
|
|
chunk_appendf(&trash, " comment: '%s'", comment);
|
2020-04-01 05:04:52 -04:00
|
|
|
set_server_check_status(check, rule->expect.err_status, trash.area);
|
2020-03-30 05:05:10 -04:00
|
|
|
ret = -1;
|
|
|
|
|
goto out_end_tcpcheck;
|
2020-02-07 09:37:17 -05:00
|
|
|
}
|
2020-03-30 05:05:10 -04:00
|
|
|
must_read = 0;
|
2020-02-07 09:37:17 -05:00
|
|
|
}
|
2020-02-24 11:34:11 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
eval_ret = tcpcheck_eval_expect(check, rule, last_read);
|
|
|
|
|
if (eval_ret == TCPCHK_EVAL_WAIT) {
|
|
|
|
|
check->current_step = rule->expect.head;
|
|
|
|
|
conn->mux->subscribe(cs, SUB_RETRY_RECV, &check->wait_list);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2020-02-24 11:34:11 -05:00
|
|
|
case TCPCHK_ACT_ACTION_KW:
|
|
|
|
|
/* Don't update the current step */
|
|
|
|
|
eval_ret = tcpcheck_eval_action_kw(check, rule);
|
|
|
|
|
break;
|
2020-03-30 05:05:10 -04:00
|
|
|
default:
|
|
|
|
|
/* Otherwise, just go to the next one and don't update
|
|
|
|
|
* the current step
|
|
|
|
|
*/
|
|
|
|
|
eval_ret = TCPCHK_EVAL_CONTINUE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2018-03-01 15:49:01 -05:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
switch (eval_ret) {
|
|
|
|
|
case TCPCHK_EVAL_CONTINUE:
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EVAL_WAIT:
|
|
|
|
|
goto out;
|
|
|
|
|
case TCPCHK_EVAL_STOP:
|
|
|
|
|
goto out_end_tcpcheck;
|
2018-03-01 15:49:01 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
/* All rules was evaluated */
|
|
|
|
|
set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-30 05:05:10 -04:00
|
|
|
out_end_tcpcheck:
|
2019-07-23 08:37:47 -04:00
|
|
|
if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR))
|
2020-03-30 05:05:10 -04:00
|
|
|
chk_report_conn_err(check, errno, 0);
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2013-12-10 18:52:19 -05:00
|
|
|
/* cleanup before leaving */
|
2020-03-30 05:05:10 -04:00
|
|
|
check->current_step = NULL;
|
2020-02-14 11:42:54 -05:00
|
|
|
if (check->sess != NULL) {
|
2020-02-21 12:13:44 -05:00
|
|
|
vars_prune(&check->vars, check->sess, NULL);
|
2020-02-14 11:42:54 -05:00
|
|
|
session_free(check->sess);
|
|
|
|
|
check->sess = NULL;
|
|
|
|
|
}
|
|
|
|
|
out:
|
|
|
|
|
return retcode;
|
2013-10-06 17:24:13 -04:00
|
|
|
}
|
|
|
|
|
|
2020-03-26 16:10:03 -04:00
|
|
|
static const char *init_check(struct check *check, int type)
|
2015-01-29 21:22:54 -05:00
|
|
|
{
|
|
|
|
|
check->type = type;
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
b_reset(&check->bi); check->bi.size = global.tune.chksize;
|
|
|
|
|
b_reset(&check->bo); check->bo.size = global.tune.chksize;
|
|
|
|
|
|
|
|
|
|
check->bi.area = calloc(check->bi.size, sizeof(char));
|
|
|
|
|
check->bo.area = calloc(check->bo.size, sizeof(char));
|
2015-01-29 21:22:54 -05:00
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
if (!check->bi.area || !check->bo.area)
|
2015-01-29 21:22:54 -05:00
|
|
|
return "out of memory while allocating check buffer";
|
2018-07-10 11:43:27 -04:00
|
|
|
|
2019-06-14 08:42:29 -04:00
|
|
|
check->wait_list.tasklet = tasklet_new();
|
|
|
|
|
if (!check->wait_list.tasklet)
|
2020-04-07 16:07:56 -04:00
|
|
|
return "out of memory while allocating check tasklet";
|
2018-12-19 07:59:17 -05:00
|
|
|
check->wait_list.events = 0;
|
2019-06-14 08:42:29 -04:00
|
|
|
check->wait_list.tasklet->process = event_srv_chk_io;
|
|
|
|
|
check->wait_list.tasklet->context = check;
|
2015-01-29 21:22:54 -05:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2015-01-29 21:22:55 -05:00
|
|
|
void free_check(struct check *check)
|
|
|
|
|
{
|
2020-03-26 14:48:20 -04:00
|
|
|
task_destroy(check->task);
|
|
|
|
|
if (check->wait_list.tasklet)
|
|
|
|
|
tasklet_free(check->wait_list.tasklet);
|
|
|
|
|
|
2018-07-10 11:43:27 -04:00
|
|
|
free(check->bi.area);
|
|
|
|
|
free(check->bo.area);
|
2018-01-25 05:36:35 -05:00
|
|
|
if (check->cs) {
|
|
|
|
|
free(check->cs->conn);
|
|
|
|
|
check->cs->conn = NULL;
|
|
|
|
|
cs_free(check->cs);
|
|
|
|
|
check->cs = NULL;
|
|
|
|
|
}
|
2015-01-29 21:22:55 -05:00
|
|
|
}
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
static void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool)
|
|
|
|
|
{
|
2020-03-30 13:52:29 -04:00
|
|
|
struct logformat_node *lf, *lfb;
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
if (!rule)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
free(rule->comment);
|
2020-02-21 12:41:28 -05:00
|
|
|
switch (rule->action) {
|
|
|
|
|
case TCPCHK_ACT_SEND:
|
|
|
|
|
switch (rule->send.type) {
|
|
|
|
|
case TCPCHK_SEND_STRING:
|
|
|
|
|
case TCPCHK_SEND_BINARY:
|
2020-03-30 13:52:29 -04:00
|
|
|
free(rule->send.data.ptr);
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_STRING_LF:
|
|
|
|
|
case TCPCHK_SEND_BINARY_LF:
|
|
|
|
|
list_for_each_entry_safe(lf, lfb, &rule->send.fmt, list) {
|
|
|
|
|
LIST_DEL(&lf->list);
|
|
|
|
|
release_sample_expr(lf->expr);
|
|
|
|
|
free(lf->arg);
|
|
|
|
|
free(lf);
|
|
|
|
|
}
|
2020-02-21 12:41:28 -05:00
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_UNDEF:
|
|
|
|
|
break;
|
|
|
|
|
}
|
2020-03-25 13:20:15 -04:00
|
|
|
break;
|
2020-02-21 12:41:28 -05:00
|
|
|
case TCPCHK_ACT_EXPECT:
|
|
|
|
|
switch (rule->expect.type) {
|
|
|
|
|
case TCPCHK_EXPECT_STRING:
|
|
|
|
|
case TCPCHK_EXPECT_BINARY:
|
|
|
|
|
free(rule->expect.string);
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_REGEX:
|
|
|
|
|
case TCPCHK_EXPECT_REGEX_BINARY:
|
|
|
|
|
regex_free(rule->expect.regex);
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_UNDEF:
|
|
|
|
|
break;
|
|
|
|
|
}
|
2020-03-25 13:20:15 -04:00
|
|
|
break;
|
2020-02-21 12:41:28 -05:00
|
|
|
case TCPCHK_ACT_CONNECT:
|
2020-03-30 07:00:05 -04:00
|
|
|
free(rule->connect.sni);
|
2020-03-30 07:16:44 -04:00
|
|
|
free(rule->connect.alpn);
|
2020-03-30 09:19:03 -04:00
|
|
|
release_sample_expr(rule->connect.port_expr);
|
2020-03-30 07:00:05 -04:00
|
|
|
break;
|
2020-02-21 12:41:28 -05:00
|
|
|
case TCPCHK_ACT_COMMENT:
|
2020-03-25 13:20:15 -04:00
|
|
|
break;
|
2020-02-21 12:14:59 -05:00
|
|
|
case TCPCHK_ACT_ACTION_KW:
|
|
|
|
|
free(rule->action_kw.rule);
|
|
|
|
|
break;
|
2020-03-25 13:20:15 -04:00
|
|
|
}
|
2020-02-21 12:41:28 -05:00
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
if (in_pool)
|
|
|
|
|
pool_free(pool_head_tcpcheck_rule, rule);
|
|
|
|
|
else
|
|
|
|
|
free(rule);
|
|
|
|
|
}
|
|
|
|
|
|
2015-01-29 21:23:00 -05:00
|
|
|
void email_alert_free(struct email_alert *alert)
|
|
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *rule, *back;
|
|
|
|
|
|
|
|
|
|
if (!alert)
|
|
|
|
|
return;
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (alert->rules.list) {
|
|
|
|
|
list_for_each_entry_safe(rule, back, alert->rules.list, list) {
|
|
|
|
|
LIST_DEL(&rule->list);
|
|
|
|
|
free_tcpcheck(rule, 1);
|
|
|
|
|
}
|
|
|
|
|
free(alert->rules.list);
|
|
|
|
|
alert->rules.list = NULL;
|
2017-10-23 09:38:19 -04:00
|
|
|
}
|
2017-11-24 11:34:44 -05:00
|
|
|
pool_free(pool_head_email_alert, alert);
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
|
|
|
|
|
2018-05-25 08:04:04 -04:00
|
|
|
static struct task *process_email_alert(struct task *t, void *context, unsigned short state)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
2018-05-25 08:04:04 -04:00
|
|
|
struct check *check = context;
|
2015-01-29 21:23:00 -05:00
|
|
|
struct email_alertq *q;
|
2017-10-20 15:34:32 -04:00
|
|
|
struct email_alert *alert;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
|
|
|
|
q = container_of(check, typeof(*q), check);
|
|
|
|
|
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2017-10-20 15:34:32 -04:00
|
|
|
while (1) {
|
|
|
|
|
if (!(check->state & CHK_ST_ENABLED)) {
|
|
|
|
|
if (LIST_ISEMPTY(&q->email_alerts)) {
|
|
|
|
|
/* All alerts processed, queue the task */
|
|
|
|
|
t->expire = TICK_ETERNITY;
|
|
|
|
|
task_queue(t);
|
2017-10-23 09:54:24 -04:00
|
|
|
goto end;
|
2017-10-20 15:34:32 -04:00
|
|
|
}
|
2015-01-29 21:23:00 -05:00
|
|
|
|
|
|
|
|
alert = LIST_NEXT(&q->email_alerts, typeof(alert), list);
|
|
|
|
|
LIST_DEL(&alert->list);
|
2017-10-20 15:34:32 -04:00
|
|
|
t->expire = now_ms;
|
2020-03-30 14:34:34 -04:00
|
|
|
check->tcpcheck_rules = &alert->rules;
|
2019-01-11 12:43:04 -05:00
|
|
|
check->status = HCHK_STATUS_INI;
|
2017-10-20 15:34:32 -04:00
|
|
|
check->state |= CHK_ST_ENABLED;
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
|
|
|
|
|
2018-05-25 08:04:04 -04:00
|
|
|
process_chk(t, context, state);
|
2017-10-20 15:34:32 -04:00
|
|
|
if (check->state & CHK_ST_INPROGRESS)
|
|
|
|
|
break;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
alert = container_of(check->tcpcheck_rules, typeof(*alert), rules);
|
2015-01-29 21:23:00 -05:00
|
|
|
email_alert_free(alert);
|
|
|
|
|
check->tcpcheck_rules = NULL;
|
2017-10-20 15:34:32 -04:00
|
|
|
check->server = NULL;
|
|
|
|
|
check->state &= ~CHK_ST_ENABLED;
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
2017-10-23 09:54:24 -04:00
|
|
|
end:
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2015-01-29 21:23:00 -05:00
|
|
|
return t;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
/* Initializes mailer alerts for the proxy <p> using <mls> parameters.
|
|
|
|
|
*
|
|
|
|
|
* The function returns 1 in success case, otherwise, it returns 0 and err is
|
|
|
|
|
* filled.
|
|
|
|
|
*/
|
|
|
|
|
int init_email_alert(struct mailers *mls, struct proxy *p, char **err)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
2017-10-20 15:34:32 -04:00
|
|
|
struct mailer *mailer;
|
|
|
|
|
struct email_alertq *queues;
|
|
|
|
|
const char *err_str;
|
|
|
|
|
int i = 0;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
if ((queues = calloc(mls->count, sizeof(*queues))) == NULL) {
|
|
|
|
|
memprintf(err, "out of memory while allocating mailer alerts queues");
|
2018-10-02 10:46:34 -04:00
|
|
|
goto fail_no_queue;
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
|
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
for (mailer = mls->mailer_list; mailer; i++, mailer = mailer->next) {
|
|
|
|
|
struct email_alertq *q = &queues[i];
|
|
|
|
|
struct check *check = &q->check;
|
|
|
|
|
struct task *t;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
|
|
|
|
LIST_INIT(&q->email_alerts);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_INIT(&q->lock);
|
2017-10-20 15:34:32 -04:00
|
|
|
check->inter = mls->timeout.mail;
|
2015-01-29 21:23:00 -05:00
|
|
|
check->rise = DEF_AGENT_RISETIME;
|
2019-01-11 12:17:17 -05:00
|
|
|
check->proxy = p;
|
2015-01-29 21:23:00 -05:00
|
|
|
check->fall = DEF_AGENT_FALLTIME;
|
2017-10-20 15:34:32 -04:00
|
|
|
if ((err_str = init_check(check, PR_O2_TCPCHK_CHK))) {
|
|
|
|
|
memprintf(err, "%s", err_str);
|
|
|
|
|
goto error;
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
check->xprt = mailer->xprt;
|
2017-10-20 15:34:32 -04:00
|
|
|
check->addr = mailer->addr;
|
2018-03-27 09:35:35 -04:00
|
|
|
check->port = get_host_port(&mailer->addr);
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2017-09-27 08:59:38 -04:00
|
|
|
if ((t = task_new(MAX_THREADS_MASK)) == NULL) {
|
2017-10-20 15:34:32 -04:00
|
|
|
memprintf(err, "out of memory while allocating mailer alerts task");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
check->task = t;
|
|
|
|
|
t->process = process_email_alert;
|
|
|
|
|
t->context = check;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
/* check this in one ms */
|
|
|
|
|
t->expire = TICK_ETERNITY;
|
|
|
|
|
check->start = now;
|
|
|
|
|
task_queue(t);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mls->users++;
|
|
|
|
|
free(p->email_alert.mailers.name);
|
|
|
|
|
p->email_alert.mailers.m = mls;
|
|
|
|
|
p->email_alert.queues = queues;
|
2015-01-29 21:23:00 -05:00
|
|
|
return 0;
|
2017-10-20 15:34:32 -04:00
|
|
|
|
|
|
|
|
error:
|
|
|
|
|
for (i = 0; i < mls->count; i++) {
|
|
|
|
|
struct email_alertq *q = &queues[i];
|
|
|
|
|
struct check *check = &q->check;
|
|
|
|
|
|
|
|
|
|
free_check(check);
|
|
|
|
|
}
|
|
|
|
|
free(queues);
|
2018-10-02 10:46:34 -04:00
|
|
|
fail_no_queue:
|
2017-10-20 15:34:32 -04:00
|
|
|
return 1;
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
static int add_tcpcheck_expect_str(struct tcpcheck_rules *rules, const char *str)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
2020-02-26 10:19:40 -05:00
|
|
|
struct tcpcheck_rule *tcpcheck, *prev_check;
|
2020-02-07 09:37:17 -05:00
|
|
|
struct tcpcheck_expect *expect;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
if ((tcpcheck = pool_alloc(pool_head_tcpcheck_rule)) == NULL)
|
2015-01-29 21:23:00 -05:00
|
|
|
return 0;
|
2017-10-23 09:45:20 -04:00
|
|
|
memset(tcpcheck, 0, sizeof(*tcpcheck));
|
2020-02-07 09:37:17 -05:00
|
|
|
tcpcheck->action = TCPCHK_ACT_EXPECT;
|
|
|
|
|
|
|
|
|
|
expect = &tcpcheck->expect;
|
|
|
|
|
expect->type = TCPCHK_EXPECT_STRING;
|
2020-04-01 05:04:52 -04:00
|
|
|
expect->err_status = HCHK_STATUS_L7RSP;
|
|
|
|
|
expect->tout_status = HCHK_STATUS_L7TOUT;
|
2020-02-07 09:37:17 -05:00
|
|
|
expect->string = strdup(str);
|
|
|
|
|
if (!expect->string) {
|
2017-11-24 11:34:44 -05:00
|
|
|
pool_free(pool_head_tcpcheck_rule, tcpcheck);
|
2015-01-29 21:23:00 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
2020-02-07 09:37:17 -05:00
|
|
|
expect->length = strlen(expect->string);
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2020-02-26 10:19:40 -05:00
|
|
|
/* All tcp-check expect points back to the first inverse expect rule
|
|
|
|
|
* in a chain of one or more expect rule, potentially itself.
|
|
|
|
|
*/
|
2020-02-07 09:37:17 -05:00
|
|
|
tcpcheck->expect.head = tcpcheck;
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry_rev(prev_check, rules->list, list) {
|
2020-02-26 10:19:40 -05:00
|
|
|
if (prev_check->action == TCPCHK_ACT_EXPECT) {
|
2020-02-07 09:37:17 -05:00
|
|
|
if (prev_check->expect.inverse)
|
|
|
|
|
tcpcheck->expect.head = prev_check;
|
2020-02-26 10:19:40 -05:00
|
|
|
continue;
|
|
|
|
|
}
|
2020-02-24 11:34:11 -05:00
|
|
|
if (prev_check->action != TCPCHK_ACT_COMMENT && prev_check->action != TCPCHK_ACT_ACTION_KW)
|
2020-02-26 10:19:40 -05:00
|
|
|
break;
|
|
|
|
|
}
|
2020-03-30 14:34:34 -04:00
|
|
|
LIST_ADDQ(rules->list, &tcpcheck->list);
|
2015-01-29 21:23:00 -05:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
static int add_tcpcheck_send_strs(struct tcpcheck_rules *rules, const char * const *strs)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *tcpcheck;
|
2020-02-21 12:41:28 -05:00
|
|
|
struct tcpcheck_send *send;
|
2016-08-10 13:29:09 -04:00
|
|
|
const char *in;
|
|
|
|
|
char *dst;
|
2015-01-29 21:23:00 -05:00
|
|
|
int i;
|
|
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
if ((tcpcheck = pool_alloc(pool_head_tcpcheck_rule)) == NULL)
|
2015-01-29 21:23:00 -05:00
|
|
|
return 0;
|
2017-10-23 09:45:20 -04:00
|
|
|
memset(tcpcheck, 0, sizeof(*tcpcheck));
|
|
|
|
|
tcpcheck->action = TCPCHK_ACT_SEND;
|
2020-02-21 12:41:28 -05:00
|
|
|
|
|
|
|
|
send = &tcpcheck->send;
|
|
|
|
|
send->type = TCPCHK_SEND_STRING;
|
|
|
|
|
|
2015-01-29 21:23:00 -05:00
|
|
|
for (i = 0; strs[i]; i++)
|
2020-03-30 13:52:29 -04:00
|
|
|
send->data.len += strlen(strs[i]);
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2020-03-30 13:52:29 -04:00
|
|
|
send->data.ptr = malloc(send->data.len + 1);
|
|
|
|
|
if (!isttest(send->data)) {
|
2017-11-24 11:34:44 -05:00
|
|
|
pool_free(pool_head_tcpcheck_rule, tcpcheck);
|
2015-01-29 21:23:00 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 13:52:29 -04:00
|
|
|
dst = send->data.ptr;
|
2015-01-29 21:23:00 -05:00
|
|
|
for (i = 0; strs[i]; i++)
|
2016-08-10 13:29:09 -04:00
|
|
|
for (in = strs[i]; (*dst = *in++); dst++);
|
|
|
|
|
*dst = 0;
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
LIST_ADDQ(rules->list, &tcpcheck->list);
|
2015-01-29 21:23:00 -05:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
static int enqueue_one_email_alert(struct proxy *p, struct server *s,
|
|
|
|
|
struct email_alertq *q, const char *msg)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
2017-10-23 09:45:20 -04:00
|
|
|
struct email_alert *alert;
|
2015-01-29 21:23:00 -05:00
|
|
|
struct tcpcheck_rule *tcpcheck;
|
|
|
|
|
struct check *check = &q->check;
|
|
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
if ((alert = pool_alloc(pool_head_email_alert)) == NULL)
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
2017-10-23 09:45:20 -04:00
|
|
|
LIST_INIT(&alert->list);
|
2020-03-30 14:34:34 -04:00
|
|
|
alert->rules.flags = 0;
|
|
|
|
|
alert->rules.list = calloc(1, sizeof(*alert->rules.list));
|
|
|
|
|
if (!alert->rules.list)
|
|
|
|
|
goto error;
|
|
|
|
|
LIST_INIT(alert->rules.list);
|
2017-10-20 15:34:32 -04:00
|
|
|
alert->srv = s;
|
2017-10-23 09:45:20 -04:00
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
if ((tcpcheck = pool_alloc(pool_head_tcpcheck_rule)) == NULL)
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
2017-10-23 09:45:20 -04:00
|
|
|
memset(tcpcheck, 0, sizeof(*tcpcheck));
|
|
|
|
|
tcpcheck->action = TCPCHK_ACT_CONNECT;
|
|
|
|
|
tcpcheck->comment = NULL;
|
2020-02-21 12:41:28 -05:00
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
LIST_ADDQ(alert->rules.list, &tcpcheck->list);
|
2015-01-29 21:23:00 -05:00
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "220 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
const char * const strs[4] = { "EHLO ", p->email_alert.myhostname, "\r\n" };
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_send_strs(&alert->rules, strs))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
const char * const strs[4] = { "MAIL FROM:<", p->email_alert.from, ">\r\n" };
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_send_strs(&alert->rules, strs))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
const char * const strs[4] = { "RCPT TO:<", p->email_alert.to, ">\r\n" };
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_send_strs(&alert->rules, strs))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
const char * const strs[2] = { "DATA\r\n" };
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_send_strs(&alert->rules, strs))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "354 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
struct tm tm;
|
|
|
|
|
char datestr[48];
|
|
|
|
|
const char * const strs[18] = {
|
2016-02-13 10:27:35 -05:00
|
|
|
"From: ", p->email_alert.from, "\r\n",
|
|
|
|
|
"To: ", p->email_alert.to, "\r\n",
|
|
|
|
|
"Date: ", datestr, "\r\n",
|
|
|
|
|
"Subject: [HAproxy Alert] ", msg, "\r\n",
|
|
|
|
|
"\r\n",
|
|
|
|
|
msg, "\r\n",
|
2015-01-29 21:23:00 -05:00
|
|
|
"\r\n",
|
2015-07-22 13:51:54 -04:00
|
|
|
".\r\n",
|
2015-01-29 21:23:00 -05:00
|
|
|
NULL
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
get_localtime(date.tv_sec, &tm);
|
|
|
|
|
|
|
|
|
|
if (strftime(datestr, sizeof(datestr), "%a, %d %b %Y %T %z (%Z)", &tm) == 0) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_send_strs(&alert->rules, strs))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
const char * const strs[2] = { "QUIT\r\n" };
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_send_strs(&alert->rules, strs))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!add_tcpcheck_expect_str(&alert->rules, "221 "))
|
2015-01-29 21:23:00 -05:00
|
|
|
goto error;
|
|
|
|
|
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2017-10-20 15:34:32 -04:00
|
|
|
task_wakeup(check->task, TASK_WOKEN_MSG);
|
2015-01-29 21:23:00 -05:00
|
|
|
LIST_ADDQ(&q->email_alerts, &alert->list);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
|
2015-01-29 21:23:00 -05:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
error:
|
|
|
|
|
email_alert_free(alert);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
static void enqueue_email_alert(struct proxy *p, struct server *s, const char *msg)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
struct mailer *mailer;
|
|
|
|
|
|
|
|
|
|
for (i = 0, mailer = p->email_alert.mailers.m->mailer_list;
|
|
|
|
|
i < p->email_alert.mailers.m->count; i++, mailer = mailer->next) {
|
2017-10-20 15:34:32 -04:00
|
|
|
if (!enqueue_one_email_alert(p, s, &p->email_alert.queues[i], msg)) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Email alert [%s] could not be enqueued: out of memory\n", p->id);
|
2015-01-29 21:23:00 -05:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Send email alert if configured.
|
|
|
|
|
*/
|
2015-02-05 21:11:57 -05:00
|
|
|
void send_email_alert(struct server *s, int level, const char *format, ...)
|
2015-01-29 21:23:00 -05:00
|
|
|
{
|
|
|
|
|
va_list argp;
|
|
|
|
|
char buf[1024];
|
|
|
|
|
int len;
|
|
|
|
|
struct proxy *p = s->proxy;
|
|
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
if (!p->email_alert.mailers.m || level > p->email_alert.level || format == NULL)
|
2015-01-29 21:23:00 -05:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
va_start(argp, format);
|
|
|
|
|
len = vsnprintf(buf, sizeof(buf), format, argp);
|
|
|
|
|
va_end(argp);
|
|
|
|
|
|
2017-02-09 06:19:27 -05:00
|
|
|
if (len < 0 || len >= sizeof(buf)) {
|
2017-11-24 10:50:31 -05:00
|
|
|
ha_alert("Email alert [%s] could not format message\n", p->id);
|
2015-01-29 21:23:00 -05:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-20 15:34:32 -04:00
|
|
|
enqueue_email_alert(p, s, buf);
|
2015-01-29 21:23:00 -05:00
|
|
|
}
|
|
|
|
|
|
2016-06-13 08:15:41 -04:00
|
|
|
/*
|
|
|
|
|
* Return value:
|
|
|
|
|
* the port to be used for the health check
|
|
|
|
|
* 0 in case no port could be found for the check
|
|
|
|
|
*/
|
2020-03-26 16:10:03 -04:00
|
|
|
static int srv_check_healthcheck_port(struct check *chk)
|
2016-06-13 08:15:41 -04:00
|
|
|
{
|
|
|
|
|
int i = 0;
|
|
|
|
|
struct server *srv = NULL;
|
|
|
|
|
|
|
|
|
|
srv = chk->server;
|
|
|
|
|
|
|
|
|
|
/* by default, we use the health check port ocnfigured */
|
|
|
|
|
if (chk->port > 0)
|
|
|
|
|
return chk->port;
|
|
|
|
|
|
|
|
|
|
/* try to get the port from check_core.addr if check.port not set */
|
|
|
|
|
i = get_host_port(&chk->addr);
|
|
|
|
|
if (i > 0)
|
|
|
|
|
return i;
|
|
|
|
|
|
|
|
|
|
/* try to get the port from server address */
|
|
|
|
|
/* prevent MAPPORTS from working at this point, since checks could
|
|
|
|
|
* not be performed in such case (MAPPORTS impose a relative ports
|
|
|
|
|
* based on live traffic)
|
|
|
|
|
*/
|
|
|
|
|
if (srv->flags & SRV_F_MAPPORTS)
|
|
|
|
|
return 0;
|
2017-01-06 11:41:29 -05:00
|
|
|
|
|
|
|
|
i = srv->svc_port; /* by default */
|
2016-06-13 08:15:41 -04:00
|
|
|
if (i > 0)
|
|
|
|
|
return i;
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-26 05:21:50 -05:00
|
|
|
/* Register start_checks() through the REGISTER_POST_CHECK hook. */
REGISTER_POST_CHECK(start_checks);
|
2013-10-06 17:24:13 -04:00
|
|
|
|
2020-03-26 12:38:49 -04:00
|
|
|
static int check_proxy_tcpcheck(struct proxy *px)
|
|
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *chk;
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if ((px->options2 & PR_O2_CHK_ANY) != PR_O2_TCPCHK_CHK)
|
2020-03-26 12:38:49 -04:00
|
|
|
goto out;
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!px->tcpcheck_rules.list) {
|
|
|
|
|
px->tcpcheck_rules.list = calloc(1, sizeof(*px->tcpcheck_rules.list));
|
|
|
|
|
if (!px->tcpcheck_rules.list) {
|
|
|
|
|
ha_alert("config : proxy '%s': out of memory.\n", px->id);
|
|
|
|
|
ret |= ERR_ALERT | ERR_FATAL;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
LIST_INIT(px->tcpcheck_rules.list);
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-26 12:38:49 -04:00
|
|
|
/* If there is no connect rule preceeding all send / expect rules, an
|
|
|
|
|
* implicit one is inserted before all others
|
|
|
|
|
*/
|
2020-03-30 14:34:34 -04:00
|
|
|
chk = get_first_tcpcheck_rule(&px->tcpcheck_rules);
|
2020-03-26 12:38:49 -04:00
|
|
|
if (!chk || chk->action != TCPCHK_ACT_CONNECT) {
|
|
|
|
|
chk = calloc(1, sizeof(*chk));
|
|
|
|
|
if (!chk) {
|
|
|
|
|
ha_alert("config : proxy '%s': unable to add implicit tcp-check connect rule "
|
|
|
|
|
"(out of memory).\n", px->id);
|
|
|
|
|
ret |= ERR_ALERT | ERR_FATAL;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
chk->action = TCPCHK_ACT_CONNECT;
|
|
|
|
|
chk->connect.options = TCPCHK_OPT_DEFAULT_CONNECT;
|
2020-03-30 14:34:34 -04:00
|
|
|
LIST_ADD(px->tcpcheck_rules.list, &chk->list);
|
2020-03-26 12:38:49 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
out:
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-26 14:48:20 -04:00
|
|
|
static int init_srv_check(struct server *srv)
|
|
|
|
|
{
|
|
|
|
|
const char *err;
|
|
|
|
|
struct tcpcheck_rule *r;
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
|
|
if (!srv->do_check)
|
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* If neither a port nor an addr was specified and no check transport
|
|
|
|
|
* layer is forced, then the transport layer used by the checks is the
|
|
|
|
|
* same as for the production traffic. Otherwise we use raw_sock by
|
|
|
|
|
* default, unless one is specified.
|
|
|
|
|
*/
|
|
|
|
|
if (!srv->check.port && !is_addr(&srv->check.addr)) {
|
|
|
|
|
if (!srv->check.use_ssl && srv->use_ssl != -1) {
|
|
|
|
|
srv->check.use_ssl = srv->use_ssl;
|
|
|
|
|
srv->check.xprt = srv->xprt;
|
|
|
|
|
}
|
|
|
|
|
else if (srv->check.use_ssl == 1)
|
|
|
|
|
srv->check.xprt = xprt_get(XPRT_SSL);
|
|
|
|
|
|
|
|
|
|
srv->check.send_proxy |= (srv->pp_opts);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* validate <srv> server health-check settings */
|
|
|
|
|
|
|
|
|
|
/* We need at least a service port, a check port or the first tcp-check
|
|
|
|
|
* rule must be a 'connect' one when checking an IPv4/IPv6 server.
|
|
|
|
|
*/
|
|
|
|
|
if ((srv_check_healthcheck_port(&srv->check) != 0) ||
|
|
|
|
|
(!is_inet_addr(&srv->check.addr) && (is_addr(&srv->check.addr) || !is_inet_addr(&srv->addr))))
|
|
|
|
|
goto init;
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!srv->proxy->tcpcheck_rules.list || LIST_ISEMPTY(srv->proxy->tcpcheck_rules.list)) {
|
2020-03-26 14:48:20 -04:00
|
|
|
ha_alert("config: %s '%s': server '%s' has neither service port nor check port.\n",
|
|
|
|
|
proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
|
|
|
|
|
ret |= ERR_ALERT | ERR_ABORT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* search the first action (connect / send / expect) in the list */
|
2020-03-30 14:34:34 -04:00
|
|
|
r = get_first_tcpcheck_rule(&srv->proxy->tcpcheck_rules);
|
2020-03-31 02:15:58 -04:00
|
|
|
if (!r || (r->action != TCPCHK_ACT_CONNECT) || (!r->connect.port && !get_host_port(&r->connect.addr))) {
|
2020-03-26 14:48:20 -04:00
|
|
|
ha_alert("config: %s '%s': server '%s' has neither service port nor check port "
|
|
|
|
|
"nor tcp_check rule 'connect' with port information.\n",
|
|
|
|
|
proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
|
|
|
|
|
ret |= ERR_ALERT | ERR_ABORT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* scan the tcp-check ruleset to ensure a port has been configured */
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry(r, srv->proxy->tcpcheck_rules.list, list) {
|
2020-03-31 02:15:58 -04:00
|
|
|
if ((r->action == TCPCHK_ACT_CONNECT) && (!r->connect.port || !get_host_port(&r->connect.addr))) {
|
2020-03-26 14:48:20 -04:00
|
|
|
ha_alert("config: %s '%s': server '%s' has neither service port nor check port, "
|
|
|
|
|
"and a tcp_check rule 'connect' with no port information.\n",
|
|
|
|
|
proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
|
|
|
|
|
ret |= ERR_ALERT | ERR_ABORT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
init:
|
|
|
|
|
err = init_check(&srv->check, srv->proxy->options2 & PR_O2_CHK_ANY);
|
|
|
|
|
if (err) {
|
|
|
|
|
ha_alert("config: %s '%s': unable to init check for server '%s' (%s).\n",
|
|
|
|
|
proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
|
|
|
|
|
ret |= ERR_ALERT | ERR_ABORT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
srv->check.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED;
|
|
|
|
|
global.maxsock++;
|
|
|
|
|
|
|
|
|
|
out:
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int init_srv_agent_check(struct server *srv)
|
|
|
|
|
{
|
|
|
|
|
const char *err;
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
|
|
if (!srv->do_agent)
|
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
|
|
err = init_check(&srv->agent, PR_O2_LB_AGENT_CHK);
|
|
|
|
|
if (err) {
|
|
|
|
|
ha_alert("config: %s '%s': unable to init agent-check for server '%s' (%s).\n",
|
|
|
|
|
proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
|
|
|
|
|
ret |= ERR_ALERT | ERR_ABORT;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!srv->agent.inter)
|
|
|
|
|
srv->agent.inter = srv->check.inter;
|
|
|
|
|
|
|
|
|
|
srv->agent.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_AGENT;
|
|
|
|
|
global.maxsock++;
|
|
|
|
|
|
|
|
|
|
out:
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
void deinit_proxy_tcpcheck(struct proxy *px)
|
2020-03-25 13:20:15 -04:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *chk, *back;
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!px->tcpcheck_rules.list || (px->tcpcheck_rules.flags & TCPCHK_RULES_SHARED))
|
|
|
|
|
goto end;
|
2020-03-25 13:20:15 -04:00
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
list_for_each_entry_safe(chk, back, px->tcpcheck_rules.list, list) {
|
2020-03-25 13:20:15 -04:00
|
|
|
LIST_DEL(&chk->list);
|
|
|
|
|
free_tcpcheck(chk, 0);
|
|
|
|
|
}
|
2020-03-30 14:34:34 -04:00
|
|
|
free(px->tcpcheck_rules.list);
|
|
|
|
|
|
|
|
|
|
end:
|
|
|
|
|
px->tcpcheck_rules.flags = 0;
|
|
|
|
|
px->tcpcheck_rules.list = NULL;
|
2020-03-25 13:20:15 -04:00
|
|
|
}
|
|
|
|
|
|
2020-03-26 14:48:20 -04:00
|
|
|
static void deinit_srv_check(struct server *srv)
|
|
|
|
|
{
|
|
|
|
|
if (srv->do_check)
|
|
|
|
|
free_check(&srv->check);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void deinit_srv_agent_check(struct server *srv)
|
|
|
|
|
{
|
|
|
|
|
if (srv->do_agent)
|
|
|
|
|
free_check(&srv->agent);
|
|
|
|
|
free(srv->agent.send_string);
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
static void deinit_tcpchecks()
|
|
|
|
|
{
|
|
|
|
|
struct tcpcheck_ruleset *rs, *rsb;
|
|
|
|
|
struct tcpcheck_rule *r, *rb;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry_safe(rs, rsb, &tcpchecks_list, list) {
|
|
|
|
|
LIST_DEL(&rs->list);
|
|
|
|
|
list_for_each_entry_safe(r, rb, &rs->rules, list) {
|
|
|
|
|
LIST_DEL(&r->list);
|
|
|
|
|
free_tcpcheck(r, 0);
|
|
|
|
|
}
|
|
|
|
|
free(rs->name);
|
|
|
|
|
free(rs);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-26 12:38:49 -04:00
|
|
|
|
|
|
|
|
/* post-parsing hooks: proxy ruleset validation, then per-server check setup */
REGISTER_POST_PROXY_CHECK(check_proxy_tcpcheck);
REGISTER_POST_SERVER_CHECK(init_srv_check);
REGISTER_POST_SERVER_CHECK(init_srv_agent_check);

/* deinit hooks: per-proxy rules, per-server checks, then the global rulesets */
REGISTER_PROXY_DEINIT(deinit_proxy_tcpcheck);
REGISTER_SERVER_DEINIT(deinit_srv_check);
REGISTER_SERVER_DEINIT(deinit_srv_agent_check);
REGISTER_POST_DEINIT(deinit_tcpchecks);
|
2020-03-26 14:48:20 -04:00
|
|
|
|
2020-02-21 12:14:59 -05:00
|
|
|
struct action_kw_list tcp_check_keywords = {
|
|
|
|
|
.list = LIST_HEAD_INIT(tcp_check_keywords.list),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Return the struct action_kw associated to a keyword */
|
|
|
|
|
static struct action_kw *action_kw_tcp_check_lookup(const char *kw)
|
|
|
|
|
{
|
|
|
|
|
return action_lookup(&tcp_check_keywords.list, kw);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void action_kw_tcp_check_build_list(struct buffer *chk)
|
|
|
|
|
{
|
|
|
|
|
action_build_list(&tcp_check_keywords.list, chk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Create a tcp-check rule resulting from parsing a custom keyword. */
|
|
|
|
|
static struct tcpcheck_rule *parse_tcpcheck_action(char **args, int cur_arg, struct proxy *px,
|
2020-03-30 14:34:34 -04:00
|
|
|
struct list *rules, struct action_kw *kw,
|
|
|
|
|
const char *file, int line, char **errmsg)
|
2020-02-21 12:14:59 -05:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *chk = NULL;
|
|
|
|
|
struct act_rule *actrule = NULL;
|
|
|
|
|
|
|
|
|
|
actrule = calloc(1, sizeof(*actrule));
|
|
|
|
|
if (!actrule) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
actrule->kw = kw;
|
|
|
|
|
actrule->from = ACT_F_TCP_CHK;
|
|
|
|
|
|
|
|
|
|
cur_arg++;
|
|
|
|
|
if (kw->parse((const char **)args, &cur_arg, px, actrule, errmsg) == ACT_RET_PRS_ERR) {
|
|
|
|
|
memprintf(errmsg, "'%s' : %s", kw->kw, *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chk = calloc(1, sizeof(*chk));
|
|
|
|
|
if (!chk) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
chk->action = TCPCHK_ACT_ACTION_KW;
|
|
|
|
|
chk->action_kw.rule = actrule;
|
|
|
|
|
return chk;
|
|
|
|
|
|
|
|
|
|
error:
|
|
|
|
|
free(actrule);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
static struct tcpcheck_rule *parse_tcpcheck_connect(char **args, int cur_arg, struct proxy *px, struct list *rules,
|
2020-03-30 09:19:03 -04:00
|
|
|
const char *file, int line, char **errmsg)
|
2020-03-25 13:20:15 -04:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *chk = NULL;
|
2020-03-31 02:15:58 -04:00
|
|
|
struct sockaddr_storage *sk = NULL;
|
2020-03-30 07:16:44 -04:00
|
|
|
char *comment = NULL, *sni = NULL, *alpn = NULL;
|
2020-03-30 09:19:03 -04:00
|
|
|
struct sample_expr *port_expr = NULL;
|
2020-03-25 13:20:15 -04:00
|
|
|
unsigned short conn_opts = 0;
|
|
|
|
|
long port = 0;
|
2020-03-30 07:16:44 -04:00
|
|
|
int alpn_len = 0;
|
2020-03-25 13:20:15 -04:00
|
|
|
|
|
|
|
|
list_for_each_entry(chk, rules, list) {
|
2020-02-21 12:14:59 -05:00
|
|
|
if (chk->action != TCPCHK_ACT_COMMENT && chk->action != TCPCHK_ACT_ACTION_KW)
|
2020-03-25 13:20:15 -04:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (&chk->list != rules && chk->action != TCPCHK_ACT_CONNECT) {
|
2020-02-21 12:14:59 -05:00
|
|
|
memprintf(errmsg, "first step MUST also be a 'connect', "
|
|
|
|
|
"optionnaly preceded by a 'set-var', an 'unset-var' or a 'comment', "
|
|
|
|
|
"when there is a 'connect' step in the tcp-check ruleset");
|
2020-03-25 13:20:15 -04:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cur_arg++;
|
|
|
|
|
while (*(args[cur_arg])) {
|
2020-03-30 07:54:42 -04:00
|
|
|
if (strcmp(args[cur_arg], "default") == 0) {
|
|
|
|
|
if (cur_arg != 2 || *(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' is exclusive with all other options", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
conn_opts = TCPCHK_OPT_DEFAULT_CONNECT;
|
|
|
|
|
}
|
2020-03-31 02:15:58 -04:00
|
|
|
else if (strcmp(args[cur_arg], "addr") == 0) {
|
|
|
|
|
int port1, port2;
|
|
|
|
|
struct protocol *proto;
|
|
|
|
|
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects <ipv4|ipv6> as argument.", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sk = str2sa_range(args[cur_arg+1], NULL, &port1, &port2, errmsg, NULL, NULL, 1);
|
|
|
|
|
if (!sk) {
|
|
|
|
|
memprintf(errmsg, "'%s' : %s.", args[cur_arg], *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
proto = protocol_by_family(sk->ss_family);
|
|
|
|
|
if (!proto || !proto->connect) {
|
|
|
|
|
memprintf(errmsg, "'%s' : connect() not supported for this address family.\n",
|
|
|
|
|
args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (port1 != port2) {
|
|
|
|
|
memprintf(errmsg, "'%s' : port ranges and offsets are not allowed in '%s'\n",
|
|
|
|
|
args[cur_arg], args[cur_arg+1]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cur_arg++;
|
|
|
|
|
}
|
2020-03-30 07:54:42 -04:00
|
|
|
else if (strcmp(args[cur_arg], "port") == 0) {
|
2020-03-30 09:19:03 -04:00
|
|
|
const char *p, *end;
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
if (!*(args[cur_arg+1])) {
|
2020-03-30 09:19:03 -04:00
|
|
|
memprintf(errmsg, "'%s' expects a port number or a sample expression as argument.", args[cur_arg]);
|
2020-03-25 13:20:15 -04:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
2020-03-30 09:19:03 -04:00
|
|
|
|
|
|
|
|
port = 0;
|
|
|
|
|
release_sample_expr(port_expr);
|
|
|
|
|
p = args[cur_arg]; end = p + strlen(p);
|
|
|
|
|
port = read_uint(&p, end);
|
|
|
|
|
if (p != end) {
|
|
|
|
|
int idx = 0;
|
|
|
|
|
|
|
|
|
|
px->conf.args.ctx = ARGC_SRV;
|
|
|
|
|
port_expr = sample_parse_expr((char *[]){args[cur_arg], NULL}, &idx,
|
|
|
|
|
file, line, errmsg, &px->conf.args, NULL);
|
|
|
|
|
|
|
|
|
|
if (!port_expr) {
|
|
|
|
|
memprintf(errmsg, "error detected while parsing port expression : %s", *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (!(port_expr->fetch->val & SMP_VAL_BE_CHK_RUL)) {
|
|
|
|
|
memprintf(errmsg, "error detected while parsing port expression : "
|
|
|
|
|
" fetch method '%s' extracts information from '%s', "
|
|
|
|
|
"none of which is available here.\n",
|
|
|
|
|
args[cur_arg], sample_src_names(port_expr->fetch->use));
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
px->http_needed |= !!(port_expr->fetch->use & SMP_USE_HTTP_ANY);
|
|
|
|
|
}
|
|
|
|
|
else if (port > 65535 || port < 1) {
|
|
|
|
|
memprintf(errmsg, "expects a valid TCP port (from range 1 to 65535) or a sample expression, got %s.",
|
|
|
|
|
args[cur_arg]);
|
2020-03-25 13:20:15 -04:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (strcmp(args[cur_arg], "comment") == 0) {
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
free(comment);
|
|
|
|
|
comment = strdup(args[cur_arg]);
|
|
|
|
|
if (!comment) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (strcmp(args[cur_arg], "send-proxy") == 0)
|
|
|
|
|
conn_opts |= TCPCHK_OPT_SEND_PROXY;
|
2020-03-30 07:07:02 -04:00
|
|
|
else if (strcmp(args[cur_arg], "via-socks4") == 0)
|
|
|
|
|
conn_opts |= TCPCHK_OPT_SOCKS4;
|
2020-03-25 13:20:15 -04:00
|
|
|
else if (strcmp(args[cur_arg], "linger") == 0)
|
|
|
|
|
conn_opts |= TCPCHK_OPT_LINGER;
|
|
|
|
|
#ifdef USE_OPENSSL
|
|
|
|
|
else if (strcmp(args[cur_arg], "ssl") == 0) {
|
|
|
|
|
px->options |= PR_O_TCPCHK_SSL;
|
|
|
|
|
conn_opts |= TCPCHK_OPT_SSL;
|
|
|
|
|
}
|
2020-03-30 07:00:05 -04:00
|
|
|
else if (strcmp(args[cur_arg], "sni") == 0) {
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
free(sni);
|
|
|
|
|
sni = strdup(args[cur_arg]);
|
|
|
|
|
if (!sni) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-03-30 07:16:44 -04:00
|
|
|
else if (strcmp(args[cur_arg], "alpn") == 0) {
|
|
|
|
|
#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
|
|
|
|
|
free(alpn);
|
|
|
|
|
if (ssl_sock_parse_alpn(args[cur_arg + 1], &alpn, &alpn_len, errmsg)) {
|
|
|
|
|
memprintf(errmsg, "'%s' : %s", args[cur_arg], *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
#else
|
|
|
|
|
memprintf(errmsg, "'%s' : library does not support TLS ALPN extension.", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
2020-03-25 13:20:15 -04:00
|
|
|
#endif /* USE_OPENSSL */
|
|
|
|
|
|
|
|
|
|
else {
|
2020-03-31 02:15:58 -04:00
|
|
|
memprintf(errmsg, "expects 'comment', 'port', 'addr', 'send-proxy'"
|
2020-03-25 13:20:15 -04:00
|
|
|
#ifdef USE_OPENSSL
|
2020-03-30 07:16:44 -04:00
|
|
|
", 'ssl', 'sni', 'alpn'"
|
2020-03-25 13:20:15 -04:00
|
|
|
#endif /* USE_OPENSSL */
|
2020-03-30 07:54:42 -04:00
|
|
|
" or 'via-socks4', 'linger', 'default' but got '%s' as argument.",
|
2020-03-25 13:20:15 -04:00
|
|
|
args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chk = calloc(1, sizeof(*chk));
|
|
|
|
|
if (!chk) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2020-02-21 12:49:05 -05:00
|
|
|
chk->action = TCPCHK_ACT_CONNECT;
|
|
|
|
|
chk->comment = comment;
|
|
|
|
|
chk->connect.port = port;
|
|
|
|
|
chk->connect.options = conn_opts;
|
2020-03-30 07:00:05 -04:00
|
|
|
chk->connect.sni = sni;
|
2020-03-30 07:16:44 -04:00
|
|
|
chk->connect.alpn = alpn;
|
|
|
|
|
chk->connect.alpn_len= alpn_len;
|
2020-03-30 09:19:03 -04:00
|
|
|
chk->connect.port_expr= port_expr;
|
2020-03-31 02:15:58 -04:00
|
|
|
if (sk)
|
|
|
|
|
chk->connect.addr = *sk;
|
2020-03-25 13:20:15 -04:00
|
|
|
return chk;
|
|
|
|
|
|
|
|
|
|
error:
|
2020-03-30 07:16:44 -04:00
|
|
|
free(alpn);
|
2020-03-30 07:00:05 -04:00
|
|
|
free(sni);
|
2020-03-25 13:20:15 -04:00
|
|
|
free(comment);
|
2020-03-30 09:19:03 -04:00
|
|
|
release_sample_expr(port_expr);
|
2020-03-25 13:20:15 -04:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 13:52:29 -04:00
|
|
|
static struct tcpcheck_rule *parse_tcpcheck_send(char **args, int cur_arg, struct proxy *px, struct list *rules,
|
2020-03-30 14:34:34 -04:00
|
|
|
const char *file, int line, char **errmsg)
|
2020-03-25 13:20:15 -04:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *chk = NULL;
|
2020-03-30 13:52:29 -04:00
|
|
|
char *comment = NULL, *data = NULL;
|
2020-02-21 12:41:28 -05:00
|
|
|
enum tcpcheck_send_type type = TCPCHK_SEND_UNDEF;
|
2020-03-25 13:20:15 -04:00
|
|
|
|
2020-02-21 12:41:28 -05:00
|
|
|
type = ((strcmp(args[cur_arg], "send-binary") == 0) ? TCPCHK_SEND_BINARY : TCPCHK_SEND_STRING);
|
2020-03-25 13:20:15 -04:00
|
|
|
if (!*(args[cur_arg+1])) {
|
2020-02-21 12:41:28 -05:00
|
|
|
memprintf(errmsg, "'%s' expects a %s as argument",
|
|
|
|
|
(type == TCPCHK_SEND_BINARY ? "binary string": "string"), args[cur_arg]);
|
2020-03-25 13:20:15 -04:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 13:52:29 -04:00
|
|
|
data = args[cur_arg+1];
|
|
|
|
|
|
|
|
|
|
cur_arg += 2;
|
|
|
|
|
while (*(args[cur_arg])) {
|
|
|
|
|
if (strcmp(args[cur_arg], "comment") == 0) {
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
free(comment);
|
|
|
|
|
comment = strdup(args[cur_arg]);
|
|
|
|
|
if (!comment) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2020-03-25 13:20:15 -04:00
|
|
|
}
|
2020-03-30 13:52:29 -04:00
|
|
|
else if (strcmp(args[cur_arg], "log-format") == 0) {
|
|
|
|
|
if (type == TCPCHK_SEND_BINARY)
|
|
|
|
|
type = TCPCHK_SEND_BINARY_LF;
|
|
|
|
|
else if (type == TCPCHK_SEND_STRING)
|
|
|
|
|
type = TCPCHK_SEND_STRING_LF;
|
2020-03-25 13:20:15 -04:00
|
|
|
}
|
2020-03-30 13:52:29 -04:00
|
|
|
else {
|
|
|
|
|
memprintf(errmsg, "expects 'comment', 'log-format' but got '%s' as argument.",
|
|
|
|
|
args[cur_arg]);
|
2020-03-25 13:20:15 -04:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chk = calloc(1, sizeof(*chk));
|
|
|
|
|
if (!chk) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2020-02-21 12:41:28 -05:00
|
|
|
chk->action = TCPCHK_ACT_SEND;
|
|
|
|
|
chk->comment = comment;
|
|
|
|
|
chk->send.type = type;
|
2020-03-30 13:52:29 -04:00
|
|
|
|
|
|
|
|
switch (chk->send.type) {
|
|
|
|
|
case TCPCHK_SEND_STRING:
|
|
|
|
|
chk->send.data = ist2(strdup(data), strlen(data));
|
|
|
|
|
if (!isttest(chk->send.data)) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_BINARY:
|
|
|
|
|
if (parse_binary(data, &chk->send.data.ptr, (int *)&chk->send.data.len, errmsg) == 0) {
|
|
|
|
|
memprintf(errmsg, "'%s' invalid binary string (%s).\n", data, *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_STRING_LF:
|
|
|
|
|
case TCPCHK_SEND_BINARY_LF:
|
|
|
|
|
LIST_INIT(&chk->send.fmt);
|
|
|
|
|
px->conf.args.ctx = ARGC_SRV;
|
|
|
|
|
if (!parse_logformat_string(data, px, &chk->send.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
|
|
|
|
|
memprintf(errmsg, "'%s' invalid log-format string (%s).\n", data, *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_SEND_UNDEF:
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
return chk;
|
|
|
|
|
|
|
|
|
|
error:
|
2020-03-30 13:52:29 -04:00
|
|
|
free(chk);
|
2020-03-25 13:20:15 -04:00
|
|
|
free(comment);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
static struct tcpcheck_rule *parse_tcpcheck_comment(char **args, int cur_arg, struct proxy *px, struct list *rules,
|
|
|
|
|
const char *file, int line, char **errmsg)
|
2020-03-25 13:20:15 -04:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *chk = NULL;
|
|
|
|
|
char *comment = NULL;
|
|
|
|
|
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "expects a string as argument");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
comment = strdup(args[cur_arg]);
|
|
|
|
|
if (!comment) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chk = calloc(1, sizeof(*chk));
|
|
|
|
|
if (!chk) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
chk->action = TCPCHK_ACT_COMMENT;
|
|
|
|
|
chk->comment = comment;
|
|
|
|
|
return chk;
|
|
|
|
|
|
|
|
|
|
error:
|
|
|
|
|
free(comment);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
static struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct proxy *px, struct list *rules,
|
|
|
|
|
const char *file, int line, char **errmsg)
|
2020-03-25 13:20:15 -04:00
|
|
|
{
|
|
|
|
|
struct tcpcheck_rule *prev_check, *chk = NULL;
|
|
|
|
|
char *str = NULL, *comment = NULL, *pattern = NULL;
|
|
|
|
|
enum tcpcheck_expect_type type = TCPCHK_EXPECT_UNDEF;
|
2020-04-01 05:04:52 -04:00
|
|
|
enum healthcheck_status err_st = HCHK_STATUS_L7RSP;
|
|
|
|
|
enum healthcheck_status tout_st = HCHK_STATUS_L7TOUT;
|
2020-03-25 13:20:15 -04:00
|
|
|
long min_recv = -1;
|
|
|
|
|
int inverse = 0, with_capture = 0;
|
|
|
|
|
|
|
|
|
|
if (!*(args[cur_arg+1]) || !*(args[cur_arg+2])) {
|
|
|
|
|
memprintf(errmsg, "expects a pattern (type+string) as arguments");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cur_arg++;
|
|
|
|
|
while (*(args[cur_arg])) {
|
|
|
|
|
int in_pattern = 0;
|
|
|
|
|
|
|
|
|
|
rescan:
|
|
|
|
|
if (strcmp(args[cur_arg], "min-recv") == 0) {
|
|
|
|
|
if (in_pattern) {
|
|
|
|
|
memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a integer as argument", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
/* Use an signed integer here because of chksize */
|
|
|
|
|
cur_arg++;
|
|
|
|
|
min_recv = atol(args[cur_arg]);
|
|
|
|
|
if (min_recv < -1 || min_recv > INT_MAX) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects -1 or an integer from 0 to INT_MAX" , args[cur_arg-1]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (*(args[cur_arg]) == '!') {
|
|
|
|
|
in_pattern = 1;
|
|
|
|
|
while (*(args[cur_arg]) == '!') {
|
|
|
|
|
inverse = !inverse;
|
|
|
|
|
args[cur_arg]++;
|
|
|
|
|
}
|
|
|
|
|
if (!*(args[cur_arg]))
|
|
|
|
|
cur_arg++;
|
|
|
|
|
goto rescan;
|
|
|
|
|
}
|
|
|
|
|
else if (strcmp(args[cur_arg], "string") == 0 || strcmp(args[cur_arg], "binary") == 0 ||
|
|
|
|
|
strcmp(args[cur_arg], "rstring") == 0 || strcmp(args[cur_arg], "rbinary") == 0) {
|
|
|
|
|
if (type != TCPCHK_EXPECT_UNDEF) {
|
|
|
|
|
memprintf(errmsg, "only on pattern expected");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_STRING :
|
|
|
|
|
((*(args[cur_arg]) == 'b') ? TCPCHK_EXPECT_BINARY :
|
|
|
|
|
((*(args[cur_arg]+1) == 's') ? TCPCHK_EXPECT_REGEX : TCPCHK_EXPECT_REGEX_BINARY)));
|
|
|
|
|
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
pattern = args[cur_arg];
|
|
|
|
|
}
|
|
|
|
|
else if (strcmp(args[cur_arg], "comment") == 0) {
|
|
|
|
|
if (in_pattern) {
|
|
|
|
|
memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
free(comment);
|
|
|
|
|
comment = strdup(args[cur_arg]);
|
|
|
|
|
if (!comment) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-04-01 05:04:52 -04:00
|
|
|
else if (strcmp(args[cur_arg], "error-status") == 0) {
|
|
|
|
|
if (in_pattern) {
|
|
|
|
|
memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (strcasecmp(args[cur_arg+1], "L7RSP") == 0)
|
|
|
|
|
err_st = HCHK_STATUS_L7RSP;
|
|
|
|
|
else if (strcasecmp(args[cur_arg+1], "L7STS") == 0)
|
|
|
|
|
err_st = HCHK_STATUS_L7STS;
|
|
|
|
|
else if (strcasecmp(args[cur_arg+1], "L6RSP") == 0)
|
|
|
|
|
err_st = HCHK_STATUS_L6RSP;
|
|
|
|
|
else if (strcasecmp(args[cur_arg+1], "L4CON") == 0)
|
|
|
|
|
err_st = HCHK_STATUS_L4CON;
|
|
|
|
|
else {
|
|
|
|
|
memprintf(errmsg, "'%s' only supports 'L4CON', 'L6RSP', 'L7RSP' or 'L7STS' status (got '%s').",
|
|
|
|
|
args[cur_arg], args[cur_arg+1]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
}
|
|
|
|
|
else if (strcmp(args[cur_arg], "tout-status") == 0) {
|
|
|
|
|
if (in_pattern) {
|
|
|
|
|
memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (!*(args[cur_arg+1])) {
|
|
|
|
|
memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
if (strcasecmp(args[cur_arg+1], "L7TOUT") == 0)
|
|
|
|
|
tout_st = HCHK_STATUS_L7TOUT;
|
|
|
|
|
else if (strcasecmp(args[cur_arg+1], "L6TOUT") == 0)
|
|
|
|
|
tout_st = HCHK_STATUS_L6TOUT;
|
|
|
|
|
else if (strcasecmp(args[cur_arg+1], "L4TOUT") == 0)
|
|
|
|
|
tout_st = HCHK_STATUS_L4TOUT;
|
|
|
|
|
else {
|
|
|
|
|
memprintf(errmsg, "'%s' only supports 'L4TOUT', 'L6TOUT' or 'L7TOUT' status (got '%s').",
|
|
|
|
|
args[cur_arg], args[cur_arg+1]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
cur_arg++;
|
|
|
|
|
}
|
2020-03-25 13:20:15 -04:00
|
|
|
else {
|
|
|
|
|
memprintf(errmsg, "'only supports min-recv, '[!]binary', '[!]string', '[!]rstring', '[!]rbinary'"
|
|
|
|
|
" or comment but got '%s' as argument.", args[cur_arg]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cur_arg++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (comment) {
|
|
|
|
|
char *p = comment;
|
|
|
|
|
|
|
|
|
|
while (*p) {
|
|
|
|
|
if (*p == '\\') {
|
|
|
|
|
p++;
|
|
|
|
|
if (!*p || !isdigit((unsigned char)*p) ||
|
|
|
|
|
(*p == 'x' && (!*(p+1) || !*(p+2) || !ishex(*(p+1)) || !ishex(*(p+2))))) {
|
|
|
|
|
memprintf(errmsg, "invalid backreference in 'comment' argument");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
with_capture = 1;
|
|
|
|
|
}
|
|
|
|
|
p++;
|
|
|
|
|
}
|
|
|
|
|
if (with_capture && !inverse)
|
|
|
|
|
memprintf(errmsg, "using backreference in a positive expect comment is useless");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chk = calloc(1, sizeof(*chk));
|
|
|
|
|
if (!chk) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
chk->action = TCPCHK_ACT_EXPECT;
|
|
|
|
|
chk->comment = comment;
|
|
|
|
|
chk->expect.type = type;
|
|
|
|
|
chk->expect.min_recv = min_recv;
|
|
|
|
|
chk->expect.inverse = inverse;
|
|
|
|
|
chk->expect.with_capture = with_capture;
|
2020-04-01 05:04:52 -04:00
|
|
|
chk->expect.err_status = err_st;
|
|
|
|
|
chk->expect.tout_status = tout_st;
|
2020-03-25 13:20:15 -04:00
|
|
|
|
|
|
|
|
switch (chk->expect.type) {
|
|
|
|
|
case TCPCHK_EXPECT_STRING:
|
|
|
|
|
chk->expect.string = strdup(pattern);
|
|
|
|
|
chk->expect.length = strlen(pattern);
|
|
|
|
|
if (!chk->expect.string) {
|
|
|
|
|
memprintf(errmsg, "out of memory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_BINARY:
|
|
|
|
|
if (parse_binary(pattern, &chk->expect.string, &chk->expect.length, errmsg) == 0) {
|
|
|
|
|
memprintf(errmsg, "invalid binary string (%s)", *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
case TCPCHK_EXPECT_REGEX:
|
|
|
|
|
case TCPCHK_EXPECT_REGEX_BINARY:
|
|
|
|
|
chk->expect.regex = regex_comp(pattern, 1, with_capture, errmsg);
|
|
|
|
|
if (!chk->expect.regex)
|
|
|
|
|
goto error;
|
|
|
|
|
break;
|
|
|
|
|
case TCPCHK_EXPECT_UNDEF:
|
|
|
|
|
free(chk);
|
|
|
|
|
memprintf(errmsg, "pattern not found");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* All tcp-check expect points back to the first inverse expect rule in
|
|
|
|
|
* a chain of one or more expect rule, potentially itself.
|
|
|
|
|
*/
|
|
|
|
|
chk->expect.head = chk;
|
|
|
|
|
list_for_each_entry_rev(prev_check, rules, list) {
|
|
|
|
|
if (prev_check->action == TCPCHK_ACT_EXPECT) {
|
|
|
|
|
if (prev_check->expect.inverse)
|
|
|
|
|
chk->expect.head = prev_check;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2020-02-24 11:34:11 -05:00
|
|
|
if (prev_check->action != TCPCHK_ACT_COMMENT && prev_check->action != TCPCHK_ACT_ACTION_KW)
|
2020-03-25 13:20:15 -04:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
return chk;
|
|
|
|
|
|
|
|
|
|
error:
|
|
|
|
|
free(chk);
|
|
|
|
|
free(str);
|
|
|
|
|
free(comment);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parses the "tcp-check" proxy keyword */
|
|
|
|
|
static int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx,
|
|
|
|
|
struct proxy *defpx, const char *file, int line,
|
|
|
|
|
char **errmsg)
|
|
|
|
|
{
|
2020-03-30 14:34:34 -04:00
|
|
|
struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
|
2020-03-25 13:20:15 -04:00
|
|
|
struct tcpcheck_rule *chk = NULL;
|
2020-02-25 11:19:17 -05:00
|
|
|
int index, cur_arg, ret = 0;
|
2020-03-25 13:20:15 -04:00
|
|
|
|
|
|
|
|
if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[0], NULL))
|
|
|
|
|
ret = 1;
|
|
|
|
|
|
|
|
|
|
if (curpx == defpx) {
|
|
|
|
|
memprintf(errmsg, "'%s' not allowed in 'defaults' section.", args[0]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 14:34:34 -04:00
|
|
|
if (rules->flags & TCPCHK_RULES_DEF) {
|
|
|
|
|
/* Only shared ruleset can be inherited from the default section */
|
|
|
|
|
rules->flags = 0;
|
|
|
|
|
rules->list = NULL;
|
|
|
|
|
}
|
|
|
|
|
if (rules->list && (rules->flags & TCPCHK_RULES_SHARED)) {
|
|
|
|
|
memprintf(errmsg, "%s : A shared tcp-check ruleset already configured.", args[0]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!rules->list) {
|
|
|
|
|
rules->list = calloc(1, sizeof(*rules->list));
|
|
|
|
|
if (!rules->list) {
|
2020-03-25 13:20:15 -04:00
|
|
|
memprintf(errmsg, "%s : out of memory.", args[0]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2020-03-30 14:34:34 -04:00
|
|
|
LIST_INIT(rules->list);
|
2020-03-25 13:20:15 -04:00
|
|
|
}
|
|
|
|
|
|
2020-02-25 11:19:17 -05:00
|
|
|
index = 0;
|
2020-03-30 14:34:34 -04:00
|
|
|
if (!LIST_ISEMPTY(rules->list)) {
|
|
|
|
|
chk = LIST_PREV(rules->list, typeof(chk), list);
|
2020-02-25 11:19:17 -05:00
|
|
|
index = chk->index + 1;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-25 13:20:15 -04:00
|
|
|
cur_arg = 1;
|
|
|
|
|
if (strcmp(args[cur_arg], "connect") == 0)
|
2020-03-30 14:34:34 -04:00
|
|
|
chk = parse_tcpcheck_connect(args, cur_arg, curpx, rules->list, file, line, errmsg);
|
2020-03-25 13:20:15 -04:00
|
|
|
else if (strcmp(args[cur_arg], "send") == 0 || strcmp(args[cur_arg], "send-binary") == 0)
|
2020-03-30 14:34:34 -04:00
|
|
|
chk = parse_tcpcheck_send(args, cur_arg, curpx, rules->list, file, line, errmsg);
|
2020-03-25 13:20:15 -04:00
|
|
|
else if (strcmp(args[cur_arg], "expect") == 0)
|
2020-03-30 14:34:34 -04:00
|
|
|
chk = parse_tcpcheck_expect(args, cur_arg, curpx, rules->list, file, line, errmsg);
|
2020-03-25 13:20:15 -04:00
|
|
|
else if (strcmp(args[cur_arg], "comment") == 0)
|
2020-03-30 14:34:34 -04:00
|
|
|
chk = parse_tcpcheck_comment(args, cur_arg, curpx, rules->list, file, line, errmsg);
|
2020-03-25 13:20:15 -04:00
|
|
|
else {
|
2020-02-21 12:14:59 -05:00
|
|
|
struct action_kw *kw = action_kw_tcp_check_lookup(args[cur_arg]);
|
|
|
|
|
|
|
|
|
|
if (!kw) {
|
|
|
|
|
action_kw_tcp_check_build_list(&trash);
|
|
|
|
|
memprintf(errmsg, "'%s' only supports 'comment', 'connect', 'send', 'send-binary', 'expect'"
|
|
|
|
|
"%s%s. but got '%s'",
|
|
|
|
|
args[0], (*trash.area ? ", " : ""), trash.area, args[1]);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2020-03-30 14:34:34 -04:00
|
|
|
chk = parse_tcpcheck_action(args, cur_arg, curpx, rules->list, kw, file, line, errmsg);
|
2020-03-25 13:20:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!chk) {
|
|
|
|
|
memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
ret = (*errmsg != NULL); /* Handle warning */
|
|
|
|
|
|
|
|
|
|
/* No error: add the tcp-check rule in the list */
|
2020-02-25 11:19:17 -05:00
|
|
|
chk->index = index;
|
2020-03-30 14:34:34 -04:00
|
|
|
LIST_ADDQ(rules->list, &chk->list);
|
2020-03-25 13:20:15 -04:00
|
|
|
return ret;
|
|
|
|
|
|
|
|
|
|
error:
|
2020-03-30 14:34:34 -04:00
|
|
|
deinit_proxy_tcpcheck(curpx);
|
2020-03-25 13:20:15 -04:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct cfg_kw_list cfg_kws = {ILH, {
|
|
|
|
|
{ CFG_LISTEN, "tcp-check", proxy_parse_tcpcheck },
|
|
|
|
|
{ 0, NULL, NULL },
|
|
|
|
|
}};
|
|
|
|
|
|
|
|
|
|
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
|
|
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
/*
|
|
|
|
|
* Local variables:
|
|
|
|
|
* c-indent-level: 8
|
|
|
|
|
* c-basic-offset: 8
|
|
|
|
|
* End:
|
|
|
|
|
*/
|