2006-06-25 20:48:02 -04:00
|
|
|
/*
|
|
|
|
|
* Health-checks functions.
|
|
|
|
|
*
|
2008-01-13 12:40:14 -05:00
|
|
|
* Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
|
2008-02-17 19:26:35 -05:00
|
|
|
* Copyright 2007-2008 Krzysztof Piotr Oledzki <ole@ans.pl>
|
2006-06-25 20:48:02 -04:00
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2008-01-18 06:18:15 -05:00
|
|
|
#include <assert.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <errno.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
#include <stdio.h>
|
2007-10-14 17:40:01 -04:00
|
|
|
#include <stdlib.h>
|
2006-06-29 11:53:05 -04:00
|
|
|
#include <string.h>
|
2007-10-14 17:40:01 -04:00
|
|
|
#include <time.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <sys/socket.h>
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
|
|
2006-06-29 11:53:05 -04:00
|
|
|
#include <common/compat.h>
|
|
|
|
|
#include <common/config.h>
|
|
|
|
|
#include <common/mini-clist.h>
|
2007-04-15 14:56:27 -04:00
|
|
|
#include <common/standard.h>
|
2006-06-29 11:53:05 -04:00
|
|
|
#include <common/time.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
#include <types/global.h>
|
|
|
|
|
|
|
|
|
|
#include <proto/backend.h>
|
2008-02-17 19:26:35 -05:00
|
|
|
#include <proto/buffers.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/fd.h>
|
|
|
|
|
#include <proto/log.h>
|
|
|
|
|
#include <proto/queue.h>
|
2007-03-18 13:34:41 -04:00
|
|
|
#include <proto/proto_http.h>
|
2008-01-13 12:40:14 -05:00
|
|
|
#include <proto/proto_tcp.h>
|
2006-12-31 11:46:05 -05:00
|
|
|
#include <proto/proxy.h>
|
2006-06-25 20:48:02 -04:00
|
|
|
#include <proto/server.h>
|
|
|
|
|
#include <proto/task.h>
|
|
|
|
|
|
2007-11-30 04:41:39 -05:00
|
|
|
/* sends a log message when a backend goes down, and also sets last
|
|
|
|
|
* change date.
|
|
|
|
|
*/
|
|
|
|
|
static void set_backend_down(struct proxy *be)
|
|
|
|
|
{
|
|
|
|
|
be->last_change = now.tv_sec;
|
|
|
|
|
be->down_trans++;
|
|
|
|
|
|
|
|
|
|
Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
|
|
|
|
|
send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Redistribute pending connections when a server goes down. The number of
|
|
|
|
|
* connections redistributed is returned.
|
|
|
|
|
*/
|
|
|
|
|
static int redistribute_pending(struct server *s)
|
|
|
|
|
{
|
|
|
|
|
struct pendconn *pc, *pc_bck, *pc_end;
|
|
|
|
|
int xferred = 0;
|
|
|
|
|
|
|
|
|
|
FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
|
|
|
|
|
struct session *sess = pc->sess;
|
|
|
|
|
if (sess->be->options & PR_O_REDISP) {
|
|
|
|
|
/* The REDISP option was specified. We will ignore
|
|
|
|
|
* cookie and force to balance or use the dispatcher.
|
|
|
|
|
*/
|
2008-01-06 10:36:16 -05:00
|
|
|
|
[MEDIUM]: Prevent redispatcher from selecting the same server, version #3
When haproxy decides that session needs to be redispatched it chose a server,
but there is no guarantee for it to be a different one. So, it often
happens that selected server is exactly the same that it was previously, so
a client ends up with a 503 error anyway, especially when one sever has
much bigger weight than others.
Changes from the previous version:
- drop stupid and unnecessary SN_DIRECT changes
- assign_server(): use srvtoavoid to keep the old server and clear s->srv
so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again)
and get_server_rr_with_conns has chances to work (previously
we were passing a NULL here)
- srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns
incrementing as t->srv was guaranteed to be NULL
- add avoididx to get_server_rr_with_conns. I hope I correctly understand this code.
- fix http_flush_cookie_flags() and move it to assign_server_and_queue()
directly. The code here was supposed to set CK_DOWN and clear CK_VALID,
but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so:
if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
txn->flags ^= (TX_CK_VALID | TX_CK_DOWN);
was really a:
if ((txn->flags & TX_CK_MASK) == TX_CK_VALID)
txn->flags &= TX_CK_VALID
Now haproxy logs "--DI" after redispatching connection.
- defer srv->redispatches++ and s->be->redispatches++ so there
are called only if a conenction was redispatched, not only
supposed to.
- don't increment lbconn if redispatcher selected the same sarver
- don't count unsuccessfully redispatched connections as redispatched
connections
- don't count redispatched connections as errors, so:
- the number of connections effectively served by a server is:
srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches
and
SUM(servers->failed_conns) == be->failed_conns
- requires the "Don't increment server connections too much + fix retries" patch
- needs little more testing and probably some discussion so reverting to the RFC state
Tests #1:
retries 4
redispatch
i) 1 server(s): b (wght=1, down)
b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0
-> request failed
ii) server(s): b (wght=1, down), u (wght=1, down)
b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0
-> request FAILED
iii) 2 server(s): b (wght=1, down), u (wght=1, up)
b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0
-> request OK
iv) 2 server(s): b (wght=100, down), u (wght=1, up)
b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1
u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0
-> request OK
v) 1 server(s): b (down for first 4 SYNS)
b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0
-> request OK
Tests #2:
retries 4
i) 1 server(s): b (down)
b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0
-> request FAILED
2008-02-21 21:50:19 -05:00
|
|
|
/* it's left to the dispatcher to choose a server */
|
2007-11-30 04:41:39 -05:00
|
|
|
sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
|
2008-01-06 10:36:16 -05:00
|
|
|
|
2007-11-30 04:41:39 -05:00
|
|
|
pendconn_free(pc);
|
2008-08-29 12:19:04 -04:00
|
|
|
task_wakeup(sess->task, TASK_WOKEN_RES);
|
2007-11-30 04:41:39 -05:00
|
|
|
xferred++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return xferred;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check for pending connections at the backend, and assign some of them to
|
|
|
|
|
* the server coming up. The server's weight is checked before being assigned
|
|
|
|
|
* connections it may not be able to handle. The total number of transferred
|
|
|
|
|
* connections is returned.
|
|
|
|
|
*/
|
|
|
|
|
static int check_for_pending(struct server *s)
|
|
|
|
|
{
|
|
|
|
|
int xferred;
|
|
|
|
|
|
|
|
|
|
if (!s->eweight)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
|
|
|
|
|
struct session *sess;
|
|
|
|
|
struct pendconn *p;
|
|
|
|
|
|
|
|
|
|
p = pendconn_from_px(s->proxy);
|
|
|
|
|
if (!p)
|
|
|
|
|
break;
|
|
|
|
|
p->sess->srv = s;
|
|
|
|
|
sess = p->sess;
|
|
|
|
|
pendconn_free(p);
|
2008-08-29 12:19:04 -04:00
|
|
|
task_wakeup(sess->task, TASK_WOKEN_RES);
|
2007-11-30 04:41:39 -05:00
|
|
|
}
|
|
|
|
|
return xferred;
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
/* Sets server <s> down, notifies by all available means, recounts the
|
|
|
|
|
* remaining servers on the proxy and transfers queued sessions whenever
|
2007-07-24 17:32:33 -04:00
|
|
|
* possible to other servers. It automatically recomputes the number of
|
|
|
|
|
* servers, but not the map.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2007-04-15 14:56:27 -04:00
|
|
|
static void set_server_down(struct server *s)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2008-02-17 19:26:35 -05:00
|
|
|
struct server *srv;
|
|
|
|
|
struct chunk msg;
|
2006-06-25 20:48:02 -04:00
|
|
|
int xferred;
|
|
|
|
|
|
2008-02-17 19:26:35 -05:00
|
|
|
if (s->health == s->rise || s->tracked) {
|
2007-11-30 04:41:39 -05:00
|
|
|
int srv_was_paused = s->state & SRV_GOINGDOWN;
|
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
|
|
|
|
|
|
|
|
s->last_change = now.tv_sec;
|
2007-11-30 04:41:39 -05:00
|
|
|
s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
|
2007-11-25 19:15:43 -05:00
|
|
|
s->proxy->lbprm.set_server_status_down(s);
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
/* we might have sessions queued on this server and waiting for
|
|
|
|
|
* a connection. Those which are redispatchable will be queued
|
|
|
|
|
* to another server or to the proxy itself.
|
|
|
|
|
*/
|
2007-11-30 04:41:39 -05:00
|
|
|
xferred = redistribute_pending(s);
|
2008-02-17 19:26:35 -05:00
|
|
|
|
|
|
|
|
msg.len = 0;
|
|
|
|
|
msg.str = trash;
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash),
|
|
|
|
|
"%sServer %s/%s is DOWN", s->state & SRV_BACKUP ? "Backup " : "",
|
|
|
|
|
s->proxy->id, s->id);
|
|
|
|
|
|
|
|
|
|
if (s->tracked)
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), " via %s/%s",
|
|
|
|
|
s->tracked->proxy->id, s->tracked->id);
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers left.%s"
|
2006-06-25 20:48:02 -04:00
|
|
|
" %d sessions active, %d requeued, %d remaining in queue.\n",
|
2008-02-17 19:26:35 -05:00
|
|
|
s->proxy->srv_act, s->proxy->srv_bck,
|
2006-06-25 20:48:02 -04:00
|
|
|
(s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
|
|
|
|
|
s->cur_sess, xferred, s->nbpend);
|
|
|
|
|
|
|
|
|
|
Warning("%s", trash);
|
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
|
|
|
|
2007-11-30 04:41:39 -05:00
|
|
|
/* we don't send an alert if the server was previously paused */
|
|
|
|
|
if (srv_was_paused)
|
|
|
|
|
send_log(s->proxy, LOG_NOTICE, "%s", trash);
|
|
|
|
|
else
|
|
|
|
|
send_log(s->proxy, LOG_ALERT, "%s", trash);
|
|
|
|
|
|
|
|
|
|
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
|
|
|
|
|
set_backend_down(s->proxy);
|
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
s->down_trans++;
|
2008-02-17 19:26:35 -05:00
|
|
|
|
|
|
|
|
if (s->state && SRV_CHECKED)
|
|
|
|
|
for(srv = s->tracknext; srv; srv = srv->tracknext)
|
|
|
|
|
set_server_down(srv);
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2008-02-17 19:26:35 -05:00
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
s->health = 0; /* failure */
|
|
|
|
|
}
|
|
|
|
|
|
2008-02-17 19:26:35 -05:00
|
|
|
static void set_server_up(struct server *s) {
|
|
|
|
|
|
|
|
|
|
struct server *srv;
|
|
|
|
|
struct chunk msg;
|
|
|
|
|
int xferred;
|
|
|
|
|
|
|
|
|
|
if (s->health == s->rise || s->tracked) {
|
|
|
|
|
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
|
|
|
|
|
if (s->proxy->last_change < now.tv_sec) // ignore negative times
|
|
|
|
|
s->proxy->down_time += now.tv_sec - s->proxy->last_change;
|
|
|
|
|
s->proxy->last_change = now.tv_sec;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (s->last_change < now.tv_sec) // ignore negative times
|
|
|
|
|
s->down_time += now.tv_sec - s->last_change;
|
|
|
|
|
|
|
|
|
|
s->last_change = now.tv_sec;
|
|
|
|
|
s->state |= SRV_RUNNING;
|
|
|
|
|
|
|
|
|
|
if (s->slowstart > 0) {
|
|
|
|
|
s->state |= SRV_WARMINGUP;
|
|
|
|
|
if (s->proxy->lbprm.algo & BE_LB_PROP_DYN) {
|
|
|
|
|
/* For dynamic algorithms, start at the first step of the weight,
|
|
|
|
|
* without multiplying by BE_WEIGHT_SCALE.
|
|
|
|
|
*/
|
|
|
|
|
s->eweight = s->uweight;
|
|
|
|
|
if (s->proxy->lbprm.update_server_eweight)
|
|
|
|
|
s->proxy->lbprm.update_server_eweight(s);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
s->proxy->lbprm.set_server_status_up(s);
|
|
|
|
|
|
|
|
|
|
/* check if we can handle some connections queued at the proxy. We
|
|
|
|
|
* will take as many as we can handle.
|
|
|
|
|
*/
|
|
|
|
|
xferred = check_for_pending(s);
|
|
|
|
|
|
|
|
|
|
msg.len = 0;
|
|
|
|
|
msg.str = trash;
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash),
|
|
|
|
|
"%sServer %s/%s is UP", s->state & SRV_BACKUP ? "Backup " : "",
|
|
|
|
|
s->proxy->id, s->id);
|
|
|
|
|
|
|
|
|
|
if (s->tracked)
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), " via %s/%s",
|
|
|
|
|
s->tracked->proxy->id, s->tracked->id);
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers online.%s"
|
|
|
|
|
" %d sessions requeued, %d total in queue.\n",
|
|
|
|
|
s->proxy->srv_act, s->proxy->srv_bck,
|
|
|
|
|
(s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
|
|
|
|
|
s->cur_sess, xferred, s->nbpend);
|
|
|
|
|
|
|
|
|
|
Warning("%s", trash);
|
|
|
|
|
send_log(s->proxy, LOG_NOTICE, "%s", trash);
|
|
|
|
|
|
|
|
|
|
if (s->state && SRV_CHECKED)
|
|
|
|
|
for(srv = s->tracknext; srv; srv = srv->tracknext)
|
|
|
|
|
set_server_up(srv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (s->health >= s->rise)
|
|
|
|
|
s->health = s->rise + s->fall - 1; /* OK now */
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void set_server_disabled(struct server *s) {
|
|
|
|
|
|
|
|
|
|
struct server *srv;
|
|
|
|
|
struct chunk msg;
|
|
|
|
|
int xferred;
|
|
|
|
|
|
|
|
|
|
s->state |= SRV_GOINGDOWN;
|
|
|
|
|
s->proxy->lbprm.set_server_status_down(s);
|
|
|
|
|
|
|
|
|
|
/* we might have sessions queued on this server and waiting for
|
|
|
|
|
* a connection. Those which are redispatchable will be queued
|
|
|
|
|
* to another server or to the proxy itself.
|
|
|
|
|
*/
|
|
|
|
|
xferred = redistribute_pending(s);
|
|
|
|
|
|
|
|
|
|
msg.len = 0;
|
|
|
|
|
msg.str = trash;
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash),
|
|
|
|
|
"Load-balancing on %sServer %s/%s is disabled",
|
|
|
|
|
s->state & SRV_BACKUP ? "Backup " : "",
|
|
|
|
|
s->proxy->id, s->id);
|
|
|
|
|
|
|
|
|
|
if (s->tracked)
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), " via %s/%s",
|
|
|
|
|
s->tracked->proxy->id, s->tracked->id);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash),". %d active and %d backup servers online.%s"
|
|
|
|
|
" %d sessions requeued, %d total in queue.\n",
|
|
|
|
|
s->proxy->srv_act, s->proxy->srv_bck,
|
|
|
|
|
(s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
|
|
|
|
|
xferred, s->nbpend);
|
|
|
|
|
|
|
|
|
|
Warning("%s", trash);
|
|
|
|
|
|
|
|
|
|
send_log(s->proxy, LOG_NOTICE, "%s", trash);
|
|
|
|
|
|
|
|
|
|
if (!s->proxy->srv_bck && !s->proxy->srv_act)
|
|
|
|
|
set_backend_down(s->proxy);
|
|
|
|
|
|
|
|
|
|
if (s->state && SRV_CHECKED)
|
|
|
|
|
for(srv = s->tracknext; srv; srv = srv->tracknext)
|
|
|
|
|
set_server_disabled(srv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void set_server_enabled(struct server *s) {
|
|
|
|
|
|
|
|
|
|
struct server *srv;
|
|
|
|
|
struct chunk msg;
|
|
|
|
|
int xferred;
|
|
|
|
|
|
|
|
|
|
s->state &= ~SRV_GOINGDOWN;
|
|
|
|
|
s->proxy->lbprm.set_server_status_up(s);
|
|
|
|
|
|
|
|
|
|
/* check if we can handle some connections queued at the proxy. We
|
|
|
|
|
* will take as many as we can handle.
|
|
|
|
|
*/
|
|
|
|
|
xferred = check_for_pending(s);
|
|
|
|
|
|
|
|
|
|
msg.len = 0;
|
|
|
|
|
msg.str = trash;
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash),
|
|
|
|
|
"Load-balancing on %sServer %s/%s is enabled again",
|
|
|
|
|
s->state & SRV_BACKUP ? "Backup " : "",
|
|
|
|
|
s->proxy->id, s->id);
|
|
|
|
|
|
|
|
|
|
if (s->tracked)
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), " via %s/%s",
|
|
|
|
|
s->tracked->proxy->id, s->tracked->id);
|
|
|
|
|
|
|
|
|
|
chunk_printf(&msg, sizeof(trash), ". %d active and %d backup servers online.%s"
|
|
|
|
|
" %d sessions requeued, %d total in queue.\n",
|
|
|
|
|
s->proxy->srv_act, s->proxy->srv_bck,
|
|
|
|
|
(s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
|
|
|
|
|
xferred, s->nbpend);
|
|
|
|
|
|
|
|
|
|
Warning("%s", trash);
|
|
|
|
|
send_log(s->proxy, LOG_NOTICE, "%s", trash);
|
|
|
|
|
|
|
|
|
|
if (s->state && SRV_CHECKED)
|
|
|
|
|
for(srv = s->tracknext; srv; srv = srv->tracknext)
|
|
|
|
|
set_server_enabled(srv);
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This function is used only for server health-checks. It handles
|
|
|
|
|
* the connection acknowledgement. If the proxy requires HTTP health-checks,
|
2007-11-30 02:33:21 -05:00
|
|
|
* it sends the request. In other cases, it fills s->result with SRV_CHK_*.
|
2007-04-15 14:56:27 -04:00
|
|
|
* The function itself returns 0 if it needs some polling before being called
|
|
|
|
|
* again, otherwise 1.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2007-04-15 14:56:27 -04:00
|
|
|
static int event_srv_chk_w(int fd)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2007-04-30 08:37:43 -04:00
|
|
|
__label__ out_wakeup, out_nowake, out_poll, out_error;
|
2006-06-25 20:48:02 -04:00
|
|
|
struct task *t = fdtab[fd].owner;
|
|
|
|
|
struct server *s = t->context;
|
|
|
|
|
|
2008-01-20 19:54:06 -05:00
|
|
|
//fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state));
|
2007-04-30 08:37:43 -04:00
|
|
|
if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR)))
|
|
|
|
|
goto out_error;
|
|
|
|
|
|
|
|
|
|
/* here, we know that the connection is established */
|
2007-04-15 14:56:27 -04:00
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
if (!(s->result & SRV_CHK_ERROR)) {
|
2006-06-25 20:48:02 -04:00
|
|
|
/* we don't want to mark 'UP' a server on which we detected an error earlier */
|
2006-07-09 10:42:34 -04:00
|
|
|
if ((s->proxy->options & PR_O_HTTP_CHK) ||
|
2007-05-08 17:50:35 -04:00
|
|
|
(s->proxy->options & PR_O_SSL3_CHK) ||
|
|
|
|
|
(s->proxy->options & PR_O_SMTP_CHK)) {
|
2006-06-25 20:48:02 -04:00
|
|
|
int ret;
|
2006-07-09 10:42:34 -04:00
|
|
|
/* we want to check if this host replies to HTTP or SSLv3 requests
|
2006-06-25 20:48:02 -04:00
|
|
|
* so we'll send the request, and won't wake the checker up now.
|
|
|
|
|
*/
|
2006-07-09 10:42:34 -04:00
|
|
|
|
|
|
|
|
if (s->proxy->options & PR_O_SSL3_CHK) {
|
|
|
|
|
/* SSL requires that we put Unix time in the request */
|
2008-06-22 11:18:02 -04:00
|
|
|
int gmt_time = htonl(date.tv_sec);
|
2006-07-09 10:42:34 -04:00
|
|
|
memcpy(s->proxy->check_req + 11, &gmt_time, 4);
|
|
|
|
|
}
|
|
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
#ifndef MSG_NOSIGNAL
|
|
|
|
|
ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT);
|
|
|
|
|
#else
|
|
|
|
|
ret = send(fd, s->proxy->check_req, s->proxy->check_len, MSG_DONTWAIT | MSG_NOSIGNAL);
|
|
|
|
|
#endif
|
|
|
|
|
if (ret == s->proxy->check_len) {
|
2008-01-20 19:54:06 -05:00
|
|
|
/* we allow up to <timeout.check> if nonzero for a responce */
|
2008-12-21 07:00:41 -05:00
|
|
|
if (s->proxy->timeout.check)
|
|
|
|
|
t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
|
2007-04-08 10:59:42 -04:00
|
|
|
EV_FD_SET(fd, DIR_RD); /* prepare for reading reply */
|
2007-04-15 14:56:27 -04:00
|
|
|
goto out_nowake;
|
|
|
|
|
}
|
2007-04-30 08:37:43 -04:00
|
|
|
else if (ret == 0 || errno == EAGAIN)
|
|
|
|
|
goto out_poll;
|
|
|
|
|
else
|
|
|
|
|
goto out_error;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
else {
|
2007-04-30 08:37:43 -04:00
|
|
|
/* We have no data to send to check the connection, and
|
|
|
|
|
* getsockopt() will not inform us whether the connection
|
|
|
|
|
* is still pending. So we'll reuse connect() to check the
|
|
|
|
|
* state of the socket. This has the advantage of givig us
|
|
|
|
|
* the following info :
|
|
|
|
|
* - error
|
|
|
|
|
* - connecting (EALREADY, EINPROGRESS)
|
|
|
|
|
* - connected (EISCONN, 0)
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
struct sockaddr_in sa;
|
|
|
|
|
|
|
|
|
|
sa = (s->check_addr.sin_addr.s_addr) ? s->check_addr : s->addr;
|
|
|
|
|
sa.sin_port = htons(s->check_port);
|
|
|
|
|
|
|
|
|
|
if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
|
|
|
|
|
errno = 0;
|
|
|
|
|
|
|
|
|
|
if (errno == EALREADY || errno == EINPROGRESS)
|
|
|
|
|
goto out_poll;
|
|
|
|
|
|
|
|
|
|
if (errno && errno != EISCONN)
|
|
|
|
|
goto out_error;
|
|
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
/* good TCP connection is enough */
|
2007-11-30 02:33:21 -05:00
|
|
|
s->result |= SRV_CHK_RUNNING;
|
2007-04-30 08:37:43 -04:00
|
|
|
goto out_wakeup;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
}
|
2007-04-15 14:56:27 -04:00
|
|
|
out_wakeup:
|
2008-08-29 12:19:04 -04:00
|
|
|
task_wakeup(t, TASK_WOKEN_IO);
|
2007-04-15 14:56:27 -04:00
|
|
|
out_nowake:
|
|
|
|
|
EV_FD_CLR(fd, DIR_WR); /* nothing more to write */
|
2008-01-18 11:20:13 -05:00
|
|
|
fdtab[fd].ev &= ~FD_POLL_OUT;
|
2007-04-15 14:56:27 -04:00
|
|
|
return 1;
|
2007-04-30 08:37:43 -04:00
|
|
|
out_poll:
|
|
|
|
|
/* The connection is still pending. We'll have to poll it
|
|
|
|
|
* before attempting to go further. */
|
2008-01-18 11:20:13 -05:00
|
|
|
fdtab[fd].ev &= ~FD_POLL_OUT;
|
2007-04-30 08:37:43 -04:00
|
|
|
return 0;
|
|
|
|
|
out_error:
|
2007-11-30 02:33:21 -05:00
|
|
|
s->result |= SRV_CHK_ERROR;
|
2007-04-30 08:37:43 -04:00
|
|
|
fdtab[fd].state = FD_STERROR;
|
|
|
|
|
goto out_wakeup;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2006-07-09 10:42:34 -04:00
|
|
|
* This function is used only for server health-checks. It handles the server's
|
2007-11-30 02:33:21 -05:00
|
|
|
* reply to an HTTP request or SSL HELLO. It sets s->result to SRV_CHK_RUNNING
|
|
|
|
|
* if an HTTP server replies HTTP 2xx or 3xx (valid responses), if an SMTP
|
|
|
|
|
* server returns 2xx, or if an SSL server returns at least 5 bytes in response
|
|
|
|
|
* to an SSL HELLO (the principle is that this is enough to distinguish between
|
|
|
|
|
* an SSL server and a pure TCP relay). All other cases will set s->result to
|
|
|
|
|
* SRV_CHK_ERROR. The function returns 0 if it needs to be called again after
|
|
|
|
|
* some polling, otherwise non-zero.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2007-04-15 14:56:27 -04:00
|
|
|
static int event_srv_chk_r(int fd)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2007-04-15 14:56:27 -04:00
|
|
|
__label__ out_wakeup;
|
2007-11-30 02:33:21 -05:00
|
|
|
int len;
|
2006-06-25 20:48:02 -04:00
|
|
|
struct task *t = fdtab[fd].owner;
|
|
|
|
|
struct server *s = t->context;
|
|
|
|
|
int skerr;
|
|
|
|
|
socklen_t lskerr = sizeof(skerr);
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
len = -1;
|
2007-04-15 14:56:27 -04:00
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
if (unlikely((s->result & SRV_CHK_ERROR) ||
|
|
|
|
|
(fdtab[fd].state == FD_STERROR) ||
|
2007-04-15 14:56:27 -04:00
|
|
|
(fdtab[fd].ev & FD_POLL_ERR) ||
|
|
|
|
|
(getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == -1) ||
|
|
|
|
|
(skerr != 0))) {
|
|
|
|
|
/* in case of TCP only, this tells us if the connection failed */
|
2007-11-30 02:33:21 -05:00
|
|
|
s->result |= SRV_CHK_ERROR;
|
2007-04-15 14:56:27 -04:00
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
|
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
#ifndef MSG_NOSIGNAL
|
[MINOR] prevent the system from sending an RST when closing health-checks
On Sat, 22 Sep 2007, Willy Tarreau wrote:
> On Sun, Sep 23, 2007 at 03:23:38AM +0200, Krzysztof Oledzki wrote:
> > I noticed that with httpchk, haproxy generates TCP RST at end of a check.
> > IMHO, it would be more polite to send FIN to a server, especially that
> > each TCP RST found by a tcpdump makes me concerned that something is
> > wrong, as it is hard to distinguish between a RST from a httpchk and from
> > a normal request, forwarded for a client.
>
> I have also noticed it very recently. In fact, it's never the
> application (here haproxy) which decides to send an RST, it's the
> system. It does so because the server returns data on a terminated
> socket. I guess it's because the health-check code does not read much
> of the response. In fact, we just need to read enough to process common
> responses. If people are dumb enough to check with something like "GET
> /image.iso", they should expect to get an RST after a few kbytes
> instead of reading the whole file!
Right, that was easy. Attached patch changed what you described. Now
haproxy finishes http checks with FIN.
2007-10-11 12:41:08 -04:00
|
|
|
len = recv(fd, trash, sizeof(trash), 0);
|
2006-06-25 20:48:02 -04:00
|
|
|
#else
|
2007-04-15 14:56:27 -04:00
|
|
|
/* Warning! Linux returns EAGAIN on SO_ERROR if data are still available
|
|
|
|
|
* but the connection was closed on the remote end. Fortunately, recv still
|
|
|
|
|
* works correctly and we don't need to do the getsockopt() on linux.
|
|
|
|
|
*/
|
[MINOR] prevent the system from sending an RST when closing health-checks
On Sat, 22 Sep 2007, Willy Tarreau wrote:
> On Sun, Sep 23, 2007 at 03:23:38AM +0200, Krzysztof Oledzki wrote:
> > I noticed that with httpchk, haproxy generates TCP RST at end of a check.
> > IMHO, it would be more polite to send FIN to a server, especially that
> > each TCP RST found by a tcpdump makes me concerned that something is
> > wrong, as it is hard to distinguish between a RST from a httpchk and from
> > a normal request, forwarded for a client.
>
> I have also noticed it very recently. In fact, it's never the
> application (here haproxy) which decides to send an RST, it's the
> system. It does so because the server returns data on a terminated
> socket. I guess it's because the health-check code does not read much
> of the response. In fact, we just need to read enough to process common
> responses. If people are dumb enough to check with something like "GET
> /image.iso", they should expect to get an RST after a few kbytes
> instead of reading the whole file!
Right, that was easy. Attached patch changed what you described. Now
haproxy finishes http checks with FIN.
2007-10-11 12:41:08 -04:00
|
|
|
len = recv(fd, trash, sizeof(trash), MSG_NOSIGNAL);
|
2006-06-25 20:48:02 -04:00
|
|
|
#endif
|
2007-04-15 14:56:27 -04:00
|
|
|
if (unlikely(len < 0 && errno == EAGAIN)) {
|
|
|
|
|
/* we want some polling to happen first */
|
2008-01-18 11:20:13 -05:00
|
|
|
fdtab[fd].ev &= ~FD_POLL_IN;
|
2007-04-15 14:56:27 -04:00
|
|
|
return 0;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
/* Note: the response will only be accepted if read at once */
|
|
|
|
|
if (s->proxy->options & PR_O_HTTP_CHK) {
|
|
|
|
|
/* Check if the server speaks HTTP 1.X */
|
|
|
|
|
if ((len < strlen("HTTP/1.0 000\r")) ||
|
|
|
|
|
(memcmp(trash, "HTTP/1.", 7) != 0)) {
|
|
|
|
|
s->result |= SRV_CHK_ERROR;
|
|
|
|
|
goto out_wakeup;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* check the reply : HTTP/1.X 2xx and 3xx are OK */
|
|
|
|
|
if (trash[9] == '2' || trash[9] == '3')
|
|
|
|
|
s->result |= SRV_CHK_RUNNING;
|
2007-11-30 04:41:39 -05:00
|
|
|
else if ((s->proxy->options & PR_O_DISABLE404) &&
|
|
|
|
|
(s->state & SRV_RUNNING) &&
|
|
|
|
|
(memcmp(&trash[9], "404", 3) == 0)) {
|
|
|
|
|
/* 404 may be accepted as "stopping" only if the server was up */
|
|
|
|
|
s->result |= SRV_CHK_RUNNING | SRV_CHK_DISABLE;
|
|
|
|
|
}
|
2007-11-30 02:33:21 -05:00
|
|
|
else
|
|
|
|
|
s->result |= SRV_CHK_ERROR;
|
|
|
|
|
}
|
|
|
|
|
else if (s->proxy->options & PR_O_SSL3_CHK) {
|
|
|
|
|
/* Check for SSLv3 alert or handshake */
|
|
|
|
|
if ((len >= 5) && (trash[0] == 0x15 || trash[0] == 0x16))
|
|
|
|
|
s->result |= SRV_CHK_RUNNING;
|
|
|
|
|
else
|
|
|
|
|
s->result |= SRV_CHK_ERROR;
|
2007-04-30 08:37:43 -04:00
|
|
|
}
|
2007-11-30 02:33:21 -05:00
|
|
|
else if (s->proxy->options & PR_O_SMTP_CHK) {
|
|
|
|
|
/* Check for SMTP code 2xx (should be 250) */
|
|
|
|
|
if ((len >= 3) && (trash[0] == '2'))
|
|
|
|
|
s->result |= SRV_CHK_RUNNING;
|
|
|
|
|
else
|
|
|
|
|
s->result |= SRV_CHK_ERROR;
|
2007-04-30 08:37:43 -04:00
|
|
|
}
|
2007-11-30 02:33:21 -05:00
|
|
|
else {
|
|
|
|
|
/* other checks are valid if the connection succeeded anyway */
|
|
|
|
|
s->result |= SRV_CHK_RUNNING;
|
2007-05-08 17:50:35 -04:00
|
|
|
}
|
2007-04-15 14:56:27 -04:00
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
out_wakeup:
|
|
|
|
|
if (s->result & SRV_CHK_ERROR)
|
2006-06-25 20:48:02 -04:00
|
|
|
fdtab[fd].state = FD_STERROR;
|
|
|
|
|
|
2007-04-08 10:59:42 -04:00
|
|
|
EV_FD_CLR(fd, DIR_RD);
|
2008-08-29 12:19:04 -04:00
|
|
|
task_wakeup(t, TASK_WOKEN_IO);
|
2008-01-18 11:20:13 -05:00
|
|
|
fdtab[fd].ev &= ~FD_POLL_IN;
|
2007-04-15 14:56:27 -04:00
|
|
|
return 1;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* manages a server health-check. Returns
|
|
|
|
|
* the time the task accepts to wait, or TIME_ETERNITY for infinity.
|
|
|
|
|
*/
|
2008-07-06 18:09:58 -04:00
|
|
|
void process_chk(struct task *t, int *next)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2007-05-08 18:54:10 -04:00
|
|
|
__label__ new_chk, out;
|
2006-06-25 20:48:02 -04:00
|
|
|
struct server *s = t->context;
|
|
|
|
|
struct sockaddr_in sa;
|
|
|
|
|
int fd;
|
2007-10-14 17:40:01 -04:00
|
|
|
int rv;
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
//fprintf(stderr, "process_chk: task=%p\n", t);
|
|
|
|
|
|
|
|
|
|
new_chk:
|
|
|
|
|
fd = s->curfd;
|
|
|
|
|
if (fd < 0) { /* no check currently running */
|
|
|
|
|
//fprintf(stderr, "process_chk: 2\n");
|
2008-07-06 18:09:58 -04:00
|
|
|
if (!tick_is_expired(t->expire, now_ms)) { /* not good time yet */
|
2006-06-25 20:48:02 -04:00
|
|
|
task_queue(t); /* restore t to its place in the task list */
|
2007-05-12 16:35:00 -04:00
|
|
|
*next = t->expire;
|
2007-05-08 18:54:10 -04:00
|
|
|
goto out;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* we don't send any health-checks when the proxy is stopped or when
|
|
|
|
|
* the server should not be checked.
|
|
|
|
|
*/
|
|
|
|
|
if (!(s->state & SRV_CHECKED) || s->proxy->state == PR_STSTOPPED) {
|
2008-07-06 18:09:58 -04:00
|
|
|
while (tick_is_expired(t->expire, now_ms))
|
|
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
|
2006-06-25 20:48:02 -04:00
|
|
|
task_queue(t); /* restore t to its place in the task list */
|
2007-05-12 16:35:00 -04:00
|
|
|
*next = t->expire;
|
2007-05-08 18:54:10 -04:00
|
|
|
goto out;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* we'll initiate a new check */
|
2007-11-30 02:33:21 -05:00
|
|
|
s->result = SRV_CHK_UNKNOWN; /* no result yet */
|
2006-06-25 20:48:02 -04:00
|
|
|
if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) {
|
|
|
|
|
if ((fd < global.maxsock) &&
|
|
|
|
|
(fcntl(fd, F_SETFL, O_NONBLOCK) != -1) &&
|
|
|
|
|
(setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) != -1)) {
|
|
|
|
|
//fprintf(stderr, "process_chk: 3\n");
|
|
|
|
|
|
2007-10-18 12:07:48 -04:00
|
|
|
if (s->proxy->options & PR_O_TCP_NOLING) {
|
|
|
|
|
/* We don't want to useless data */
|
|
|
|
|
setsockopt(fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
|
|
|
|
|
}
|
2007-03-25 10:45:16 -04:00
|
|
|
|
2007-03-25 14:46:19 -04:00
|
|
|
if (s->check_addr.sin_addr.s_addr)
|
|
|
|
|
/* we'll connect to the check addr specified on the server */
|
2007-03-25 10:45:16 -04:00
|
|
|
sa = s->check_addr;
|
|
|
|
|
else
|
2007-03-25 14:46:19 -04:00
|
|
|
/* we'll connect to the addr on the server */
|
2007-03-25 10:45:16 -04:00
|
|
|
sa = s->addr;
|
2007-03-25 14:46:19 -04:00
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
/* we'll connect to the check port on the server */
|
|
|
|
|
sa.sin_port = htons(s->check_port);
|
|
|
|
|
|
|
|
|
|
/* allow specific binding :
|
|
|
|
|
* - server-specific at first
|
|
|
|
|
* - proxy-specific next
|
|
|
|
|
*/
|
|
|
|
|
if (s->state & SRV_BIND_SRC) {
|
2008-01-13 12:40:14 -05:00
|
|
|
struct sockaddr_in *remote = NULL;
|
|
|
|
|
int ret, flags = 0;
|
|
|
|
|
|
2008-02-14 14:28:18 -05:00
|
|
|
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
|
2006-11-14 10:18:41 -05:00
|
|
|
if ((s->state & SRV_TPROXY_MASK) == SRV_TPROXY_ADDR) {
|
2008-01-13 12:40:14 -05:00
|
|
|
remote = (struct sockaddr_in *)&s->tproxy_addr;
|
|
|
|
|
flags = 3;
|
|
|
|
|
}
|
2008-02-14 14:28:18 -05:00
|
|
|
#endif
|
2008-01-13 12:40:14 -05:00
|
|
|
ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
|
|
|
|
|
if (ret) {
|
|
|
|
|
s->result |= SRV_CHK_ERROR;
|
|
|
|
|
switch (ret) {
|
|
|
|
|
case 1:
|
|
|
|
|
Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
|
|
|
|
|
s->proxy->id, s->id);
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
2006-11-14 10:18:41 -05:00
|
|
|
Alert("Cannot bind to tproxy source address before connect() for server %s/%s. Aborting.\n",
|
|
|
|
|
s->proxy->id, s->id);
|
2008-01-13 12:40:14 -05:00
|
|
|
break;
|
2006-11-14 10:18:41 -05:00
|
|
|
}
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
else if (s->proxy->options & PR_O_BIND_SRC) {
|
2008-01-13 12:40:14 -05:00
|
|
|
struct sockaddr_in *remote = NULL;
|
|
|
|
|
int ret, flags = 0;
|
|
|
|
|
|
2008-02-14 14:28:18 -05:00
|
|
|
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
|
2006-11-14 10:18:41 -05:00
|
|
|
if ((s->proxy->options & PR_O_TPXY_MASK) == PR_O_TPXY_ADDR) {
|
2008-01-13 12:40:14 -05:00
|
|
|
remote = (struct sockaddr_in *)&s->proxy->tproxy_addr;
|
|
|
|
|
flags = 3;
|
|
|
|
|
}
|
2008-02-14 14:28:18 -05:00
|
|
|
#endif
|
2008-01-13 12:40:14 -05:00
|
|
|
ret = tcpv4_bind_socket(fd, flags, &s->proxy->source_addr, remote);
|
|
|
|
|
if (ret) {
|
|
|
|
|
s->result |= SRV_CHK_ERROR;
|
|
|
|
|
switch (ret) {
|
|
|
|
|
case 1:
|
|
|
|
|
Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n",
|
|
|
|
|
proxy_type_str(s->proxy), s->proxy->id);
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
2006-12-31 11:46:05 -05:00
|
|
|
Alert("Cannot bind to tproxy source address before connect() for %s '%s'. Aborting.\n",
|
|
|
|
|
proxy_type_str(s->proxy), s->proxy->id);
|
2008-01-13 12:40:14 -05:00
|
|
|
break;
|
2006-11-14 10:18:41 -05:00
|
|
|
}
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
if (s->result == SRV_CHK_UNKNOWN) {
|
2006-06-25 20:48:02 -04:00
|
|
|
if ((connect(fd, (struct sockaddr *)&sa, sizeof(sa)) != -1) || (errno == EINPROGRESS)) {
|
|
|
|
|
/* OK, connection in progress or established */
|
|
|
|
|
|
|
|
|
|
//fprintf(stderr, "process_chk: 4\n");
|
|
|
|
|
|
|
|
|
|
s->curfd = fd; /* that's how we know a test is in progress ;-) */
|
2007-04-15 04:58:02 -04:00
|
|
|
fd_insert(fd);
|
2006-06-25 20:48:02 -04:00
|
|
|
fdtab[fd].owner = t;
|
2006-07-29 10:59:06 -04:00
|
|
|
fdtab[fd].cb[DIR_RD].f = &event_srv_chk_r;
|
|
|
|
|
fdtab[fd].cb[DIR_RD].b = NULL;
|
|
|
|
|
fdtab[fd].cb[DIR_WR].f = &event_srv_chk_w;
|
|
|
|
|
fdtab[fd].cb[DIR_WR].b = NULL;
|
2007-10-09 11:14:37 -04:00
|
|
|
fdtab[fd].peeraddr = (struct sockaddr *)&sa;
|
|
|
|
|
fdtab[fd].peerlen = sizeof(sa);
|
2006-06-25 20:48:02 -04:00
|
|
|
fdtab[fd].state = FD_STCONN; /* connection in progress */
|
2007-04-08 10:59:42 -04:00
|
|
|
EV_FD_SET(fd, DIR_WR); /* for connect status */
|
2006-06-25 20:48:02 -04:00
|
|
|
#ifdef DEBUG_FULL
|
2007-04-08 10:59:42 -04:00
|
|
|
assert (!EV_FD_ISSET(fd, DIR_RD));
|
2006-06-25 20:48:02 -04:00
|
|
|
#endif
|
2008-01-20 19:54:06 -05:00
|
|
|
//fprintf(stderr, "process_chk: 4+, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
|
|
|
|
|
/* we allow up to min(inter, timeout.connect) for a connection
|
|
|
|
|
* to establish but only when timeout.check is set
|
|
|
|
|
* as it may be to short for a full check otherwise
|
|
|
|
|
*/
|
2008-07-06 18:09:58 -04:00
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(s->inter));
|
2008-01-20 19:54:06 -05:00
|
|
|
|
2008-07-06 18:09:58 -04:00
|
|
|
if (s->proxy->timeout.check && s->proxy->timeout.connect) {
|
|
|
|
|
int t_con = tick_add(now_ms, s->proxy->timeout.connect);
|
|
|
|
|
t->expire = tick_first(t->expire, t_con);
|
2008-02-17 05:34:10 -05:00
|
|
|
}
|
2008-01-20 19:54:06 -05:00
|
|
|
|
2006-06-25 20:48:02 -04:00
|
|
|
task_queue(t); /* restore t to its place in the task list */
|
2007-05-12 16:35:00 -04:00
|
|
|
*next = t->expire;
|
2007-05-13 21:40:11 -04:00
|
|
|
return;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
else if (errno != EALREADY && errno != EISCONN && errno != EAGAIN) {
|
2007-11-30 02:33:21 -05:00
|
|
|
s->result |= SRV_CHK_ERROR; /* a real error */
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
close(fd); /* socket creation error */
|
|
|
|
|
}
|
|
|
|
|
|
2007-11-30 02:33:21 -05:00
|
|
|
if (s->result == SRV_CHK_UNKNOWN) { /* nothing done */
|
2006-06-25 20:48:02 -04:00
|
|
|
//fprintf(stderr, "process_chk: 6\n");
|
2008-07-06 18:09:58 -04:00
|
|
|
while (tick_is_expired(t->expire, now_ms))
|
|
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
|
2006-06-25 20:48:02 -04:00
|
|
|
goto new_chk; /* may be we should initialize a new check */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* here, we have seen a failure */
|
|
|
|
|
if (s->health > s->rise) {
|
|
|
|
|
s->health--; /* still good */
|
|
|
|
|
s->failed_checks++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
set_server_down(s);
|
|
|
|
|
|
2008-01-20 19:54:06 -05:00
|
|
|
//fprintf(stderr, "process_chk: 7, %lu\n", __tv_to_ms(&s->proxy->timeout.connect));
|
|
|
|
|
/* we allow up to min(inter, timeout.connect) for a connection
|
|
|
|
|
* to establish but only when timeout.check is set
|
|
|
|
|
* as it may be to short for a full check otherwise
|
|
|
|
|
*/
|
2008-07-06 18:09:58 -04:00
|
|
|
while (tick_is_expired(t->expire, now_ms)) {
|
|
|
|
|
int t_con;
|
2008-01-20 19:54:06 -05:00
|
|
|
|
2008-07-06 18:09:58 -04:00
|
|
|
t_con = tick_add(t->expire, s->proxy->timeout.connect);
|
|
|
|
|
t->expire = tick_add(t->expire, MS_TO_TICKS(s->inter));
|
2008-01-20 19:54:06 -05:00
|
|
|
|
2008-07-06 18:09:58 -04:00
|
|
|
if (s->proxy->timeout.check)
|
|
|
|
|
t->expire = tick_first(t->expire, t_con);
|
2008-01-20 19:54:06 -05:00
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
goto new_chk;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
//fprintf(stderr, "process_chk: 8\n");
|
|
|
|
|
/* there was a test running */
|
2007-11-30 02:33:21 -05:00
|
|
|
if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
|
2006-06-25 20:48:02 -04:00
|
|
|
//fprintf(stderr, "process_chk: 9\n");
|
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
|
|
|
|
2007-11-30 11:42:05 -05:00
|
|
|
if (s->state & SRV_WARMINGUP) {
|
|
|
|
|
if (now.tv_sec < s->last_change || now.tv_sec >= s->last_change + s->slowstart) {
|
|
|
|
|
s->state &= ~SRV_WARMINGUP;
|
|
|
|
|
if (s->proxy->lbprm.algo & BE_LB_PROP_DYN)
|
|
|
|
|
s->eweight = s->uweight * BE_WEIGHT_SCALE;
|
|
|
|
|
if (s->proxy->lbprm.update_server_eweight)
|
|
|
|
|
s->proxy->lbprm.update_server_eweight(s);
|
|
|
|
|
}
|
|
|
|
|
else if (s->proxy->lbprm.algo & BE_LB_PROP_DYN) {
|
|
|
|
|
/* for dynamic algorithms, let's update the weight */
|
2007-12-02 20:04:00 -05:00
|
|
|
s->eweight = (BE_WEIGHT_SCALE * (now.tv_sec - s->last_change) +
|
|
|
|
|
s->slowstart - 1) / s->slowstart;
|
2007-11-30 11:42:05 -05:00
|
|
|
s->eweight *= s->uweight;
|
|
|
|
|
if (s->proxy->lbprm.update_server_eweight)
|
|
|
|
|
s->proxy->lbprm.update_server_eweight(s);
|
|
|
|
|
}
|
|
|
|
|
/* probably that we can refill this server with a bit more connections */
|
|
|
|
|
check_for_pending(s);
|
|
|
|
|
}
|
|
|
|
|
|
2007-11-30 04:41:39 -05:00
|
|
|
/* we may have to add/remove this server from the LB group */
|
|
|
|
|
if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
|
|
|
|
|
if ((s->state & SRV_GOINGDOWN) &&
|
2008-02-17 19:26:35 -05:00
|
|
|
((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING))
|
|
|
|
|
set_server_enabled(s);
|
2007-11-30 04:41:39 -05:00
|
|
|
else if (!(s->state & SRV_GOINGDOWN) &&
|
|
|
|
|
((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) ==
|
2008-02-17 19:26:35 -05:00
|
|
|
(SRV_CHK_RUNNING | SRV_CHK_DISABLE)))
|
|
|
|
|
set_server_disabled(s);
|
2007-11-30 04:41:39 -05:00
|
|
|
}
|
|
|
|
|
|
[MEDIUM] stats: report server and backend cumulated downtime
Hello,
This patch implements new statistics for SLA calculation by adding new
field 'Dwntime' with total down time since restart (both HTTP/CSV) and
extending status field (HTTP) or inserting a new one (CSV) with time
showing how long each server/backend is in a current state. Additionaly,
down transations are also calculated and displayed for backends, so it is
possible to know how many times selected backend was down, generating "No
server is available to handle this request." error.
New information are presentetd in two different ways:
- for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or
"59m 59s"
- for CSV: seconds
I believe that seconds resolution is enough.
As there are more columns in the status page I decided to shrink some
names to make more space:
- Weight -> Wght
- Check -> Chk
- Down -> Dwn
Making described changes I also made some improvements and fixed some
small bugs:
- don't increment s->health above 's->rise + s->fall - 1'. Previously it
was incremented an then (re)set to 's->rise + s->fall - 1'.
- do not set server down if it is down already
- do not set server up if it is up already
- fix colspan in multiple places (mostly introduced by my previous patch)
- add missing "status" header to CSV
- fix order of retries/redispatches in server (CSV)
- s/Tthen/Then/
- s/server/backend/ in DATA_ST_PX_BE (dumpstats.c)
Changes from previous version:
- deal with negative time intervales
- don't relay on s->state (SRV_RUNNING)
- little reworked human_time + compacted format (no spaces). If needed it
can be used in the future for other purposes by optionally making "cnt"
as an argument
- leave set_server_down mostly unchanged
- only little reworked "process_chk: 9"
- additional fields in CSV are appended to the rigth
- fix "SEC" macro
- named arguments (human_time, be_downtime, srv_downtime)
Hope it is OK. If there are only cosmetic changes needed please fill free
to correct it, however if there are some bigger changes required I would
like to discuss it first or at last to know what exactly was changed
especially since I already put this patch into my production server. :)
Thank you,
Best regards,
Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
|
|
|
if (s->health < s->rise + s->fall - 1) {
|
|
|
|
|
s->health++; /* was bad, stays for a while */
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2008-02-17 19:26:35 -05:00
|
|
|
set_server_up(s);
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
s->curfd = -1; /* no check running anymore */
|
|
|
|
|
fd_delete(fd);
|
2007-10-14 17:47:04 -04:00
|
|
|
|
|
|
|
|
rv = 0;
|
|
|
|
|
if (global.spread_checks > 0) {
|
2008-01-20 19:54:06 -05:00
|
|
|
rv = srv_getinter(s) * global.spread_checks / 100;
|
2007-10-14 17:47:04 -04:00
|
|
|
rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
|
2008-01-20 19:54:06 -05:00
|
|
|
//fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv);
|
2007-10-14 17:47:04 -04:00
|
|
|
}
|
2008-07-06 18:09:58 -04:00
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
|
2006-06-25 20:48:02 -04:00
|
|
|
goto new_chk;
|
|
|
|
|
}
|
2008-07-06 18:09:58 -04:00
|
|
|
else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) {
|
2006-06-25 20:48:02 -04:00
|
|
|
//fprintf(stderr, "process_chk: 10\n");
|
|
|
|
|
/* failure or timeout detected */
|
|
|
|
|
if (s->health > s->rise) {
|
|
|
|
|
s->health--; /* still good */
|
|
|
|
|
s->failed_checks++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
set_server_down(s);
|
|
|
|
|
s->curfd = -1;
|
|
|
|
|
fd_delete(fd);
|
2007-10-14 17:40:01 -04:00
|
|
|
|
|
|
|
|
rv = 0;
|
|
|
|
|
if (global.spread_checks > 0) {
|
2008-01-20 19:54:06 -05:00
|
|
|
rv = srv_getinter(s) * global.spread_checks / 100;
|
2007-10-14 17:40:01 -04:00
|
|
|
rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0)));
|
2008-01-20 19:54:06 -05:00
|
|
|
//fprintf(stderr, "process_chk(%p): (%d+/-%d%%) random=%d\n", s, srv_getinter(s), global.spread_checks, rv);
|
2007-10-14 17:40:01 -04:00
|
|
|
}
|
2008-07-06 18:09:58 -04:00
|
|
|
t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(s) + rv));
|
2006-06-25 20:48:02 -04:00
|
|
|
goto new_chk;
|
|
|
|
|
}
|
2007-11-30 02:33:21 -05:00
|
|
|
/* if result is unknown and there's no timeout, we have to wait again */
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
//fprintf(stderr, "process_chk: 11\n");
|
2007-11-30 02:33:21 -05:00
|
|
|
s->result = SRV_CHK_UNKNOWN;
|
2006-06-25 20:48:02 -04:00
|
|
|
task_queue(t); /* restore t to its place in the task list */
|
2007-05-12 16:35:00 -04:00
|
|
|
*next = t->expire;
|
2007-05-08 18:54:10 -04:00
|
|
|
out:
|
2007-05-12 16:35:00 -04:00
|
|
|
return;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
2007-10-14 17:40:01 -04:00
|
|
|
/*
|
|
|
|
|
* Start health-check.
|
|
|
|
|
* Returns 0 if OK, -1 if error, and prints the error in this case.
|
|
|
|
|
*/
|
|
|
|
|
int start_checks() {
|
|
|
|
|
|
|
|
|
|
struct proxy *px;
|
|
|
|
|
struct server *s;
|
|
|
|
|
struct task *t;
|
|
|
|
|
int nbchk=0, mininter=0, srvpos=0;
|
|
|
|
|
|
2007-10-14 17:05:39 -04:00
|
|
|
/* 1- count the checkers to run simultaneously.
|
|
|
|
|
* We also determine the minimum interval among all of those which
|
|
|
|
|
* have an interval larger than SRV_CHK_INTER_THRES. This interval
|
|
|
|
|
* will be used to spread their start-up date. Those which have
|
|
|
|
|
* a shorter interval will start independantly and will not dictate
|
|
|
|
|
* too short an interval for all others.
|
|
|
|
|
*/
|
2007-10-14 17:40:01 -04:00
|
|
|
for (px = proxy; px; px = px->next) {
|
|
|
|
|
for (s = px->srv; s; s = s->next) {
|
|
|
|
|
if (!(s->state & SRV_CHECKED))
|
|
|
|
|
continue;
|
|
|
|
|
|
2008-01-20 19:54:06 -05:00
|
|
|
if ((srv_getinter(s) >= SRV_CHK_INTER_THRES) &&
|
|
|
|
|
(!mininter || mininter > srv_getinter(s)))
|
|
|
|
|
mininter = srv_getinter(s);
|
2007-10-14 17:40:01 -04:00
|
|
|
|
|
|
|
|
nbchk++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!nbchk)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
srand((unsigned)time(NULL));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* 2- start them as far as possible from each others. For this, we will
|
|
|
|
|
* start them after their interval set to the min interval divided by
|
|
|
|
|
* the number of servers, weighted by the server's position in the list.
|
|
|
|
|
*/
|
|
|
|
|
for (px = proxy; px; px = px->next) {
|
|
|
|
|
for (s = px->srv; s; s = s->next) {
|
|
|
|
|
if (!(s->state & SRV_CHECKED))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if ((t = pool_alloc2(pool2_task)) == NULL) {
|
|
|
|
|
Alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
[MEDIUM] Fix memory freeing at exit
New functions implemented:
- deinit_pollers: called at the end of deinit())
- prune_acl: called via list_for_each_entry_safe
Add missing pool_destroy2 calls:
- p->hdr_idx_pool
- pool2_tree64
Implement all task stopping:
- health-check: needs new "struct task" in the struct server
- queue processing: queue_mgt
- appsess_refresh: appsession_refresh
before (idle system):
==6079== LEAK SUMMARY:
==6079== definitely lost: 1,112 bytes in 75 blocks.
==6079== indirectly lost: 53,356 bytes in 2,090 blocks.
==6079== possibly lost: 52 bytes in 1 blocks.
==6079== still reachable: 150,996 bytes in 504 blocks.
==6079== suppressed: 0 bytes in 0 blocks.
after (idle system):
==6945== LEAK SUMMARY:
==6945== definitely lost: 7,644 bytes in 137 blocks.
==6945== indirectly lost: 9,913 bytes in 587 blocks.
==6945== possibly lost: 0 bytes in 0 blocks.
==6945== still reachable: 0 bytes in 0 blocks.
==6945== suppressed: 0 bytes in 0 blocks.
before (running system for ~2m):
==9343== LEAK SUMMARY:
==9343== definitely lost: 1,112 bytes in 75 blocks.
==9343== indirectly lost: 54,199 bytes in 2,122 blocks.
==9343== possibly lost: 52 bytes in 1 blocks.
==9343== still reachable: 151,128 bytes in 509 blocks.
==9343== suppressed: 0 bytes in 0 blocks.
after (running system for ~2m):
==11616== LEAK SUMMARY:
==11616== definitely lost: 7,644 bytes in 137 blocks.
==11616== indirectly lost: 9,981 bytes in 591 blocks.
==11616== possibly lost: 0 bytes in 0 blocks.
==11616== still reachable: 4 bytes in 1 blocks.
==11616== suppressed: 0 bytes in 0 blocks.
Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
|
|
|
s->check = t;
|
|
|
|
|
|
2008-06-24 02:17:16 -04:00
|
|
|
task_init(t);
|
2007-10-14 17:40:01 -04:00
|
|
|
t->process = process_chk;
|
|
|
|
|
t->context = s;
|
|
|
|
|
|
|
|
|
|
/* check this every ms */
|
2008-07-06 18:09:58 -04:00
|
|
|
t->expire = tick_add(now_ms,
|
|
|
|
|
MS_TO_TICKS(((mininter && mininter >= srv_getinter(s)) ?
|
|
|
|
|
mininter : srv_getinter(s)) * srvpos / nbchk));
|
2007-10-14 17:40:01 -04:00
|
|
|
task_queue(t);
|
|
|
|
|
|
|
|
|
|
srvpos++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2006-06-25 20:48:02 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Local variables:
|
|
|
|
|
* c-indent-level: 8
|
|
|
|
|
* c-basic-offset: 8
|
|
|
|
|
* End:
|
|
|
|
|
*/
|