2010-01-04 09:23:48 -05:00
/*
 * Stick tables management functions.
 *
 * Copyright 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
 * Copyright (C) 2010 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
# include <string.h>
2016-11-22 12:00:53 -05:00
# include <errno.h>
2010-01-04 09:23:48 -05:00
2020-06-09 03:07:15 -04:00
# include <import/ebmbtree.h>
# include <import/ebsttree.h>
# include <import/ebistree.h>
2020-05-27 06:58:42 -04:00
# include <haproxy/api.h>
2022-05-03 05:35:07 -04:00
# include <haproxy/applet.h>
2020-06-09 03:07:15 -04:00
# include <haproxy/arg.h>
2020-06-04 18:00:29 -04:00
# include <haproxy/cfgparse.h>
2020-06-04 14:19:54 -04:00
# include <haproxy/cli.h>
2020-11-20 03:28:26 -05:00
# include <haproxy/dict.h>
2020-06-05 11:27:29 -04:00
# include <haproxy/errors.h>
2020-06-04 11:05:57 -04:00
# include <haproxy/global.h>
2020-06-04 05:40:28 -04:00
# include <haproxy/http_rules.h>
2020-05-27 12:01:47 -04:00
# include <haproxy/list.h>
2020-06-04 16:01:04 -04:00
# include <haproxy/log.h>
2020-06-02 10:48:09 -04:00
# include <haproxy/net_helper.h>
2020-06-04 12:38:21 -04:00
# include <haproxy/peers.h>
2020-06-09 03:07:15 -04:00
# include <haproxy/pool.h>
# include <haproxy/proto_tcp.h>
2020-06-04 16:29:18 -04:00
# include <haproxy/proxy.h>
2020-06-09 03:07:15 -04:00
# include <haproxy/sample.h>
2022-05-27 03:25:10 -04:00
# include <haproxy/sc_strm.h>
2020-06-04 13:58:55 -04:00
# include <haproxy/stats-t.h>
2022-05-27 03:47:12 -04:00
# include <haproxy/stconn.h>
2020-06-09 03:07:15 -04:00
# include <haproxy/stick_table.h>
2020-06-04 17:46:14 -04:00
# include <haproxy/stream.h>
2020-06-04 11:25:40 -04:00
# include <haproxy/task.h>
2020-06-04 11:42:48 -04:00
# include <haproxy/tcp_rules.h>
2021-10-06 10:18:40 -04:00
# include <haproxy/ticks.h>
2020-06-09 03:07:15 -04:00
# include <haproxy/tools.h>
2022-11-24 01:35:17 -05:00
# include <haproxy/xxhash.h>
2010-01-04 09:23:48 -05:00
2024-01-31 04:33:55 -05:00
# if defined(USE_PROMEX)
# include <promex/promex.h>
# endif
/* stick table base fields exposed to stats/metrics consumers */
enum sticktable_field {
	STICKTABLE_SIZE = 0,   /* configured maximum number of entries */
	STICKTABLE_USED,       /* number of entries currently in use */
	/* must always be the last one */
	STICKTABLE_TOTAL_FIELDS
};
2010-01-04 09:23:48 -05:00
2012-04-27 15:37:17 -04:00
/* structure used to return a table key built from a sample */
2017-06-13 13:37:32 -04:00
static THREAD_LOCAL struct stktable_key static_table_key ;
2020-08-28 05:31:31 -04:00
static int ( * smp_fetch_src ) ( const struct arg * , struct sample * , const char * , void * ) ;
2023-01-06 10:09:58 -05:00
struct pool_head * pool_head_stk_ctr __read_mostly = NULL ;
2019-03-14 02:07:41 -04:00
struct stktable * stktables_list ;
struct eb_root stktable_by_name = EB_ROOT ;
2018-11-14 11:54:36 -05:00
/* rounds <i> up to the nearest multiple of sizeof(void *), i.e. pointer-size
 * alignment, so that the stksess placed after the data block stays aligned.
 */
#define round_ptr_size(i) (((i) + (sizeof(void *) - 1)) &~ (sizeof(void *) - 1))
2019-03-14 02:07:41 -04:00
/* This function inserts stktable <t> into the tree of known stick-table.
* The stick - table ID is used as the storing key so it must already have
* been initialized .
*/
void stktable_store_name ( struct stktable * t )
{
t - > name . key = t - > id ;
ebis_insert ( & stktable_by_name , & t - > name ) ;
}
struct stktable * stktable_find_by_name ( const char * name )
{
struct ebpt_node * node ;
struct stktable * t ;
node = ebis_lookup ( & stktable_by_name , name ) ;
if ( node ) {
t = container_of ( node , struct stktable , name ) ;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero
According to coding-style.txt it is recommended to use:
`strcmp(a, b) == 0` instead of `!strcmp(a, b)`
So let's do this.
The change was performed by running the following (very long) coccinelle patch
on src/:
@@
statement S;
expression E;
expression F;
@@
if (
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) != 0
)
(
S
|
{ ... }
)
@@
statement S;
expression E;
expression F;
@@
if (
- !
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) == 0
)
(
S
|
{ ... }
)
@@
expression E;
expression F;
expression G;
@@
(
G &&
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) != 0
)
@@
expression E;
expression F;
expression G;
@@
(
G ||
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) != 0
)
@@
expression E;
expression F;
expression G;
@@
(
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) != 0
&& G
)
@@
expression E;
expression F;
expression G;
@@
(
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) != 0
|| G
)
@@
expression E;
expression F;
expression G;
@@
(
G &&
- !
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) == 0
)
@@
expression E;
expression F;
expression G;
@@
(
G ||
- !
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) == 0
)
@@
expression E;
expression F;
expression G;
@@
(
- !
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) == 0
&& G
)
@@
expression E;
expression F;
expression G;
@@
(
- !
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) == 0
|| G
)
@@
expression E;
expression F;
expression G;
@@
(
- !
(
dns_hostname_cmp
|
eb_memcmp
|
memcmp
|
strcasecmp
|
strcmp
|
strncasecmp
|
strncmp
)
- (E, F)
+ (E, F) == 0
)
2021-01-02 16:31:53 -05:00
if ( strcmp ( t - > id , name ) = = 0 )
2019-03-14 02:07:41 -04:00
return t ;
}
return NULL ;
}
2010-01-04 09:23:48 -05:00
/*
2010-06-06 05:56:36 -04:00
* Free an allocated sticky session < ts > , and decrease sticky sessions counter
2022-10-11 10:19:35 -04:00
* in table < t > . It ' s safe to call it under or out of a lock .
2010-01-04 09:23:48 -05:00
*/
2017-06-13 13:37:32 -04:00
void __stksess_free ( struct stktable * t , struct stksess * ts )
2010-01-04 09:23:48 -05:00
{
2022-10-11 10:19:35 -04:00
HA_ATOMIC_DEC ( & t - > current ) ;
2018-11-14 11:54:36 -05:00
pool_free ( t - > pool , ( void * ) ts - round_ptr_size ( t - > data_size ) ) ;
2010-01-04 09:23:48 -05:00
}
2017-06-13 13:37:32 -04:00
/*
* Free an allocated sticky session < ts > , and decrease sticky sessions counter
* in table < t > .
* This function locks the table
*/
void stksess_free ( struct stktable * t , struct stksess * ts )
{
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
uint shard ;
size_t len ;
2020-11-20 03:28:26 -05:00
void * data ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
2020-11-20 03:28:26 -05:00
data = stktable_data_ptr ( t , ts , STKTABLE_DT_SERVER_KEY ) ;
if ( data ) {
2021-06-30 11:18:28 -04:00
dict_entry_unref ( & server_key_dict , stktable_data_cast ( data , std_t_dict ) ) ;
stktable_data_cast ( data , std_t_dict ) = NULL ;
2020-11-20 03:28:26 -05:00
}
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( t - > type = = SMP_T_STR )
len = strlen ( ( const char * ) ts - > key . key ) ;
else
len = t - > key_size ;
shard = stktable_calc_shard_num ( t , ts - > key . key , len ) ;
2024-04-24 02:19:20 -04:00
/* make the compiler happy when shard is not used without threads */
ALREADY_CHECKED ( shard ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2017-06-13 13:37:32 -04:00
__stksess_free ( t , ts ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2017-06-13 13:37:32 -04:00
}
2010-08-03 14:34:06 -04:00
/*
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
* Kill an stksess ( only if its ref_cnt is zero ) . This must be called under the
* write lock . Returns zero if could not deleted , non - zero otherwise .
2010-08-03 14:34:06 -04:00
*/
2017-06-13 13:37:32 -04:00
int __stksess_kill ( struct stktable * t , struct stksess * ts )
2010-08-03 14:34:06 -04:00
{
2024-05-23 05:14:41 -04:00
int updt_locked = 0 ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
if ( HA_ATOMIC_LOAD ( & ts - > ref_cnt ) )
2017-06-13 13:37:32 -04:00
return 0 ;
2010-08-03 14:34:06 -04:00
2023-05-27 13:55:15 -04:00
if ( ts - > upd . node . leaf_p ) {
2024-05-23 05:14:41 -04:00
updt_locked = 1 ;
2023-05-27 13:55:15 -04:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
2024-05-23 05:14:41 -04:00
if ( HA_ATOMIC_LOAD ( & ts - > ref_cnt ) )
goto out_unlock ;
2023-05-27 13:55:15 -04:00
}
2024-05-23 05:14:41 -04:00
eb32_delete ( & ts - > exp ) ;
eb32_delete ( & ts - > upd ) ;
2010-08-03 14:34:06 -04:00
ebmb_delete ( & ts - > key ) ;
2017-06-13 13:37:32 -04:00
__stksess_free ( t , ts ) ;
2024-05-23 05:14:41 -04:00
out_unlock :
if ( updt_locked )
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
2017-06-13 13:37:32 -04:00
return 1 ;
}
/*
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
* Decrease the refcount if decrefcnt is not 0 , and try to kill the stksess .
* Returns non - zero if deleted , zero otherwise .
2017-06-13 13:37:32 -04:00
* This function locks the table
*/
int stksess_kill ( struct stktable * t , struct stksess * ts , int decrefcnt )
{
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
uint shard ;
size_t len ;
2017-06-13 13:37:32 -04:00
int ret ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
if ( decrefcnt & & HA_ATOMIC_SUB_FETCH ( & ts - > ref_cnt , 1 ) ! = 0 )
return 0 ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( t - > type = = SMP_T_STR )
len = strlen ( ( const char * ) ts - > key . key ) ;
else
len = t - > key_size ;
shard = stktable_calc_shard_num ( t , ts - > key . key , len ) ;
2024-04-24 02:19:20 -04:00
/* make the compiler happy when shard is not used without threads */
ALREADY_CHECKED ( shard ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2017-06-13 13:37:32 -04:00
ret = __stksess_kill ( t , ts ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2017-06-13 13:37:32 -04:00
return ret ;
2010-08-03 14:34:06 -04:00
}
2010-01-04 09:23:48 -05:00
/*
2010-06-06 05:56:36 -04:00
* Initialize or update the key in the sticky session < ts > present in table < t >
* from the value present in < key > .
2010-01-04 09:23:48 -05:00
*/
2010-06-06 06:11:37 -04:00
void stksess_setkey ( struct stktable * t , struct stksess * ts , struct stktable_key * key )
2010-01-04 09:23:48 -05:00
{
2015-07-24 02:46:42 -04:00
if ( t - > type ! = SMP_T_STR )
2010-06-06 06:57:10 -04:00
memcpy ( ts - > key . key , key - > key , t - > key_size ) ;
2010-01-04 09:23:48 -05:00
else {
2010-06-06 06:57:10 -04:00
memcpy ( ts - > key . key , key - > key , MIN ( t - > key_size - 1 , key - > key_len ) ) ;
ts - > key . key [ MIN ( t - > key_size - 1 , key - > key_len ) ] = 0 ;
2010-01-04 09:23:48 -05:00
}
}
2022-11-29 11:36:44 -05:00
/* return a shard number for key <key> of len <len> present in table <t>. This
* takes into account the presence or absence of a peers section with shards
* and the number of shards , the table ' s hash_seed , and of course the key . The
* caller must pass a valid < key > and < len > . The shard number to be used by the
* entry is returned ( from 1 to nb_shards , otherwise 0 for none ) .
2022-10-17 08:58:19 -04:00
*/
2022-11-29 11:36:44 -05:00
int stktable_get_key_shard ( struct stktable * t , const void * key , size_t len )
2022-10-17 08:58:19 -04:00
{
2022-11-29 11:36:44 -05:00
/* no peers section or no shards in the peers section */
if ( ! t - > peers . p | | ! t - > peers . p - > nb_shards )
return 0 ;
2022-10-17 08:58:19 -04:00
2022-11-29 11:36:44 -05:00
return XXH64 ( key , len , t - > hash_seed ) % t - > peers . p - > nb_shards + 1 ;
2022-10-17 08:58:19 -04:00
}
/*
* Set the shard for < key > key of < ts > sticky session attached to < t > stick table .
2022-11-29 11:36:44 -05:00
* Use zero for stick - table without peers synchronisation .
2022-10-17 08:58:19 -04:00
*/
static void stksess_setkey_shard ( struct stktable * t , struct stksess * ts ,
struct stktable_key * key )
{
2022-11-29 11:36:44 -05:00
size_t keylen ;
2022-10-17 08:58:19 -04:00
2022-11-29 11:36:44 -05:00
if ( t - > type = = SMP_T_STR )
keylen = key - > key_len ;
2022-10-17 08:58:19 -04:00
else
2022-11-29 11:36:44 -05:00
keylen = t - > key_size ;
ts - > shard = stktable_get_key_shard ( t , key - > key , keylen ) ;
2022-10-17 08:58:19 -04:00
}
2010-01-04 09:23:48 -05:00
/*
2010-06-06 06:11:37 -04:00
* Init sticky session < ts > of table < t > . The data parts are cleared and < ts >
* is returned .
2010-01-04 09:23:48 -05:00
*/
2017-06-13 13:37:32 -04:00
static struct stksess * __stksess_init ( struct stktable * t , struct stksess * ts )
2010-01-04 09:23:48 -05:00
{
2010-06-06 06:11:37 -04:00
memset ( ( void * ) ts - t - > data_size , 0 , t - > data_size ) ;
2010-06-14 08:53:07 -04:00
ts - > ref_cnt = 0 ;
2022-11-29 10:08:35 -05:00
ts - > shard = 0 ;
2024-04-02 12:49:53 -04:00
ts - > seen = 0 ;
2010-06-06 06:57:10 -04:00
ts - > key . node . leaf_p = NULL ;
ts - > exp . node . leaf_p = NULL ;
2010-09-23 12:16:52 -04:00
ts - > upd . node . leaf_p = NULL ;
2017-06-13 13:37:32 -04:00
ts - > expire = tick_add ( now_ms , MS_TO_TICKS ( t - > expire ) ) ;
2017-11-07 04:42:54 -05:00
HA_RWLOCK_INIT ( & ts - > lock ) ;
2010-01-04 09:23:48 -05:00
return ts ;
}
/*
2010-06-06 05:56:36 -04:00
* Trash oldest < to_batch > sticky sessions from table < t >
BUG/MEDIUM: stick-table: limit the time spent purging old entries
An interesting case was reported with threads and moderately sized
stick-tables. Sometimes the watchdog would trigger during the purge.
It turns out that the stick tables were sized in the 10s of K entries
which is the order of magnitude of the possible number of connections,
and that threads were used over distinct NUMA nodes. While at first
glance nothing looks problematic there, actually there is a risk that
a thread trying to purge the table faces 100% of entries still in use
by a connection with (ts->ref_cnt > 0), and ends up scanning the whole
table, while other threads on the other NUMA node are causing the
cache lines to bounce back and forth and considerably slow down its
progress to the point of possibly spending hundreds of milliseconds
there, multiplied by the number of queued threads all failing on the
same point.
Interestingly, smaller tables would not trigger it because the scan
would be faster, and larger ones would not trigger it because plenty
of entries would be idle!
The most efficient solution is to increase the table size to be large
enough for this never to happen, but this is not reliable. We could
have a parallel list of idle entries but that would significantly
increase the storage and processing cost only to improve a few rare
corner cases.
This patch takes a more pragmatic approach, it considers that it will
not visit more than twice the number of nodes to be deleted, which
means that it accepts to fail up to 50% of the time. Given that very
small batches are programmed each time (1/256 of the table size), this
means the operation will finish quickly (128 times faster than now),
and will reduce the inter-thread contention. If this needs to be
reconsidered, it will probably mean that the batch size needs to be
fixed differently.
This needs to be backported to stable releases which extensively use
threads, typically 2.0.
Kudos to Nenad Merdanovic for figuring the root cause triggering this!
2020-11-03 11:47:41 -05:00
* Returns number of trashed sticky sessions . It may actually trash less
* than expected if finding these requires too long a search time ( e . g .
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
* most of them have ts - > ref_cnt > 0 ) . This function locks the table .
2010-01-04 09:23:48 -05:00
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int stktable_trash_oldest ( struct stktable * t , int to_batch )
2010-01-04 09:23:48 -05:00
{
struct stksess * ts ;
struct eb32_node * eb ;
BUG/MEDIUM: stick-table: limit the time spent purging old entries
An interesting case was reported with threads and moderately sized
stick-tables. Sometimes the watchdog would trigger during the purge.
It turns out that the stick tables were sized in the 10s of K entries
which is the order of magnitude of the possible number of connections,
and that threads were used over distinct NUMA nodes. While at first
glance nothing looks problematic there, actually there is a risk that
a thread trying to purge the table faces 100% of entries still in use
by a connection with (ts->ref_cnt > 0), and ends up scanning the whole
table, while other threads on the other NUMA node are causing the
cache lines to bounce back and forth and considerably slow down its
progress to the point of possibly spending hundreds of milliseconds
there, multiplied by the number of queued threads all failing on the
same point.
Interestingly, smaller tables would not trigger it because the scan
would be faster, and larger ones would not trigger it because plenty
of entries would be idle!
The most efficient solution is to increase the table size to be large
enough for this never to happen, but this is not reliable. We could
have a parallel list of idle entries but that would significantly
increase the storage and processing cost only to improve a few rare
corner cases.
This patch takes a more pragmatic approach, it considers that it will
not visit more than twice the number of nodes to be deleted, which
means that it accepts to fail up to 50% of the time. Given that very
small batches are programmed each time (1/256 of the table size), this
means the operation will finish quickly (128 times faster than now),
and will reduce the inter-thread contention. If this needs to be
reconsidered, it will probably mean that the batch size needs to be
fixed differently.
This needs to be backported to stable releases which extensively use
threads, typically 2.0.
Kudos to Nenad Merdanovic for figuring the root cause triggering this!
2020-11-03 11:47:41 -05:00
int max_search = to_batch * 2 ; // no more than 50% misses
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int max_per_shard = ( to_batch + CONFIG_HAP_TBL_BUCKETS - 1 ) / CONFIG_HAP_TBL_BUCKETS ;
int done_per_shard ;
2010-01-04 09:23:48 -05:00
int batched = 0 ;
2024-05-21 08:21:58 -04:00
int updt_locked ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int looped ;
int shard ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
shard = 0 ;
2010-01-04 09:23:48 -05:00
while ( batched < to_batch ) {
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
done_per_shard = 0 ;
looped = 0 ;
2024-05-21 08:21:58 -04:00
updt_locked = 0 ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
eb = eb32_lookup_ge ( & t - > shards [ shard ] . exps , now_ms - TIMER_LOOK_BACK ) ;
while ( batched < to_batch & & done_per_shard < max_per_shard ) {
if ( unlikely ( ! eb ) ) {
/* we might have reached the end of the tree, typically because
* < now_ms > is in the first half and we ' re first scanning the last
* half . Let ' s loop back to the beginning of the tree now if we
* have not yet visited it .
*/
if ( looped )
break ;
looped = 1 ;
eb = eb32_first ( & t - > shards [ shard ] . exps ) ;
if ( likely ( ! eb ) )
break ;
}
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( - - max_search < 0 )
2010-01-04 09:23:48 -05:00
break ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* timer looks expired, detach it from the queue */
ts = eb32_entry ( eb , struct stksess , exp ) ;
eb = eb32_next ( eb ) ;
BUG/MEDIUM: stick-table: limit the time spent purging old entries
An interesting case was reported with threads and moderately sized
stick-tables. Sometimes the watchdog would trigger during the purge.
It turns out that the stick tables were sized in the 10s of K entries
which is the order of magnitude of the possible number of connections,
and that threads were used over distinct NUMA nodes. While at first
glance nothing looks problematic there, actually there is a risk that
a thread trying to purge the table faces 100% of entries still in use
by a connection with (ts->ref_cnt > 0), and ends up scanning the whole
table, while other threads on the other NUMA node are causing the
cache lines to bounce back and forth and considerably slow down its
progress to the point of possibly spending hundreds of milliseconds
there, multiplied by the number of queued threads all failing on the
same point.
Interestingly, smaller tables would not trigger it because the scan
would be faster, and larger ones would not trigger it because plenty
of entries would be idle!
The most efficient solution is to increase the table size to be large
enough for this never to happen, but this is not reliable. We could
have a parallel list of idle entries but that would significantly
increase the storage and processing cost only to improve a few rare
corner cases.
This patch takes a more pragmatic approach, it considers that it will
not visit more than twice the number of nodes to be deleted, which
means that it accepts to fail up to 50% of the time. Given that very
small batches are programmed each time (1/256 of the table size), this
means the operation will finish quickly (128 times faster than now),
and will reduce the inter-thread contention. If this needs to be
reconsidered, it will probably mean that the batch size needs to be
fixed differently.
This needs to be backported to stable releases which extensively use
threads, typically 2.0.
Kudos to Nenad Merdanovic for figuring the root cause triggering this!
2020-11-03 11:47:41 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* don't delete an entry which is currently referenced */
if ( HA_ATOMIC_LOAD ( & ts - > ref_cnt ) ! = 0 )
continue ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
eb32_delete ( & ts - > exp ) ;
2010-06-14 08:53:07 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ts - > expire ! = ts - > exp . key ) {
if ( ! tick_isset ( ts - > expire ) )
continue ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
ts - > exp . key = ts - > expire ;
eb32_insert ( & t - > shards [ shard ] . exps , & ts - > exp ) ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* the update might have jumped beyond the next element,
* possibly causing a wrapping . We need to check whether
* the next element should be used instead . If the next
* element doesn ' t exist it means we ' re on the right
* side and have to check the first one then . If it
* exists and is closer , we must use it , otherwise we
* use the current one .
*/
if ( ! eb )
eb = eb32_first ( & t - > shards [ shard ] . exps ) ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ! eb | | tick_is_lt ( ts - > exp . key , eb - > key ) )
eb = & ts - > exp ;
BUG/MEDIUM: stick-table: do not leave entries in end of window during purge
At some moments expired stick table records stop being removed. This
happens when the internal time wraps around the 32-bit limit, or every
49.7 days. What precisely happens is that some elements that are collected
close to the end of the time window (2^32 - table's "expire" setting)
might have been updated and will be requeued further, at the beginning
of the next window. Here, three bad situations happen:
- the incorrect integer-based comparison that is not aware of wrapping
will result in the scan to restart from the freshly requeued element,
skipping all those at the end of the window. The net effect of this
is that at each wakeup of the expiration task, only one element from
the end of the window will be expired, and other ones will remain
there for a very long time, especially if they have to wait for all
the predecessors to be picked one at a time after slow wakeups due
to a long expiration ; this is what was observed in issue #2034
making the table fill up and appear as not expiring at all, and it
seems that issue #2024 reports the same problem at the same moment
(since such issues happen for everyone roughly at the same time
when the clock doesn't drift too much).
- the elements that were placed at the beginning of the next window
are skipped as well for as long as there are refreshed entries at
the end of the previous window, so these ones participate to filling
the table as well. This is caused by the restart from the current,
updated node that is generally placed after most other less recently
updated elements.
- once the last element at the end of the window is picked, suddenly
there is a large amount of expired entries at the beginning of the
next window that all have to be requeued. If the expiration delay
is large, the number can be big and it can take a long time, which
can very likely explain the periodic crashes reported in issue #2025.
Limiting the batch size as done in commit dfe79251d ("BUG/MEDIUM:
stick-table: limit the time spent purging old entries") would make
sense for process_table_expire() as well.
This patch addresses the incorrect tree scan algorithm to make sure that:
- there's always a next element to compare against, even when dealing
with the last one in the tree, the first one must be used ;
- time comparisons used to decide whether to restart from the current
element use tick_is_lt() as it is the only case where we know the
current element will be placed before any other one (since the tree
respects insertion ordering for duplicates)
In order to reproduce the issue, it was found that injecting traffic on
a random key that spans over half of the size of a table whose expiration
is set to 15s while the date is going to wrap in 20s does exhibit an
increase of the table's size 5s after startup, when entries start to be
pushed to the next window. It's more effective when a second load
generator constantly hammers a same key to be certain that none of them
is ready to expire. This doesn't happen anymore after this patch.
This fix needs to be backported to all stable versions. The bug has been
there for as long as the stick tables were introduced in 1.4-dev7 with
commit 3bd697e07 ("[MEDIUM] Add stick table (persistence) management
functions and types"). A cleanup could consist in deduplicating that
code by having process_table_expire() call __stktable_trash_oldest(),
with that one improved to support an optional time check.
2023-02-07 13:27:06 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
continue ;
}
2010-01-04 09:23:48 -05:00
2024-05-21 08:21:58 -04:00
/* if the entry is in the update list, we must be extremely careful
* because peers can see it at any moment and start to use it . Peers
* will take the table ' s updt_lock for reading when doing that , and
* with that lock held , will grab a ref_cnt before releasing the
* lock . So we must take this lock as well and check the ref_cnt .
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess entries already have a shard number; it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ts - > upd . node . leaf_p ) {
2024-05-21 08:21:58 -04:00
if ( ! updt_locked ) {
updt_locked = 1 ;
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
}
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt .
*/
if ( HA_ATOMIC_LOAD ( & ts - > ref_cnt ) )
continue ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
}
2024-05-21 08:21:58 -04:00
/* session expired, trash it */
ebmb_delete ( & ts - > key ) ;
eb32_delete ( & ts - > upd ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
__stksess_free ( t , ts ) ;
batched + + ;
done_per_shard + + ;
2010-01-04 09:23:48 -05:00
}
2024-05-21 08:21:58 -04:00
if ( updt_locked )
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2024-04-12 04:02:26 -04:00
if ( max_search < = 0 )
break ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
shard = ( shard + 1 ) % CONFIG_HAP_TBL_BUCKETS ;
if ( ! shard )
break ;
2010-01-04 09:23:48 -05:00
}
return batched ;
}
/*
2010-06-06 05:56:36 -04:00
* Allocate and initialise a new sticky session .
* The new sticky session is returned or NULL in case of lack of memory .
* Sticky sessions should only be allocated this way , and must be freed using
2013-04-11 10:55:37 -04:00
* stksess_free ( ) . Table < t > ' s sticky session counter is increased . If < key >
2022-10-11 10:19:35 -04:00
* is not NULL , it is assigned to the new session . It must be called unlocked
* as it may rely on a lock to trash older entries .
2010-01-04 09:23:48 -05:00
*/
2022-10-11 10:19:35 -04:00
struct stksess * stksess_new ( struct stktable * t , struct stktable_key * key )
2010-01-04 09:23:48 -05:00
{
struct stksess * ts ;
2022-10-11 10:19:35 -04:00
unsigned int current ;
2010-01-04 09:23:48 -05:00
2022-10-11 10:19:35 -04:00
current = HA_ATOMIC_FETCH_ADD ( & t - > current , 1 ) ;
2010-01-04 09:23:48 -05:00
2022-10-11 10:19:35 -04:00
if ( unlikely ( current > = t - > size ) ) {
/* the table was already full, we may have to purge entries */
if ( t - > nopurge | | ! stktable_trash_oldest ( t , ( t - > size > > 8 ) + 1 ) ) {
HA_ATOMIC_DEC ( & t - > current ) ;
2010-01-04 09:23:48 -05:00
return NULL ;
2022-10-11 10:19:35 -04:00
}
2010-01-04 09:23:48 -05:00
}
2017-11-24 11:34:44 -05:00
ts = pool_alloc ( t - > pool ) ;
2010-01-04 09:23:48 -05:00
if ( ts ) {
2018-11-14 11:54:36 -05:00
ts = ( void * ) ts + round_ptr_size ( t - > data_size ) ;
2017-06-13 13:37:32 -04:00
__stksess_init ( t , ts ) ;
2022-10-17 08:58:19 -04:00
if ( key ) {
2013-04-11 10:55:37 -04:00
stksess_setkey ( t , ts , key ) ;
2022-10-17 08:58:19 -04:00
stksess_setkey_shard ( t , ts , key ) ;
}
2010-01-04 09:23:48 -05:00
}
return ts ;
}
/*
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
* Looks in table < t > for a sticky session matching key < key > in shard < shard > .
2010-06-06 05:56:36 -04:00
* Returns pointer on requested sticky session or NULL if none was found .
2010-01-04 09:23:48 -05:00
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
struct stksess * __stktable_lookup_key ( struct stktable * t , struct stktable_key * key , uint shard )
2010-01-04 09:23:48 -05:00
{
struct ebmb_node * eb ;
2015-07-24 02:46:42 -04:00
if ( t - > type = = SMP_T_STR )
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
eb = ebst_lookup_len ( & t - > shards [ shard ] . keys , key - > key , key - > key_len + 1 < t - > key_size ? key - > key_len : t - > key_size - 1 ) ;
2010-01-04 09:23:48 -05:00
else
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
eb = ebmb_lookup ( & t - > shards [ shard ] . keys , key - > key , t - > key_size ) ;
2010-01-04 09:23:48 -05:00
if ( unlikely ( ! eb ) ) {
/* no session found */
return NULL ;
}
2010-06-06 06:57:10 -04:00
return ebmb_entry ( eb , struct stksess , key ) ;
2010-01-04 09:23:48 -05:00
}
2017-06-13 13:37:32 -04:00
/*
* Looks in table < t > for a sticky session matching key < key > .
* Returns pointer on requested sticky session or NULL if none was found .
* The refcount of the found entry is increased and this function
* is protected using the table lock
2010-06-20 06:27:21 -04:00
*/
2017-06-13 13:37:32 -04:00
struct stksess * stktable_lookup_key ( struct stktable * t , struct stktable_key * key )
2010-06-20 06:27:21 -04:00
{
struct stksess * ts ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
uint shard ;
size_t len ;
if ( t - > type = = SMP_T_STR )
len = key - > key_len + 1 < t - > key_size ? key - > key_len : t - > key_size - 1 ;
else
len = t - > key_size ;
2010-06-20 06:27:21 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
shard = stktable_calc_shard_num ( t , key - > key , len ) ;
HA_RWLOCK_RDLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
ts = __stktable_lookup_key ( t , key , shard ) ;
2017-06-13 13:37:32 -04:00
if ( ts )
2022-10-11 09:42:54 -04:00
HA_ATOMIC_INC ( & ts - > ref_cnt ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2010-06-20 06:27:21 -04:00
return ts ;
}
2023-12-18 09:37:25 -05:00
/*
* Looks in table < t > for a sticky session matching ptr < ptr > .
* Returns pointer on requested sticky session or NULL if none was found .
* The refcount of the found entry is increased and this function
* is protected using the table lock
*/
struct stksess * stktable_lookup_ptr ( struct stktable * t , void * ptr )
{
struct stksess * ts = NULL ;
struct ebmb_node * eb ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int shard ;
2023-12-18 09:37:25 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
for ( shard = 0 ; shard < CONFIG_HAP_TBL_BUCKETS ; shard + + ) {
HA_RWLOCK_RDLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
/* linear search is performed, this could be optimized by adding
* an eb node dedicated to ptr lookups into stksess struct to
* leverage eb_lookup function instead .
*/
eb = ebmb_first ( & t - > shards [ shard ] . keys ) ;
while ( eb ) {
struct stksess * cur ;
2023-12-18 09:37:25 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
cur = ebmb_entry ( eb , struct stksess , key ) ;
if ( cur = = ptr ) {
ts = cur ;
break ;
}
eb = ebmb_next ( eb ) ;
2023-12-18 09:37:25 -05:00
}
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ts )
HA_ATOMIC_INC ( & ts - > ref_cnt ) ;
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
if ( ts )
return ts ;
2023-12-18 09:37:25 -05:00
}
return ts ;
}
2010-06-06 09:38:59 -04:00
/*
* Looks in table < t > for a sticky session with same key as < ts > .
* Returns pointer on requested sticky session or NULL if none was found .
2010-01-04 09:23:48 -05:00
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it is not very clear yet , so at this point it ' s sure this will not work .
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts, uint shard)
{
	/* String keys live in a string-indexed tree (ebst), every other key
	 * type lives in a fixed-size memory-block tree (ebmb) of <key_size>
	 * bytes, so the lookup primitive depends on the table's type.
	 * The caller is responsible for holding the shard's lock.
	 */
	struct ebmb_node *node;

	node = (t->type == SMP_T_STR)
		? ebst_lookup(&t->shards[shard].keys, (char *)ts->key.key)
		: ebmb_lookup(&t->shards[shard].keys, ts->key.key, t->key_size);

	if (unlikely(!node))
		return NULL;

	/* convert the tree node back to its enclosing sticky session */
	return ebmb_entry(node, struct stksess, key);
}
2010-01-04 09:23:48 -05:00
2017-06-13 13:37:32 -04:00
/*
* Looks in table < t > for a sticky session with same key as < ts > .
* Returns pointer on requested sticky session or NULL if none was found .
* The refcount of the found entry is increased and this function
* is protected using the table lock
*/
struct stksess * stktable_lookup ( struct stktable * t , struct stksess * ts )
{
struct stksess * lts ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
uint shard ;
size_t len ;
if ( t - > type = = SMP_T_STR )
len = strlen ( ( const char * ) ts - > key . key ) ;
else
len = t - > key_size ;
2017-06-13 13:37:32 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
shard = stktable_calc_shard_num ( t , ts - > key . key , len ) ;
HA_RWLOCK_RDLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
lts = __stktable_lookup ( t , ts , shard ) ;
2017-06-13 13:37:32 -04:00
if ( lts )
2022-10-11 09:42:54 -04:00
HA_ATOMIC_INC ( & lts - > ref_cnt ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
2017-06-13 13:37:32 -04:00
return lts ;
}
2010-06-06 11:58:34 -04:00
/* Update the expiration timer for <ts> but do not touch its expiration node.
* The table ' s expiration timer is updated if set .
2017-06-13 13:37:32 -04:00
* The node will be also inserted into the update tree if needed , at a position
2022-10-11 14:17:58 -04:00
* depending if the update is a local or coming from a remote node .
* If < decrefcnt > is set , the ts entry ' s ref_cnt will be decremented . The table ' s
2024-04-02 13:04:12 -04:00
* updt_lock may be taken for writes .
2010-06-06 11:58:34 -04:00
*/
2022-10-11 14:17:58 -04:00
void stktable_touch_with_exp ( struct stktable * t , struct stksess * ts , int local , int expire , int decrefcnt )
2010-06-06 11:58:34 -04:00
{
2010-09-23 12:16:52 -04:00
struct eb32_node * eb ;
2023-08-07 15:03:24 -04:00
int use_wrlock = 0 ;
2023-05-27 14:35:15 -04:00
int do_wakeup = 0 ;
2022-10-11 14:31:04 -04:00
if ( expire ! = HA_ATOMIC_LOAD ( & ts - > expire ) ) {
/* we'll need to set the expiration and to wake up the expiration timer .*/
HA_ATOMIC_STORE ( & ts - > expire , expire ) ;
2022-10-12 06:00:50 -04:00
stktable_requeue_exp ( t , ts ) ;
2010-06-06 11:58:34 -04:00
}
2010-09-23 12:16:52 -04:00
2017-06-13 13:37:32 -04:00
/* If sync is enabled */
if ( t - > sync_task ) {
2023-08-07 15:03:24 -04:00
/* We'll need to reliably check that the entry is in the tree.
* It ' s only inserted / deleted using a write lock so a read lock
* is sufficient to verify this . We may then need to upgrade it
* to perform an update ( which is rare under load ) , and if the
* upgrade fails , we ' ll try again with a write lock directly .
*/
if ( use_wrlock )
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
2017-06-13 13:37:32 -04:00
if ( local ) {
2023-08-07 15:03:24 -04:00
/* Check if this entry is not in the tree or not
* scheduled for at least one peer .
2022-10-11 14:31:04 -04:00
*/
2024-04-02 13:04:12 -04:00
if ( ! ts - > upd . node . leaf_p | | _HA_ATOMIC_LOAD ( & ts - > seen ) ) {
2023-08-07 15:03:24 -04:00
/* Time to upgrade the read lock to write lock if needed */
if ( ! use_wrlock ) {
2024-04-02 13:04:12 -04:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
2023-08-07 15:03:24 -04:00
use_wrlock = 1 ;
}
/* here we're write-locked */
2024-04-02 12:49:53 -04:00
ts - > seen = 0 ;
2017-06-13 13:37:32 -04:00
ts - > upd . key = + + t - > update ;
t - > localupdate = t - > update ;
eb32_delete ( & ts - > upd ) ;
eb = eb32_insert ( & t - > updates , & ts - > upd ) ;
if ( eb ! = & ts - > upd ) {
eb32_delete ( eb ) ;
eb32_insert ( & t - > updates , & ts - > upd ) ;
}
}
2023-05-27 14:35:15 -04:00
do_wakeup = 1 ;
2017-06-13 13:37:32 -04:00
}
else {
BUG/MEDIUM: stick-tables: make sure never to create two same remote entries
In GH issue #2552, Christian Ruppert reported an increase in crashes
with recent 3.0-dev versions, always related with stick-tables and peers.
One particularity of his config is that it has a lot of peers.
While trying to reproduce, it empirically was found that firing 10 load
generators at 10 different haproxy instances tracking a random key among
100k against a table of max 5k entries, on 8 threads and between a total
of 50 parallel peers managed to reproduce the crashes in seconds, very
often in ebtree deletion or insertion code, but not only.
The debugging revealed that the crashes are often caused by a parent node
being corrupted while delete/insert tries to update it regarding a recently
inserted/removed node, and that that corrupted node had always been proven
to be deleted, then immediately freed, so it ought not be visited in the
tree from functions enclosed between a pair of lock/unlock. As such the
only possibility was that it had experienced unexpected inserts. Also,
running with pool integrity checking would 90% of the time cause crashes
during allocation based on corrupted contents in the node, likely because
it was found at two places in the same tree and still present as a parent
of a node being deleted or inserted (hence the __stksess_free and
stktable_trash_oldest callers being visible on these items).
Indeed the issue is in fact related to the test set (occasionally redundant
keys, many peers). What happens is that sometimes, a same key is learned
from two different peers. When it is learned for the first time, we end up
in stktable_touch_with_exp() in the "else" branch, where the test for
existence is made before taking the lock (since commit cfeca3a3a3
("MEDIUM: stick-table: touch updates under an upgradable read lock") that
was merged in 2.9), and from there the entry is added. But if one of the
threads manages to insert it before the other thread takes the lock, then
the second thread will try to insert this node again. And inserting an
already inserted node will corrupt the tree (note that we never switched
to enforcing a check in insertion code on this due to API history that
would break various code parts).
Here the solution is simple, it requires to recheck leaf_p after getting
the lock, to avoid touching anything if the entry has already been
inserted in the mean time.
Many thanks to Christian Ruppert for testing this and for his invaluable
help on this hard-to-trigger issue.
This fix needs to be backported to 2.9.
2024-05-23 14:06:25 -04:00
/* Note: we land here when learning new entries from
* remote peers . We hold one ref_cnt so the entry
* cannot vanish under us , however if two peers create
* the same key at the exact same time , we must be
* careful not to perform two parallel inserts ! Hence
* we need to first check leaf_p to know if the entry
* is new , then lock the tree and check the entry again
* ( since another thread could have created it in the
* mean time ) .
*/
2017-06-13 13:37:32 -04:00
if ( ! ts - > upd . node . leaf_p ) {
2023-08-07 15:03:24 -04:00
/* Time to upgrade the read lock to write lock if needed */
if ( ! use_wrlock ) {
2024-04-02 13:04:12 -04:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
2023-08-07 15:03:24 -04:00
use_wrlock = 1 ;
}
/* here we're write-locked */
BUG/MEDIUM: stick-tables: make sure never to create two same remote entries
In GH issue #2552, Christian Ruppert reported an increase in crashes
with recent 3.0-dev versions, always related with stick-tables and peers.
One particularity of his config is that it has a lot of peers.
While trying to reproduce, it empirically was found that firing 10 load
generators at 10 different haproxy instances tracking a random key among
100k against a table of max 5k entries, on 8 threads and between a total
of 50 parallel peers managed to reproduce the crashes in seconds, very
often in ebtree deletion or insertion code, but not only.
The debugging revealed that the crashes are often caused by a parent node
being corrupted while delete/insert tries to update it regarding a recently
inserted/removed node, and that that corrupted node had always been proven
to be deleted, then immediately freed, so it ought not be visited in the
tree from functions enclosed between a pair of lock/unlock. As such the
only possibility was that it had experienced unexpected inserts. Also,
running with pool integrity checking would 90% of the time cause crashes
during allocation based on corrupted contents in the node, likely because
it was found at two places in the same tree and still present as a parent
of a node being deleted or inserted (hence the __stksess_free and
stktable_trash_oldest callers being visible on these items).
Indeed the issue is in fact related to the test set (occasionally redundant
keys, many peers). What happens is that sometimes, a same key is learned
from two different peers. When it is learned for the first time, we end up
in stktable_touch_with_exp() in the "else" branch, where the test for
existence is made before taking the lock (since commit cfeca3a3a3
("MEDIUM: stick-table: touch updates under an upgradable read lock") that
was merged in 2.9), and from there the entry is added. But if one of the
threads manages to insert it before the other thread takes the lock, then
the second thread will try to insert this node again. And inserting an
already inserted node will corrupt the tree (note that we never switched
to enforcing a check in insertion code on this due to API history that
would break various code parts).
Here the solution is simple, it requires to recheck leaf_p after getting
the lock, to avoid touching anything if the entry has already been
inserted in the mean time.
Many thanks to Christian Ruppert for testing this and for his invaluable
help on this hard-to-trigger issue.
This fix needs to be backported to 2.9.
2024-05-23 14:06:25 -04:00
if ( ! ts - > upd . node . leaf_p ) {
ts - > seen = 0 ;
ts - > upd . key = ( + + t - > update ) + ( 2147483648U ) ;
eb = eb32_insert ( & t - > updates , & ts - > upd ) ;
if ( eb ! = & ts - > upd ) {
eb32_delete ( eb ) ;
eb32_insert ( & t - > updates , & ts - > upd ) ;
}
2017-06-13 13:37:32 -04:00
}
2015-06-15 11:23:30 -04:00
}
2010-09-23 12:16:52 -04:00
}
2022-10-11 14:17:58 -04:00
2023-08-07 15:03:24 -04:00
/* drop the lock now */
if ( use_wrlock )
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
}
2023-05-27 14:35:15 -04:00
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
if ( decrefcnt )
HA_ATOMIC_DEC ( & ts - > ref_cnt ) ;
2023-05-27 14:35:15 -04:00
if ( do_wakeup )
task_wakeup ( t - > sync_task , TASK_WOKEN_MSG ) ;
2010-06-06 11:58:34 -04:00
}
2016-10-12 11:30:30 -04:00
/* Update the expiration timer for <ts> but do not touch its expiration node.
2017-06-13 13:37:32 -04:00
* The table ' s expiration timer is updated using the date of expiration coming from
2016-10-12 11:30:30 -04:00
* < t > stick - table configuration .
2017-06-13 13:37:32 -04:00
* The node will be also inserted into the update tree if needed , at a position
* considering the update is coming from a remote node
2016-10-12 11:30:30 -04:00
*/
2017-06-13 13:37:32 -04:00
void stktable_touch_remote ( struct stktable * t , struct stksess * ts , int decrefcnt )
{
2022-10-11 14:17:58 -04:00
stktable_touch_with_exp ( t , ts , 0 , ts - > expire , decrefcnt ) ;
2017-06-13 13:37:32 -04:00
}
/* Update the expiration timer for <ts> but do not touch its expiration node.
* The table ' s expiration timer is updated using the date of expiration coming from
* < t > stick - table configuration .
* The node will be also inserted into the update tree if needed , at a position
* considering the update was made locally
*/
void stktable_touch_local ( struct stktable * t , struct stksess * ts , int decrefcnt )
2016-10-12 11:30:30 -04:00
{
int expire = tick_add ( now_ms , MS_TO_TICKS ( t - > expire ) ) ;
2022-10-11 14:17:58 -04:00
stktable_touch_with_exp ( t , ts , 1 , expire , decrefcnt ) ;
2017-06-13 13:37:32 -04:00
}
2022-10-11 14:10:27 -04:00
/* Just decrease the ref_cnt of the current session. Does nothing if <ts> is NULL.
 * The decrement below is performed atomically and no lock is taken here.
 * NOTE(review): an earlier comment claimed the read lock was still required
 * because other places updated ref_cnt non-atomically under the write lock;
 * the current code only does HA_ATOMIC_DEC — confirm that all other ref_cnt
 * updates (Lua, peers) are atomic as well before relying on this.
 * <t> is kept in the signature for symmetry with the other release paths
 * even though it is not used here.
 */
static void stktable_release(struct stktable *t, struct stksess *ts)
{
	if (!ts)
		return;
	HA_ATOMIC_DEC(&ts->ref_cnt);
}
2010-06-06 09:38:59 -04:00
/* Insert new sticky session <ts> in the table. It is assumed that it does not
* yet exist ( the caller must check this ) . The table ' s timeout is updated if it
2022-10-11 09:09:46 -04:00
* is set . < ts > is returned if properly inserted , otherwise the one already
* present if any .
2010-06-06 09:38:59 -04:00
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
struct stksess * __stktable_store ( struct stktable * t , struct stksess * ts , uint shard )
2010-06-06 09:38:59 -04:00
{
2022-10-11 09:09:46 -04:00
struct ebmb_node * eb ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
eb = ebmb_insert ( & t - > shards [ shard ] . keys , & ts - > key , t - > key_size ) ;
2022-10-11 09:09:46 -04:00
if ( likely ( eb = = & ts - > key ) ) {
ts - > exp . key = ts - > expire ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
eb32_insert ( & t - > shards [ shard ] . exps , & ts - > exp ) ;
2022-10-11 09:09:46 -04:00
}
return ebmb_entry ( eb , struct stksess , key ) ; // most commonly this is <ts>
2016-10-12 11:30:30 -04:00
}
2022-10-12 05:56:08 -04:00
/* Requeues the table's expiration task to take the recently added <ts> into
 * account. The task's expire field is updated atomically, but the table's
 * lock is taken around the CAS loop and the call to task_queue() so that the
 * requeue cannot race with the expiration task updating its own expire field
 * (which could otherwise trip the TICK_ETERNITY BUG_ON in task_queue()).
 */
void stktable_requeue_exp(struct stktable *t, const struct stksess *ts)
{
	int old_exp, new_exp;
	int expire = ts->expire;

	/* a zero table expiration delay means entries never expire */
	if (!t->expire)
		return;

	/* set the task's expire to the newest expiration date. */
	old_exp = HA_ATOMIC_LOAD(&t->exp_task->expire);
	new_exp = tick_first(expire, old_exp);

	/* let's not go further if we're already up to date */
	if (new_exp == old_exp)
		return;

	HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);

	/* CAS loop: on each failure <old_exp> is refreshed with the current
	 * value, so recompute the earliest tick before retrying; stop early
	 * if someone else already set an expire at least as recent as ours.
	 */
	while (new_exp != old_exp &&
	       !HA_ATOMIC_CAS(&t->exp_task->expire, &old_exp, new_exp)) {
		__ha_cpu_relax();
		new_exp = tick_first(expire, old_exp);
	}

	task_queue(t->exp_task);

	HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
}
2010-06-14 15:04:55 -04:00
/* Returns a valid or initialized stksess for the specified stktable_key in the
* specified table , or NULL if the key was NULL , or if no entry was found nor
2022-10-11 09:22:42 -04:00
* could be created . The entry ' s expiration is updated . This function locks the
* table , and the refcount of the entry is increased .
2010-06-14 15:04:55 -04:00
*/
2022-10-11 09:22:42 -04:00
struct stksess * stktable_get_entry ( struct stktable * table , struct stktable_key * key )
2010-06-14 15:04:55 -04:00
{
2022-10-11 09:13:46 -04:00
struct stksess * ts , * ts2 ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
uint shard ;
size_t len ;
2024-04-04 05:08:56 -04:00
if ( ! key )
return NULL ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( table - > type = = SMP_T_STR )
len = key - > key_len + 1 < table - > key_size ? key - > key_len : table - > key_size - 1 ;
else
len = table - > key_size ;
shard = stktable_calc_shard_num ( table , key - > key , len ) ;
2010-06-14 15:04:55 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
ts = __stktable_lookup_key ( table , key , shard ) ;
if ( ts )
HA_ATOMIC_INC ( & ts - > ref_cnt ) ;
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
2022-10-11 09:22:42 -04:00
if ( ts )
return ts ;
2022-10-11 10:19:35 -04:00
/* No such entry exists, let's try to create a new one. this doesn't
* require locking yet .
*/
ts = stksess_new ( table , key ) ;
if ( ! ts )
return NULL ;
/* Now we're certain to have a ts. We need to store it. For this we'll
2022-10-11 09:22:42 -04:00
* need an exclusive access . We don ' t need an atomic upgrade , this is
* rare and an unlock + lock sequence will do the job fine . Given that
* this will not be atomic , the missing entry might appear in the mean
* time so we have to be careful that the one we try to insert is the
* one we find .
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
2022-10-11 09:22:42 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
ts2 = __stktable_store ( table , ts , shard ) ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
HA_ATOMIC_INC ( & ts2 - > ref_cnt ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
2022-10-11 09:22:42 -04:00
if ( unlikely ( ts2 ! = ts ) ) {
/* another entry was added in the mean time, let's
* switch to it .
*/
__stksess_free ( table , ts ) ;
ts = ts2 ;
2010-06-14 15:04:55 -04:00
}
2017-06-13 13:37:32 -04:00
2022-10-12 06:04:01 -04:00
stktable_requeue_exp ( table , ts ) ;
2017-06-13 13:37:32 -04:00
return ts ;
}
/* Lookup for an entry with the same key and store the submitted
2022-10-12 05:13:14 -04:00
* stksess if not found . This function locks the table either shared or
* exclusively , and the refcount of the entry is increased .
2017-06-13 13:37:32 -04:00
*/
2022-10-12 05:13:14 -04:00
struct stksess * stktable_set_entry ( struct stktable * table , struct stksess * nts )
2017-06-13 13:37:32 -04:00
{
struct stksess * ts ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
uint shard ;
size_t len ;
2017-06-13 13:37:32 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( table - > type = = SMP_T_STR )
len = strlen ( ( const char * ) nts - > key . key ) ;
else
len = table - > key_size ;
shard = stktable_calc_shard_num ( table , nts - > key . key , len ) ;
HA_RWLOCK_RDLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
ts = __stktable_lookup ( table , nts , shard ) ;
2022-10-12 05:13:14 -04:00
if ( ts ) {
HA_ATOMIC_INC ( & ts - > ref_cnt ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
2022-10-12 05:13:14 -04:00
return ts ;
2017-06-13 13:37:32 -04:00
}
2022-10-12 05:13:14 -04:00
ts = nts ;
2017-06-13 13:37:32 -04:00
2022-10-12 05:13:14 -04:00
/* let's increment it before switching to exclusive */
HA_ATOMIC_INC ( & ts - > ref_cnt ) ;
2017-06-13 13:37:32 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it is certain this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( HA_RWLOCK_TRYRDTOSK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ! = 0 ) {
2022-10-12 05:13:14 -04:00
/* upgrade to seek lock failed, let's drop and take */
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it is certain this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_RDUNLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
2022-10-12 05:13:14 -04:00
}
else
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it is certain this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_SKTOWR ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
2022-10-12 05:13:14 -04:00
/* now we're write-locked */
2010-06-14 15:04:55 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it is certain this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
__stktable_store ( table , ts , shard ) ;
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & table - > shards [ shard ] . sh_lock ) ;
2022-10-12 06:04:01 -04:00
stktable_requeue_exp ( table , ts ) ;
2017-06-13 13:37:32 -04:00
return ts ;
}
2022-10-12 05:13:14 -04:00
2010-01-04 09:23:48 -05:00
/*
BUG/MEDIUM: stick-table: fix a race condition when updating the expiration task
Pierre Cheynier reported a rare crash that can affect stick-tables. When
an entry is created, the stick-table's expiration date is updated. But if
at exactly the same time the expiration task runs, it finishes by updating
its expiration timer without any protection, which may collide with the
call to task_queue() in another thread. In this case, it sometimes happens
that the first test for TICK_ETERNITY in task_queue() passes, then the
"expire" field is reset, then the BUG_ON() triggers, like below:
FATAL: bug condition "task->expire == 0" matched at src/task.c:279
call trace(13):
| 0x649d86 [c6 04 25 01 00 00 00 00]: __task_queue+0xc6/0xce
| 0x596bef [eb 90 ba 03 00 00 00 be]: stktable_requeue_exp+0x1ef/0x258
| 0x596c87 [48 83 bb 90 00 00 00 00]: stktable_touch_with_exp+0x27/0x312
| 0x563698 [48 8b 4c 24 18 4c 8b 4c]: stream_process_counters+0x3a8/0x6a2
| 0x569344 [49 8b 87 f8 00 00 00 48]: process_stream+0x3964/0x3b4f
| 0x64a80b [49 89 c7 e9 23 ff ff ff]: run_tasks_from_lists+0x3ab/0x566
| 0x64ad66 [29 44 24 14 8b 7c 24 14]: process_runnable_tasks+0x396/0x71e
| 0x6184b2 [83 3d 47 b3 a6 00 01 0f]: run_poll_loop+0x92/0x4ff
| 0x618acf [48 8b 1d aa 20 7d 00 48]: main+0x1877ef
| 0x7fc7d6ec1e45 [64 48 89 04 25 30 06 00]: libpthread:+0x7e45
| 0x7fc7d6c9e4af [48 89 c7 b8 3c 00 00 00]: libc:clone+0x3f/0x5a
This one is extremely difficult to reproduce in practice, but adding a
printf() in process_table_expire() before assigning the value, while
running with an expire delay of 1ms helps a lot and may trigger the
crash in less than one minute on a 8-thread machine. Interestingly,
depending on the sequencing, this bug could also have made a table fail
to expire if the expire field got reset after the last update but before
the call to task_queue(). It would require to be quite unlucky so that
the table is never touched anymore after the race though.
The solution taken by this patch is to take the table's lock when
updating its expire value in stktable_requeue_exp(), enclosing the call
to task_queue(), and to update the task->expire field while still under
the lock in process_table_expire(). Note that thanks to previous changes,
taking the table's lock for the update in stktable_requeue_exp() costs
almost nothing since we now have the guarantee that this is not done more
than 1000 times a second.
Since process_table_expire() sets the timeout after returning from
stktable_trash_expired() which just released the lock, the two functions
were merged so that the task's expire field is updated while still under
the lock. Note that this heavily depends on the two previous patches
below:
CLEANUP: stick-table: remove the unused table->exp_next
OPTIM: stick-table: avoid atomic ops in stktable_requeue_exp() when possible
This is a bit complicated due to the fact that in 2.7 some parts were
made lockless. In 2.6 and older, the second part (the merge of the
two functions) will be sufficient since the task_queue() call was
already performed under the table's lock, and the patches above are
not needed.
This needs to be backported as far as 1.8 scrupulously following
instructions above.
2022-11-14 12:02:44 -05:00
* Task processing function to trash expired sticky sessions . A pointer to the
* task itself is returned since it never dies .
2010-01-04 09:23:48 -05:00
*/
BUG/MEDIUM: stick-table: fix a race condition when updating the expiration task
Pierre Cheynier reported a rare crash that can affect stick-tables. When
an entry is created, the stick-table's expiration date is updated. But if
at exactly the same time the expiration task runs, it finishes by updating
its expiration timer without any protection, which may collide with the
call to task_queue() in another thread. In this case, it sometimes happens
that the first test for TICK_ETERNITY in task_queue() passes, then the
"expire" field is reset, then the BUG_ON() triggers, like below:
FATAL: bug condition "task->expire == 0" matched at src/task.c:279
call trace(13):
| 0x649d86 [c6 04 25 01 00 00 00 00]: __task_queue+0xc6/0xce
| 0x596bef [eb 90 ba 03 00 00 00 be]: stktable_requeue_exp+0x1ef/0x258
| 0x596c87 [48 83 bb 90 00 00 00 00]: stktable_touch_with_exp+0x27/0x312
| 0x563698 [48 8b 4c 24 18 4c 8b 4c]: stream_process_counters+0x3a8/0x6a2
| 0x569344 [49 8b 87 f8 00 00 00 48]: process_stream+0x3964/0x3b4f
| 0x64a80b [49 89 c7 e9 23 ff ff ff]: run_tasks_from_lists+0x3ab/0x566
| 0x64ad66 [29 44 24 14 8b 7c 24 14]: process_runnable_tasks+0x396/0x71e
| 0x6184b2 [83 3d 47 b3 a6 00 01 0f]: run_poll_loop+0x92/0x4ff
| 0x618acf [48 8b 1d aa 20 7d 00 48]: main+0x1877ef
| 0x7fc7d6ec1e45 [64 48 89 04 25 30 06 00]: libpthread:+0x7e45
| 0x7fc7d6c9e4af [48 89 c7 b8 3c 00 00 00]: libc:clone+0x3f/0x5a
This one is extremely difficult to reproduce in practice, but adding a
printf() in process_table_expire() before assigning the value, while
running with an expire delay of 1ms helps a lot and may trigger the
crash in less than one minute on a 8-thread machine. Interestingly,
depending on the sequencing, this bug could also have made a table fail
to expire if the expire field got reset after the last update but before
the call to task_queue(). It would require to be quite unlucky so that
the table is never touched anymore after the race though.
The solution taken by this patch is to take the table's lock when
updating its expire value in stktable_requeue_exp(), enclosing the call
to task_queue(), and to update the task->expire field while still under
the lock in process_table_expire(). Note that thanks to previous changes,
taking the table's lock for the update in stktable_requeue_exp() costs
almost nothing since we now have the guarantee that this is not done more
than 1000 times a second.
Since process_table_expire() sets the timeout after returning from
stktable_trash_expired() which just released the lock, the two functions
were merged so that the task's expire field is updated while still under
the lock. Note that this heavily depends on the two previous patches
below:
CLEANUP: stick-table: remove the unused table->exp_next
OPTIM: stick-table: avoid atomic ops in stktable_requeue_exp() when possible
This is a bit complicated due to the fact that in 2.7 some parts were
made lockless. In 2.6 and older, the second part (the merge of the
two functions) will be sufficient since the task_queue() call was
already performed under the table's lock, and the patches above are
not needed.
This needs to be backported as far as 1.8 scrupulously following
instructions above.
2022-11-14 12:02:44 -05:00
struct task * process_table_expire ( struct task * task , void * context , unsigned int state )
2010-01-04 09:23:48 -05:00
{
BUG/MEDIUM: stick-table: fix a race condition when updating the expiration task
Pierre Cheynier reported a rare crash that can affect stick-tables. When
an entry is created, the stick-table's expiration date is updated. But if
at exactly the same time the expiration task runs, it finishes by updating
its expiration timer without any protection, which may collide with the
call to task_queue() in another thread. In this case, it sometimes happens
that the first test for TICK_ETERNITY in task_queue() passes, then the
"expire" field is reset, then the BUG_ON() triggers, like below:
FATAL: bug condition "task->expire == 0" matched at src/task.c:279
call trace(13):
| 0x649d86 [c6 04 25 01 00 00 00 00]: __task_queue+0xc6/0xce
| 0x596bef [eb 90 ba 03 00 00 00 be]: stktable_requeue_exp+0x1ef/0x258
| 0x596c87 [48 83 bb 90 00 00 00 00]: stktable_touch_with_exp+0x27/0x312
| 0x563698 [48 8b 4c 24 18 4c 8b 4c]: stream_process_counters+0x3a8/0x6a2
| 0x569344 [49 8b 87 f8 00 00 00 48]: process_stream+0x3964/0x3b4f
| 0x64a80b [49 89 c7 e9 23 ff ff ff]: run_tasks_from_lists+0x3ab/0x566
| 0x64ad66 [29 44 24 14 8b 7c 24 14]: process_runnable_tasks+0x396/0x71e
| 0x6184b2 [83 3d 47 b3 a6 00 01 0f]: run_poll_loop+0x92/0x4ff
| 0x618acf [48 8b 1d aa 20 7d 00 48]: main+0x1877ef
| 0x7fc7d6ec1e45 [64 48 89 04 25 30 06 00]: libpthread:+0x7e45
| 0x7fc7d6c9e4af [48 89 c7 b8 3c 00 00 00]: libc:clone+0x3f/0x5a
This one is extremely difficult to reproduce in practice, but adding a
printf() in process_table_expire() before assigning the value, while
running with an expire delay of 1ms helps a lot and may trigger the
crash in less than one minute on a 8-thread machine. Interestingly,
depending on the sequencing, this bug could also have made a table fail
to expire if the expire field got reset after the last update but before
the call to task_queue(). It would require to be quite unlucky so that
the table is never touched anymore after the race though.
The solution taken by this patch is to take the table's lock when
updating its expire value in stktable_requeue_exp(), enclosing the call
to task_queue(), and to update the task->expire field while still under
the lock in process_table_expire(). Note that thanks to previous changes,
taking the table's lock for the update in stktable_requeue_exp() costs
almost nothing since we now have the guarantee that this is not done more
than 1000 times a second.
Since process_table_expire() sets the timeout after returning from
stktable_trash_expired() which just released the lock, the two functions
were merged so that the task's expire field is updated while still under
the lock. Note that this heavily depends on the two previous patches
below:
CLEANUP: stick-table: remove the unused table->exp_next
OPTIM: stick-table: avoid atomic ops in stktable_requeue_exp() when possible
This is a bit complicated due to the fact that in 2.7 some parts were
made lockless. In 2.6 and older, the second part (the merge of the
two functions) will be sufficient since the task_queue() call was
already performed under the table's lock, and the patches above are
not needed.
This needs to be backported as far as 1.8 scrupulously following
instructions above.
2022-11-14 12:02:44 -05:00
struct stktable * t = context ;
2010-01-04 09:23:48 -05:00
struct stksess * ts ;
struct eb32_node * eb ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int updt_locked ;
int looped ;
2022-11-14 11:33:02 -05:00
int exp_next ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it is certain this will not work.
At this point, this allowed us to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int task_exp ;
int shard ;
task_exp = TICK_ETERNITY ;
for ( shard = 0 ; shard < CONFIG_HAP_TBL_BUCKETS ; shard + + ) {
updt_locked = 0 ;
looped = 0 ;
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
eb = eb32_lookup_ge ( & t - > shards [ shard ] . exps , now_ms - TIMER_LOOK_BACK ) ;
while ( 1 ) {
if ( unlikely ( ! eb ) ) {
/* we might have reached the end of the tree, typically because
* < now_ms > is in the first half and we ' re first scanning the last
* half . Let ' s loop back to the beginning of the tree now if we
* have not yet visited it .
*/
if ( looped )
break ;
looped = 1 ;
eb = eb32_first ( & t - > shards [ shard ] . exps ) ;
if ( likely ( ! eb ) )
break ;
}
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( likely ( tick_is_lt ( now_ms , eb - > key ) ) ) {
/* timer not expired yet, revisit it later */
exp_next = eb - > key ;
goto out_unlock ;
}
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* timer looks expired, detach it from the queue */
ts = eb32_entry ( eb , struct stksess , exp ) ;
eb = eb32_next ( eb ) ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* don't delete an entry which is currently referenced */
if ( HA_ATOMIC_LOAD ( & ts - > ref_cnt ) ! = 0 )
continue ;
2010-06-14 08:53:07 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
eb32_delete ( & ts - > exp ) ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ! tick_is_expired ( ts - > expire , now_ms ) ) {
if ( ! tick_isset ( ts - > expire ) )
continue ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
ts - > exp . key = ts - > expire ;
eb32_insert ( & t - > shards [ shard ] . exps , & ts - > exp ) ;
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock, but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* the update might have jumped beyond the next element,
* possibly causing a wrapping . We need to check whether
* the next element should be used instead . If the next
* element doesn ' t exist it means we ' re on the right
* side and have to check the first one then . If it
* exists and is closer , we must use it , otherwise we
* use the current one .
*/
if ( ! eb )
eb = eb32_first ( & t - > shards [ shard ] . exps ) ;
BUG/MEDIUM: stick-table: do not leave entries in end of window during purge
At some moments expired stick table records stop being removed. This
happens when the internal time wraps around the 32-bit limit, or every
49.7 days. What precisely happens is that some elements that are collected
close to the end of the time window (2^32 - table's "expire" setting)
might have been updated and will be requeued further, at the beginning
of the next window. Here, three bad situations happen:
- the incorrect integer-based comparison that is not aware of wrapping
will result in the scan to restart from the freshly requeued element,
skipping all those at the end of the window. The net effect of this
is that at each wakeup of the expiration task, only one element from
the end of the window will be expired, and other ones will remain
there for a very long time, especially if they have to wait for all
the predecessors to be picked one at a time after slow wakeups due
to a long expiration ; this is what was observed in issue #2034
making the table fill up and appear as not expiring at all, and it
seems that issue #2024 reports the same problem at the same moment
(since such issues happen for everyone roughly at the same time
when the clock doesn't drift too much).
- the elements that were placed at the beginning of the next window
are skipped as well for as long as there are refreshed entries at
the end of the previous window, so these ones participate in filling
the table as well. This is caused by the restart from the current,
updated node, which is generally placed after most other less recently
updated elements.
- once the last element at the end of the window is picked, suddenly
there is a large amount of expired entries at the beginning of the
next window that all have to be requeued. If the expiration delay
is large, the number can be big and it can take a long time, which
can very likely explain the periodic crashes reported in issue #2025.
Limiting the batch size as done in commit dfe79251d ("BUG/MEDIUM:
stick-table: limit the time spent purging old entries") would make
sense for process_table_expire() as well.
This patch addresses the incorrect tree scan algorithm to make sure that:
- there's always a next element to compare against, even when dealing
with the last one in the tree, the first one must be used ;
- time comparisons used to decide whether to restart from the current
element use tick_is_lt() as it is the only case where we know the
current element will be placed before any other one (since the tree
respects insertion ordering for duplicates)
In order to reproduce the issue, it was found that injecting traffic on
a random key that spans over half of the size of a table whose expiration
is set to 15s while the date is going to wrap in 20s does exhibit an
increase of the table's size 5s after startup, when entries start to be
pushed to the next window. It's more effective when a second load
generator constantly hammers a same key to be certain that none of them
is ready to expire. This doesn't happen anymore after this patch.
This fix needs to be backported to all stable versions. The bug has been
there for as long as the stick tables were introduced in 1.4-dev7 with
commit 3bd697e07 ("[MEDIUM] Add stick table (persistence) management
functions and types"). A cleanup could consist of deduplicating that
code by having process_table_expire() call __stktable_trash_oldest(),
with that one improved to support an optional time check.
2023-02-07 13:27:06 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ! eb | | tick_is_lt ( ts - > exp . key , eb - > key ) )
eb = & ts - > exp ;
continue ;
}
2010-01-04 09:23:48 -05:00
BUG/MAJOR: stick-tables: fix race with peers in entry expiration
In 2.9 with commit 7968fe3889 ("MEDIUM: stick-table: change the ref_cnt
atomically") we significantly relaxed the stick-tables locking when
dealing with peers by adjusting the ref_cnt atomically and moving it
out of the lock.
However it opened a tiny window that became problematic in 3.0-dev7
when the table's contention was lowered by commit 1a088da7c2 ("MAJOR:
stktable: split the keys across multiple shards to reduce contention").
What happens is that some peers may access the entry for reading at
the moment it's about to expire, and while the read accesses to push
the data remain unnoticed (possibly that from time to time we push
crap), but the releasing of the refcount causes a new write that may
damage anything else. The scenario is the following:
process_table_expire() peer_send_teachmsgs()
RDLOCK(&updt_lock);
tick_is_expired() != 0
ebmb_delete(ts->key);
if (ts->upd.node.leaf_p) {
HA_ATOMIC_INC(&ts->ref_cnt);
RDUNLOCK(&updt_lock);
WRLOCK(&updt_lock);
eb32_delete(&ts->upd);
}
__stksess_free(t, ts);
peer_send_updatemsg(ts);
RDLOCK(&updt_lock);
HA_ATOMIC_DEC(&ts->ref_cnt);
Here it's clear that the bottom part of peer_send_teachmsgs() believes
to be protected but may act on freed data.
This is more visible when enabling -dMtag,no-merge,integrity because
the ATOMIC_DEC(&ref_cnt) decrements one byte in the area, that makes
the eviction check fail while the tag has the address of the left
__stksess_free(), proving a completed pool_free() before the decrement,
and the anomaly there is pretty visible in the crash dump. Changing
INC()/DEC() with ADD(2)/DEC(2) shows that the byte is now off by two,
confirming that the operation happened there.
The solution is not very hard, it consists in checking for the ref_cnt
on the left after grabbing the lock, and doing both before deleting the
element, so that we have the guarantee that either the peer will not
take it or that it has already started taking it.
This was proven to be sufficient, as instead of crashing after 3s of
injection with 4 peers, 16 threads and 130k RPS, it survived for 15mn.
In order to stress the setup, a config involving 4+ peers, tracking
HTTP request with randoms and applying a bwlim-out filter with a
random key, with a client made of 160 h2 conns downloading 10 streams
of 4MB objects in parallel managed to trigger it within a few seconds:
frontend ft
http-request track-sc0 rand(100000) table tbl
filter bwlim-out lim-out limit 2047m key rand(100000000),ipmask(32) min-size 1 table tbl
http-request set-bandwidth-limit lim-out
use_backend bk
backend bk
server s1 198.18.0.30:8000
server s2 198.18.0.34:8000
backend tbl
stick-table type ip size 1000k expire 1s store http_req_cnt,bytes_in_rate(1s),bytes_out_rate(1s) peers peers
This seems to be very dependent on the timing and setup though.
This will need to be backported to 2.9. This part of the code was
reindented with shards but the block should remain mostly unchanged.
The logic to apply is the same.
2024-04-12 11:31:00 -04:00
/* if the entry is in the update list, we must be extremely careful
* because peers can see it at any moment and start to use it . Peers
* will take the table ' s updt_lock for reading when doing that , and
* with that lock held , will grab a ref_cnt before releasing the
* lock . So we must take this lock as well and check the ref_cnt .
*/
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( ts - > upd . node . leaf_p ) {
if ( ! updt_locked ) {
updt_locked = 1 ;
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
}
BUG/MAJOR: stick-tables: fix race with peers in entry expiration
In 2.9 with commit 7968fe3889 ("MEDIUM: stick-table: change the ref_cnt
atomically") we significantly relaxed the stick-tables locking when
dealing with peers by adjusting the ref_cnt atomically and moving it
out of the lock.
However it opened a tiny window that became problematic in 3.0-dev7
when the table's contention was lowered by commit 1a088da7c2 ("MAJOR:
stktable: split the keys across multiple shards to reduce contention").
What happens is that some peers may access the entry for reading at
the moment it's about to expire, and while the read accesses to push
the data remain unnoticed (possibly that from time to time we push
crap), but the releasing of the refcount causes a new write that may
damage anything else. The scenario is the following:
process_table_expire() peer_send_teachmsgs()
RDLOCK(&updt_lock);
tick_is_expired() != 0
ebmb_delete(ts->key);
if (ts->upd.node.leaf_p) {
HA_ATOMIC_INC(&ts->ref_cnt);
RDUNLOCK(&updt_lock);
WRLOCK(&updt_lock);
eb32_delete(&ts->upd);
}
__stksess_free(t, ts);
peer_send_updatemsg(ts);
RDLOCK(&updt_lock);
HA_ATOMIC_DEC(&ts->ref_cnt);
Here it's clear that the bottom part of peer_send_teachmsgs() believes
to be protected but may act on freed data.
This is more visible when enabling -dMtag,no-merge,integrity because
the ATOMIC_DEC(&ref_cnt) decrements one byte in the area, that makes
the eviction check fail while the tag has the address of the left
__stksess_free(), proving a completed pool_free() before the decrement,
and the anomaly there is pretty visible in the crash dump. Changing
INC()/DEC() with ADD(2)/DEC(2) shows that the byte is now off by two,
confirming that the operation happened there.
The solution is not very hard, it consists in checking for the ref_cnt
on the left after grabbing the lock, and doing both before deleting the
element, so that we have the guarantee that either the peer will not
take it or that it has already started taking it.
This was proven to be sufficient, as instead of crashing after 3s of
injection with 4 peers, 16 threads and 130k RPS, it survived for 15mn.
In order to stress the setup, a config involving 4+ peers, tracking
HTTP request with randoms and applying a bwlim-out filter with a
random key, with a client made of 160 h2 conns downloading 10 streams
of 4MB objects in parallel managed to trigger it within a few seconds:
frontend ft
http-request track-sc0 rand(100000) table tbl
filter bwlim-out lim-out limit 2047m key rand(100000000),ipmask(32) min-size 1 table tbl
http-request set-bandwidth-limit lim-out
use_backend bk
backend bk
server s1 198.18.0.30:8000
server s2 198.18.0.34:8000
backend tbl
stick-table type ip size 1000k expire 1s store http_req_cnt,bytes_in_rate(1s),bytes_out_rate(1s) peers peers
This seems to be very dependent on the timing and setup though.
This will need to be backported to 2.9. This part of the code was
reindented with shards but the block should remain mostly unchanged.
The logic to apply is the same.
2024-04-12 11:31:00 -04:00
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt .
*/
if ( HA_ATOMIC_LOAD ( & ts - > ref_cnt ) )
continue ;
2023-05-27 13:55:15 -04:00
}
BUG/MAJOR: stick-tables: fix race with peers in entry expiration
In 2.9 with commit 7968fe3889 ("MEDIUM: stick-table: change the ref_cnt
atomically") we significantly relaxed the stick-tables locking when
dealing with peers by adjusting the ref_cnt atomically and moving it
out of the lock.
However it opened a tiny window that became problematic in 3.0-dev7
when the table's contention was lowered by commit 1a088da7c2 ("MAJOR:
stktable: split the keys across multiple shards to reduce contention").
What happens is that some peers may access the entry for reading at
the moment it's about to expire, and while the read accesses to push
the data remain unnoticed (possibly that from time to time we push
crap), but the releasing of the refcount causes a new write that may
damage anything else. The scenario is the following:
process_table_expire() peer_send_teachmsgs()
RDLOCK(&updt_lock);
tick_is_expired() != 0
ebmb_delete(ts->key);
if (ts->upd.node.leaf_p) {
HA_ATOMIC_INC(&ts->ref_cnt);
RDUNLOCK(&updt_lock);
WRLOCK(&updt_lock);
eb32_delete(&ts->upd);
}
__stksess_free(t, ts);
peer_send_updatemsg(ts);
RDLOCK(&updt_lock);
HA_ATOMIC_DEC(&ts->ref_cnt);
Here it's clear that the bottom part of peer_send_teachmsgs() believes
to be protected but may act on freed data.
This is more visible when enabling -dMtag,no-merge,integrity because
the ATOMIC_DEC(&ref_cnt) decrements one byte in the area, that makes
the eviction check fail while the tag has the address of the left
__stksess_free(), proving a completed pool_free() before the decrement,
and the anomaly there is pretty visible in the crash dump. Changing
INC()/DEC() with ADD(2)/DEC(2) shows that the byte is now off by two,
confirming that the operation happened there.
The solution is not very hard, it consists in checking for the ref_cnt
on the left after grabbing the lock, and doing both before deleting the
element, so that we have the guarantee that either the peer will not
take it or that it has already started taking it.
This was proven to be sufficient, as instead of crashing after 3s of
injection with 4 peers, 16 threads and 130k RPS, it survived for 15mn.
In order to stress the setup, a config involving 4+ peers, tracking
HTTP request with randoms and applying a bwlim-out filter with a
random key, with a client made of 160 h2 conns downloading 10 streams
of 4MB objects in parallel managed to trigger it within a few seconds:
frontend ft
http-request track-sc0 rand(100000) table tbl
filter bwlim-out lim-out limit 2047m key rand(100000000),ipmask(32) min-size 1 table tbl
http-request set-bandwidth-limit lim-out
use_backend bk
backend bk
server s1 198.18.0.30:8000
server s2 198.18.0.34:8000
backend tbl
stick-table type ip size 1000k expire 1s store http_req_cnt,bytes_in_rate(1s),bytes_out_rate(1s) peers peers
This seems to be very dependent on the timing and setup though.
This will need to be backported to 2.9. This part of the code was
reindented with shards but the block should remain mostly unchanged.
The logic to apply is the same.
2024-04-12 11:31:00 -04:00
/* session expired, trash it */
ebmb_delete ( & ts - > key ) ;
eb32_delete ( & ts - > upd ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
__stksess_free ( t , ts ) ;
2023-05-27 13:55:15 -04:00
}
2010-01-04 09:23:48 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
/* We have found no task to expire in any tree */
exp_next = TICK_ETERNITY ;
BUG/MEDIUM: stick-table: fix a race condition when updating the expiration task
Pierre Cheynier reported a rare crash that can affect stick-tables. When
an entry is created, the stick-table's expiration date is updated. But if
at exactly the same time the expiration task runs, it finishes by updating
its expiration timer without any protection, which may collide with the
call to task_queue() in another thread. In this case, it sometimes happens
that the first test for TICK_ETERNITY in task_queue() passes, then the
"expire" field is reset, then the BUG_ON() triggers, like below:
FATAL: bug condition "task->expire == 0" matched at src/task.c:279
call trace(13):
| 0x649d86 [c6 04 25 01 00 00 00 00]: __task_queue+0xc6/0xce
| 0x596bef [eb 90 ba 03 00 00 00 be]: stktable_requeue_exp+0x1ef/0x258
| 0x596c87 [48 83 bb 90 00 00 00 00]: stktable_touch_with_exp+0x27/0x312
| 0x563698 [48 8b 4c 24 18 4c 8b 4c]: stream_process_counters+0x3a8/0x6a2
| 0x569344 [49 8b 87 f8 00 00 00 48]: process_stream+0x3964/0x3b4f
| 0x64a80b [49 89 c7 e9 23 ff ff ff]: run_tasks_from_lists+0x3ab/0x566
| 0x64ad66 [29 44 24 14 8b 7c 24 14]: process_runnable_tasks+0x396/0x71e
| 0x6184b2 [83 3d 47 b3 a6 00 01 0f]: run_poll_loop+0x92/0x4ff
| 0x618acf [48 8b 1d aa 20 7d 00 48]: main+0x1877ef
| 0x7fc7d6ec1e45 [64 48 89 04 25 30 06 00]: libpthread:+0x7e45
| 0x7fc7d6c9e4af [48 89 c7 b8 3c 00 00 00]: libc:clone+0x3f/0x5a
This one is extremely difficult to reproduce in practice, but adding a
printf() in process_table_expire() before assigning the value, while
running with an expire delay of 1ms helps a lot and may trigger the
crash in less than one minute on a 8-thread machine. Interestingly,
depending on the sequencing, this bug could also have made a table fail
to expire if the expire field got reset after the last update but before
the call to task_queue(). It would require to be quite unlucky so that
the table is never touched anymore after the race though.
The solution taken by this patch is to take the table's lock when
updating its expire value in stktable_requeue_exp(), enclosing the call
to task_queue(), and to update the task->expire field while still under
the lock in process_table_expire(). Note that thanks to previous changes,
taking the table's lock for the update in stktable_requeue_exp() costs
almost nothing since we now have the guarantee that this is not done more
than 1000 times a second.
Since process_table_expire() sets the timeout after returning from
stktable_trash_expired() which just released the lock, the two functions
were merged so that the task's expire field is updated while still under
the lock. Note that this heavily depends on the two previous patches
below:
CLEANUP: stick-table: remove the unused table->exp_next
OPTIM: stick-table: avoid atomic ops in stktable_requeue_exp() when possible
This is a bit complicated due to the fact that in 2.7 some parts were
made lockless. In 2.6 and older, the second part (the merge of the
two functions) will be sufficient since the task_queue() call was
already performed under the table's lock, and the patches above are
not needed.
This needs to be backported as far as 1.8 scrupulously following
instructions above.
2022-11-14 12:02:44 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
out_unlock :
if ( updt_locked )
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > updt_lock ) ;
task_exp = tick_first ( task_exp , exp_next ) ;
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > shards [ shard ] . sh_lock ) ;
}
/* Reset the task's expiration. We do this under the lock so as not
* to ruin a call to task_queue ( ) in stktable_requeue_exp ( ) if we
* were to update with TICK_ETERNITY .
*/
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & t - > lock ) ;
2024-04-12 03:57:32 -04:00
task - > expire = task_exp ;
2022-10-11 06:02:50 -04:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & t - > lock ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
2010-01-04 09:23:48 -05:00
return task ;
}
2023-11-02 13:34:51 -04:00
/* Perform minimal stick table initialization. In case of error, the
* function will return 0 and < err_msg > will contain hints about the
* error and it is up to the caller to free it .
*
* Returns 1 on success
*/
int stktable_init ( struct stktable * t , char * * err_msg )
2010-01-04 09:23:48 -05:00
{
2021-05-12 11:39:04 -04:00
int peers_retval = 0 ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int shard ;
2022-11-28 12:53:06 -05:00
t - > hash_seed = XXH64 ( t - > id , t - > idlen , 0 ) ;
2010-01-04 09:23:48 -05:00
if ( t - > size ) {
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
for ( shard = 0 ; shard < CONFIG_HAP_TBL_BUCKETS ; shard + + ) {
t - > shards [ shard ] . keys = EB_ROOT_UNIQUE ;
memset ( & t - > shards [ shard ] . exps , 0 , sizeof ( t - > shards [ shard ] . exps ) ) ;
HA_RWLOCK_INIT ( & t - > shards [ shard ] . sh_lock ) ;
}
2015-12-16 09:28:12 -05:00
t - > updates = EB_ROOT_UNIQUE ;
2022-10-12 10:47:59 -04:00
HA_RWLOCK_INIT ( & t - > lock ) ;
2010-01-04 09:23:48 -05:00
2018-11-14 11:54:36 -05:00
t - > pool = create_pool ( " sticktables " , sizeof ( struct stksess ) + round_ptr_size ( t - > data_size ) + t - > key_size , MEM_F_SHARED ) ;
2010-01-04 09:23:48 -05:00
if ( t - > expire ) {
2021-10-01 12:23:30 -04:00
t - > exp_task = task_new_anywhere ( ) ;
2018-10-15 05:12:15 -04:00
if ( ! t - > exp_task )
2023-11-02 13:34:51 -04:00
goto mem_error ;
2010-01-04 09:23:48 -05:00
t - > exp_task - > process = process_table_expire ;
t - > exp_task - > context = ( void * ) t ;
}
2021-10-06 08:24:19 -04:00
if ( t - > peers . p & & t - > peers . p - > peers_fe & & ! ( t - > peers . p - > peers_fe - > flags & ( PR_FL_DISABLED | PR_FL_STOPPED ) ) ) {
2021-05-12 11:39:04 -04:00
peers_retval = peers_register_table ( t - > peers . p , t ) ;
2010-09-23 12:39:19 -04:00
}
2023-11-02 13:34:51 -04:00
if ( t - > pool = = NULL | | peers_retval )
goto mem_error ;
2010-01-04 09:23:48 -05:00
}
MEDIUM: stktable/peers: "write-to" local table on peer updates
In this patch, we add the possibility to declare on a table definition
("table" in peer section, or "stick-table" in proxy section) that we
want the remote/peer updates on that table to be pushed on a local
haproxy table in addition to the source table.
Consider this example:
|peers mypeers
| peer local 127.0.0.1:3334
| peer clust 127.0.0.1:3333
| table t1.local type string size 10m store server_id,server_key expire 30s
| table t1.clust type string size 10m store server_id,server_key write-to mypeers/t1.local expire 30s
With this setup, we consider haproxy uses t1.local as cache/local table
for read and write operations, and that t1.clust is a remote table
containing datas processed from t1.local and similar tables from other
haproxy peers in a cluster setup. The t1.clust table will be used to
refresh the local/cache one via the "write-to" statement.
What will happen, is that every time haproxy will see entry updates for
the t1.clust table: it will overwrite t1.local table with fresh data and
will update the entry expiration timer. If t1.local entry doesn't exist
yet (key doesn't exist), it will automatically create it. Note that only
types that cannot be used for arithmetic ops will be handled, and this
to prevent processed values from the remote table from interfering with
computations based on values from the local table. (ie: prevent
cumulative counters from growing indefinitely).
"write-to" will only push supported types if they both exist in the source
and the target table. Be careful with server_id and server_key storage
because they are often declared implicitly when referencing a table in
sticking rules but it is required to declare them explicitly for them to
be pushed between a remote and a local table through "write-to" option.
Also note that the "write-to" target table should have the same type as
the source one, and that the key length should be strictly equal,
otherwise haproxy will raise an error due to the tables being
incompatibles. A table that is already being written to cannot be used
as a source table for a "write-to" target.
Thanks to this patch, it will now be possible to use sticking rules in
peer cluster context by using a local table as a local cache which
will be automatically refreshed by one or multiple remote table(s).
This commit depends on:
- "MINOR: stktable: stktable_init() sets err_msg on error"
- "MINOR: stktable: check if a type should be used as-is"
2023-10-02 10:40:27 -04:00
if ( t - > write_to . name ) {
struct stktable * table ;
/* postresolve write_to table */
table = stktable_find_by_name ( t - > write_to . name ) ;
if ( ! table ) {
memprintf ( err_msg , " write-to: table '%s' doesn't exist " , t - > write_to . name ) ;
ha_free ( & t - > write_to . name ) ; /* no longer need this */
return 0 ;
}
ha_free ( & t - > write_to . name ) ; /* no longer need this */
if ( table - > write_to . ptr ) {
memprintf ( err_msg , " write-to: table '%s' is already used as a source table " , table - > id ) ;
return 0 ;
}
if ( table - > type ! = t - > type ) {
memprintf ( err_msg , " write-to: cannot mix table types ('%s' has '%s' type and '%s' has '%s' type) " ,
table - > id , stktable_types [ table - > type ] . kw ,
t - > id , stktable_types [ t - > type ] . kw ) ;
return 0 ;
}
if ( table - > key_size ! = t - > key_size ) {
memprintf ( err_msg , " write-to: cannot mix key sizes ('%s' has '%ld' key_size and '%s' has '%ld' key_size) " ,
table - > id , ( long ) table - > key_size ,
t - > id , ( long ) t - > key_size ) ;
return 0 ;
}
t - > write_to . t = table ;
}
2010-01-04 09:23:48 -05:00
return 1 ;
2023-11-02 13:34:51 -04:00
mem_error :
memprintf ( err_msg , " memory allocation error " ) ;
return 0 ;
2010-01-04 09:23:48 -05:00
}
2023-11-16 10:17:12 -05:00
/* Performs stick table cleanup: it's meant to be called after the table
* has been initialized ith stktable_init ( ) , else it will lead to undefined
* behavior .
*
* However it does not free the table pointer itself
*/
void stktable_deinit ( struct stktable * t )
{
if ( ! t )
return ;
2023-11-16 10:18:14 -05:00
task_destroy ( t - > exp_task ) ;
2023-11-16 10:17:12 -05:00
pool_destroy ( t - > pool ) ;
}
2010-01-04 09:23:48 -05:00
/*
* Configuration keywords of known table types
*/
2015-07-24 02:46:42 -04:00
struct stktable_type stktable_types [ SMP_TYPES ] = {
[ SMP_T_SINT ] = { " integer " , 0 , 4 } ,
[ SMP_T_IPV4 ] = { " ip " , 0 , 4 } ,
[ SMP_T_IPV6 ] = { " ipv6 " , 0 , 16 } ,
[ SMP_T_STR ] = { " string " , STK_F_CUSTOM_KEYSIZE , 32 } ,
[ SMP_T_BIN ] = { " binary " , STK_F_CUSTOM_KEYSIZE , 32 }
} ;
2010-01-04 09:23:48 -05:00
/*
* Parse table type configuration .
* Returns 0 on successful parsing , else 1.
* < myidx > is set at next configuration < args > index .
*/
2023-04-13 08:33:52 -04:00
int stktable_parse_type ( char * * args , int * myidx , unsigned long * type , size_t * key_size , const char * file , int linenum )
2010-01-04 09:23:48 -05:00
{
2015-07-24 02:46:42 -04:00
for ( * type = 0 ; * type < SMP_TYPES ; ( * type ) + + ) {
if ( ! stktable_types [ * type ] . kw )
continue ;
2010-01-04 09:23:48 -05:00
if ( strcmp ( args [ * myidx ] , stktable_types [ * type ] . kw ) ! = 0 )
continue ;
* key_size = stktable_types [ * type ] . default_size ;
( * myidx ) + + ;
2010-06-06 05:56:36 -04:00
if ( stktable_types [ * type ] . flags & STK_F_CUSTOM_KEYSIZE ) {
2010-01-04 09:23:48 -05:00
if ( strcmp ( " len " , args [ * myidx ] ) = = 0 ) {
2023-04-13 08:33:52 -04:00
char * stop ;
2010-01-04 09:23:48 -05:00
( * myidx ) + + ;
2023-04-13 08:33:52 -04:00
* key_size = strtol ( args [ * myidx ] , & stop , 10 ) ;
if ( * stop ! = ' \0 ' | | ! * key_size ) {
ha_alert ( " parsing [%s:%d] : 'len' expects a positive integer argument. \n " , file , linenum ) ;
return 1 ;
}
2015-07-24 02:46:42 -04:00
if ( * type = = SMP_T_STR ) {
2010-09-23 12:02:19 -04:00
/* null terminated string needs +1 for '\0'. */
( * key_size ) + + ;
}
2010-01-04 09:23:48 -05:00
( * myidx ) + + ;
}
}
return 0 ;
}
2023-04-13 08:33:52 -04:00
ha_alert ( " parsing [%s:%d] : %s: unknown type '%s'. \n " , file , linenum , args [ 0 ] , args [ * myidx ] ) ;
2010-01-04 09:23:48 -05:00
return 1 ;
}
2021-06-30 12:01:02 -04:00
/* reserve some space for data type <type>, there is 2 optionnals
* argument at < sa > and < sa2 > to configure this data type and
* they can be NULL if unused for a given type .
* Returns PE_NONE ( 0 ) if OK or an error code among :
2021-05-08 08:10:42 -04:00
* - PE_ENUM_OOR if < type > does not exist
* - PE_EXIST if < type > is already registered
2021-06-30 12:01:02 -04:00
* - PE_ARG_NOT_USE if < sa > / < sa2 > was provided but not expected
* - PE_ARG_MISSING if < sa > / < sa2 > was expected but not provided
* - PE_ARG_VALUE_OOR if type is an array and < sa > it out of array size range .
2021-05-08 08:10:42 -04:00
*/
2021-06-30 12:01:02 -04:00
int stktable_alloc_data_type ( struct stktable * t , int type , const char * sa , const char * sa2 )
2021-05-08 08:10:42 -04:00
{
if ( type > = STKTABLE_DATA_TYPES )
return PE_ENUM_OOR ;
if ( t - > data_ofs [ type ] )
/* already allocated */
return PE_EXIST ;
2021-06-30 12:01:02 -04:00
t - > data_nbelem [ type ] = 1 ;
if ( stktable_data_types [ type ] . is_array ) {
/* arrays take their element count on first argument */
if ( ! sa )
return PE_ARG_MISSING ;
t - > data_nbelem [ type ] = atoi ( sa ) ;
if ( ! t - > data_nbelem [ type ] | | ( t - > data_nbelem [ type ] > STKTABLE_MAX_DT_ARRAY_SIZE ) )
return PE_ARG_VALUE_OOR ;
sa = sa2 ;
}
2021-05-08 08:10:42 -04:00
switch ( stktable_data_types [ type ] . arg_type ) {
case ARG_T_NONE :
if ( sa )
return PE_ARG_NOT_USED ;
break ;
case ARG_T_INT :
if ( ! sa )
return PE_ARG_MISSING ;
t - > data_arg [ type ] . i = atoi ( sa ) ;
break ;
case ARG_T_DELAY :
if ( ! sa )
return PE_ARG_MISSING ;
sa = parse_time_err ( sa , & t - > data_arg [ type ] . u , TIME_UNIT_MS ) ;
if ( sa )
return PE_ARG_INVC ; /* invalid char */
break ;
}
2021-06-30 12:01:02 -04:00
t - > data_size + = t - > data_nbelem [ type ] * stktable_type_size ( stktable_data_types [ type ] . std_type ) ;
2021-05-08 08:10:42 -04:00
t - > data_ofs [ type ] = - t - > data_size ;
return PE_NONE ;
}
2019-03-08 08:47:00 -05:00
/*
2019-03-20 10:06:55 -04:00
* Parse a line with < linenum > as number in < file > configuration file to configure
* the stick - table with < t > as address and < id > as ID .
* < peers > provides the " peers " section pointer only if this function is called
* from a " peers " section .
* < nid > is the stick - table name which is sent over the network . It must be equal
* to < id > if this stick - table is parsed from a proxy section , and prefixed by < peers >
* " peers " section name followed by a ' / ' character if parsed from a " peers " section .
2020-04-07 16:07:56 -04:00
* This is the responsibility of the caller to check this .
2019-03-08 08:47:00 -05:00
* Return an error status with ERR_ * flags set if required , 0 if no error was encountered .
*/
int parse_stick_table ( const char * file , int linenum , char * * args ,
2019-03-20 10:06:55 -04:00
struct stktable * t , char * id , char * nid , struct peers * peers )
2019-03-08 08:47:00 -05:00
{
int err_code = 0 ;
int idx = 1 ;
unsigned int val ;
if ( ! id | | ! * id ) {
ha_alert ( " parsing [%s:%d] : %s: ID not provided. \n " , file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_ABORT ;
goto out ;
}
/* Store the "peers" section if this function is called from a "peers" section. */
if ( peers ) {
t - > peers . p = peers ;
idx + + ;
}
t - > id = id ;
2022-10-17 08:58:19 -04:00
t - > idlen = strlen ( id ) ;
2019-03-20 10:06:55 -04:00
t - > nid = nid ;
2019-03-08 08:47:00 -05:00
t - > type = ( unsigned int ) - 1 ;
t - > conf . file = file ;
t - > conf . line = linenum ;
MEDIUM: stktable/peers: "write-to" local table on peer updates
In this patch, we add the possibility to declare on a table definition
("table" in peer section, or "stick-table" in proxy section) that we
want the remote/peer updates on that table to be pushed on a local
haproxy table in addition to the source table.
Consider this example:
|peers mypeers
| peer local 127.0.0.1:3334
| peer clust 127.0.0.1:3333
| table t1.local type string size 10m store server_id,server_key expire 30s
| table t1.clust type string size 10m store server_id,server_key write-to mypeers/t1.local expire 30s
With this setup, we consider haproxy uses t1.local as cache/local table
for read and write operations, and that t1.clust is a remote table
containing datas processed from t1.local and similar tables from other
haproxy peers in a cluster setup. The t1.clust table will be used to
refresh the local/cache one via the "write-to" statement.
What will happen, is that every time haproxy will see entry updates for
the t1.clust table: it will overwrite t1.local table with fresh data and
will update the entry expiration timer. If t1.local entry doesn't exist
yet (key doesn't exist), it will automatically create it. Note that only
types that cannot be used for arithmetic ops will be handled, and this
to prevent processed values from the remote table from interfering with
computations based on values from the local table. (ie: prevent
cumulative counters from growing indefinitely).
"write-to" will only push supported types if they both exist in the source
and the target table. Be careful with server_id and server_key storage
because they are often declared implicitly when referencing a table in
sticking rules but it is required to declare them explicitly for them to
be pushed between a remote and a local table through "write-to" option.
Also note that the "write-to" target table should have the same type as
the source one, and that the key length should be strictly equal,
otherwise haproxy will raise an error due to the tables being
incompatibles. A table that is already being written to cannot be used
as a source table for a "write-to" target.
Thanks to this patch, it will now be possible to use sticking rules in
peer cluster context by using a local table as a local cache which
will be automatically refreshed by one or multiple remote table(s).
This commit depends on:
- "MINOR: stktable: stktable_init() sets err_msg on error"
- "MINOR: stktable: check if a type should be used as-is"
2023-10-02 10:40:27 -04:00
t - > write_to . name = NULL ;
2019-03-08 08:47:00 -05:00
while ( * args [ idx ] ) {
const char * err ;
if ( strcmp ( args [ idx ] , " size " ) = = 0 ) {
idx + + ;
if ( ! * ( args [ idx ] ) ) {
ha_alert ( " parsing [%s:%d] : %s: missing argument after '%s'. \n " ,
file , linenum , args [ 0 ] , args [ idx - 1 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
if ( ( err = parse_size_err ( args [ idx ] , & t - > size ) ) ) {
ha_alert ( " parsing [%s:%d] : %s: unexpected character '%c' in argument of '%s'. \n " ,
file , linenum , args [ 0 ] , * err , args [ idx - 1 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
idx + + ;
}
/* This argument does not exist in "peers" section. */
else if ( ! peers & & strcmp ( args [ idx ] , " peers " ) = = 0 ) {
idx + + ;
if ( ! * ( args [ idx ] ) ) {
ha_alert ( " parsing [%s:%d] : %s: missing argument after '%s'. \n " ,
file , linenum , args [ 0 ] , args [ idx - 1 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
2023-11-02 04:18:55 -04:00
ha_free ( & t - > peers . name ) ;
2019-03-08 08:47:00 -05:00
t - > peers . name = strdup ( args [ idx + + ] ) ;
}
else if ( strcmp ( args [ idx ] , " expire " ) = = 0 ) {
idx + + ;
if ( ! * ( args [ idx ] ) ) {
ha_alert ( " parsing [%s:%d] : %s: missing argument after '%s'. \n " ,
file , linenum , args [ 0 ] , args [ idx - 1 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
err = parse_time_err ( args [ idx ] , & val , TIME_UNIT_MS ) ;
2019-06-07 13:00:37 -04:00
if ( err = = PARSE_TIME_OVER ) {
ha_alert ( " parsing [%s:%d]: %s: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days). \n " ,
file , linenum , args [ 0 ] , args [ idx ] , args [ idx - 1 ] ) ;
2019-03-08 08:47:00 -05:00
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
2019-06-07 13:00:37 -04:00
else if ( err = = PARSE_TIME_UNDER ) {
ha_alert ( " parsing [%s:%d]: %s: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms. \n " ,
file , linenum , args [ 0 ] , args [ idx ] , args [ idx - 1 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
else if ( err ) {
ha_alert ( " parsing [%s:%d] : %s: unexpected character '%c' in argument of '%s'. \n " ,
file , linenum , args [ 0 ] , * err , args [ idx - 1 ] ) ;
2019-03-08 08:47:00 -05:00
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
t - > expire = val ;
idx + + ;
}
else if ( strcmp ( args [ idx ] , " nopurge " ) = = 0 ) {
t - > nopurge = 1 ;
idx + + ;
}
else if ( strcmp ( args [ idx ] , " type " ) = = 0 ) {
idx + + ;
2023-04-13 08:33:52 -04:00
if ( stktable_parse_type ( args , & idx , & t - > type , & t - > key_size , file , linenum ) ! = 0 ) {
2019-03-08 08:47:00 -05:00
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
/* idx already points to next arg */
}
else if ( strcmp ( args [ idx ] , " store " ) = = 0 ) {
int type , err ;
2021-06-30 12:01:02 -04:00
char * cw , * nw , * sa , * sa2 ;
2019-03-08 08:47:00 -05:00
idx + + ;
nw = args [ idx ] ;
while ( * nw ) {
/* the "store" keyword supports a comma-separated list */
cw = nw ;
sa = NULL ; /* store arg */
2021-06-30 12:01:02 -04:00
sa2 = NULL ;
2019-03-08 08:47:00 -05:00
while ( * nw & & * nw ! = ' , ' ) {
if ( * nw = = ' ( ' ) {
* nw = 0 ;
sa = + + nw ;
while ( * nw ! = ' ) ' ) {
if ( ! * nw ) {
ha_alert ( " parsing [%s:%d] : %s: missing closing parenthesis after store option '%s'. \n " ,
file , linenum , args [ 0 ] , cw ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
2021-06-30 12:01:02 -04:00
if ( * nw = = ' , ' ) {
* nw = ' \0 ' ;
sa2 = nw + 1 ;
}
2019-03-08 08:47:00 -05:00
nw + + ;
}
* nw = ' \0 ' ;
}
nw + + ;
}
if ( * nw )
* nw + + = ' \0 ' ;
type = stktable_get_data_type ( cw ) ;
if ( type < 0 ) {
ha_alert ( " parsing [%s:%d] : %s: unknown store option '%s'. \n " ,
file , linenum , args [ 0 ] , cw ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
2021-06-30 12:01:02 -04:00
err = stktable_alloc_data_type ( t , type , sa , sa2 ) ;
2019-03-08 08:47:00 -05:00
switch ( err ) {
case PE_NONE : break ;
case PE_EXIST :
ha_warning ( " parsing [%s:%d]: %s: store option '%s' already enabled, ignored. \n " ,
file , linenum , args [ 0 ] , cw ) ;
err_code | = ERR_WARN ;
break ;
case PE_ARG_MISSING :
ha_alert ( " parsing [%s:%d] : %s: missing argument to store option '%s'. \n " ,
file , linenum , args [ 0 ] , cw ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
case PE_ARG_NOT_USED :
ha_alert ( " parsing [%s:%d] : %s: unexpected argument to store option '%s'. \n " ,
file , linenum , args [ 0 ] , cw ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
2021-06-30 12:01:02 -04:00
case PE_ARG_VALUE_OOR :
ha_alert ( " parsing [%s:%d] : %s: array size is out of allowed range (1-%d) for store option '%s'. \n " ,
file , linenum , args [ 0 ] , STKTABLE_MAX_DT_ARRAY_SIZE , cw ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
2019-03-08 08:47:00 -05:00
default :
ha_alert ( " parsing [%s:%d] : %s: error when processing store option '%s'. \n " ,
file , linenum , args [ 0 ] , cw ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
}
idx + + ;
2021-06-30 12:58:22 -04:00
if ( t - > data_ofs [ STKTABLE_DT_GPT ] & & t - > data_ofs [ STKTABLE_DT_GPT0 ] ) {
ha_alert ( " parsing [%s:%d] : %s: simultaneous usage of 'gpt' and 'gpt0' in a same table is not permitted as 'gpt' overrides 'gpt0'. \n " ,
file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
2021-06-30 13:06:43 -04:00
else if ( t - > data_ofs [ STKTABLE_DT_GPC ] & & ( t - > data_ofs [ STKTABLE_DT_GPC0 ] | | t - > data_ofs [ STKTABLE_DT_GPC1 ] ) ) {
ha_alert ( " parsing [%s:%d] : %s: simultaneous usage of 'gpc' and 'gpc[0/1]' in a same table is not permitted as 'gpc' overrides 'gpc[0/1]'. \n " ,
file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
else if ( t - > data_ofs [ STKTABLE_DT_GPC_RATE ] & & ( t - > data_ofs [ STKTABLE_DT_GPC0_RATE ] | | t - > data_ofs [ STKTABLE_DT_GPC1_RATE ] ) ) {
ha_alert ( " parsing [%s:%d] : %s: simultaneous usage of 'gpc_rate' and 'gpc[0/1]_rate' in a same table is not permitted as 'gpc_rate' overrides 'gpc[0/1]_rate'. \n " ,
file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
2019-03-08 08:47:00 -05:00
}
2020-11-20 03:28:26 -05:00
else if ( strcmp ( args [ idx ] , " srvkey " ) = = 0 ) {
char * keytype ;
idx + + ;
keytype = args [ idx ] ;
if ( strcmp ( keytype , " name " ) = = 0 ) {
t - > server_key_type = STKTABLE_SRV_NAME ;
}
else if ( strcmp ( keytype , " addr " ) = = 0 ) {
t - > server_key_type = STKTABLE_SRV_ADDR ;
}
else {
ha_alert ( " parsing [%s:%d] : %s : unknown server key type '%s'. \n " ,
file , linenum , args [ 0 ] , keytype ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
idx + + ;
}
MEDIUM: stktable/peers: "write-to" local table on peer updates
In this patch, we add the possibility to declare on a table definition
("table" in peer section, or "stick-table" in proxy section) that we
want the remote/peer updates on that table to be pushed on a local
haproxy table in addition to the source table.
Consider this example:
|peers mypeers
| peer local 127.0.0.1:3334
| peer clust 127.0.0.1:3333
| table t1.local type string size 10m store server_id,server_key expire 30s
| table t1.clust type string size 10m store server_id,server_key write-to mypeers/t1.local expire 30s
With this setup, we consider haproxy uses t1.local as cache/local table
for read and write operations, and that t1.clust is a remote table
containing datas processed from t1.local and similar tables from other
haproxy peers in a cluster setup. The t1.clust table will be used to
refresh the local/cache one via the "write-to" statement.
What will happen, is that every time haproxy will see entry updates for
the t1.clust table: it will overwrite t1.local table with fresh data and
will update the entry expiration timer. If t1.local entry doesn't exist
yet (key doesn't exist), it will automatically create it. Note that only
types that cannot be used for arithmetic ops will be handled, and this
to prevent processed values from the remote table from interfering with
computations based on values from the local table. (ie: prevent
cumulative counters from growing indefinitely).
"write-to" will only push supported types if they both exist in the source
and the target table. Be careful with server_id and server_key storage
because they are often declared implicitly when referencing a table in
sticking rules but it is required to declare them explicitly for them to
be pushed between a remote and a local table through "write-to" option.
Also note that the "write-to" target table should have the same type as
the source one, and that the key length should be strictly equal,
otherwise haproxy will raise an error due to the tables being
incompatibles. A table that is already being written to cannot be used
as a source table for a "write-to" target.
Thanks to this patch, it will now be possible to use sticking rules in
peer cluster context by using a local table as a local cache which
will be automatically refreshed by one or multiple remote table(s).
This commit depends on:
- "MINOR: stktable: stktable_init() sets err_msg on error"
- "MINOR: stktable: check if a type should be used as-is"
2023-10-02 10:40:27 -04:00
else if ( strcmp ( args [ idx ] , " write-to " ) = = 0 ) {
char * write_to ;
idx + + ;
write_to = args [ idx ] ;
if ( ! write_to [ 0 ] ) {
ha_alert ( " parsing [%s:%d] : %s : write-to requires table name. \n " ,
file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
ha_free ( & t - > write_to . name ) ;
t - > write_to . name = strdup ( write_to ) ;
idx + + ;
}
2019-03-08 08:47:00 -05:00
else {
ha_alert ( " parsing [%s:%d] : %s: unknown argument '%s'. \n " ,
file , linenum , args [ 0 ] , args [ idx ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
}
if ( ! t - > size ) {
ha_alert ( " parsing [%s:%d] : %s: missing size. \n " ,
file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
if ( t - > type = = ( unsigned int ) - 1 ) {
ha_alert ( " parsing [%s:%d] : %s: missing type. \n " ,
file , linenum , args [ 0 ] ) ;
err_code | = ERR_ALERT | ERR_FATAL ;
goto out ;
}
out :
return err_code ;
}
2014-07-03 11:02:46 -04:00
/* Prepares a stktable_key from a sample <smp> to search into table <t>.
2016-05-25 11:07:56 -04:00
* Note that the sample * is * modified and that the returned key may point
* to it , so the sample must not be modified afterwards before the lookup .
2014-07-03 11:02:46 -04:00
* Returns NULL if the sample could not be converted ( eg : no matching type ) ,
* otherwise a pointer to the static stktable_key filled with what is needed
* for the lookup .
2010-06-06 07:22:23 -04:00
*/
2014-07-03 11:02:46 -04:00
struct stktable_key * smp_to_stkey ( struct sample * smp , struct stktable * t )
2010-06-06 07:22:23 -04:00
{
2015-08-10 11:53:45 -04:00
/* Convert sample. */
2015-07-24 02:46:42 -04:00
if ( ! sample_convert ( smp , t - > type ) )
2012-04-26 05:03:17 -04:00
return NULL ;
2015-08-10 11:53:45 -04:00
/* Fill static_table_key. */
switch ( t - > type ) {
2015-07-24 02:46:42 -04:00
case SMP_T_IPV4 :
2017-08-29 09:30:31 -04:00
static_table_key . key = & smp - > data . u . ipv4 ;
static_table_key . key_len = 4 ;
2015-08-10 11:53:45 -04:00
break ;
2015-07-24 02:46:42 -04:00
case SMP_T_IPV6 :
2017-08-29 09:30:31 -04:00
static_table_key . key = & smp - > data . u . ipv6 ;
static_table_key . key_len = 16 ;
2015-08-10 11:53:45 -04:00
break ;
2015-07-24 02:46:42 -04:00
case SMP_T_SINT :
2015-08-10 11:53:45 -04:00
/* The stick table require a 32bit unsigned int, "sint" is a
* signed 64 it , so we can convert it inplace .
*/
2019-10-23 00:21:05 -04:00
smp - > data . u . sint = ( unsigned int ) smp - > data . u . sint ;
2017-08-29 09:30:31 -04:00
static_table_key . key = & smp - > data . u . sint ;
static_table_key . key_len = 4 ;
2015-08-10 11:53:45 -04:00
break ;
2015-07-24 02:46:42 -04:00
case SMP_T_STR :
2016-08-09 05:59:12 -04:00
if ( ! smp_make_safe ( smp ) )
return NULL ;
2018-07-13 04:54:26 -04:00
static_table_key . key = smp - > data . u . str . area ;
static_table_key . key_len = smp - > data . u . str . data ;
2015-08-10 11:53:45 -04:00
break ;
2015-07-24 02:46:42 -04:00
case SMP_T_BIN :
2018-07-13 04:54:26 -04:00
if ( smp - > data . u . str . data < t - > key_size ) {
2015-08-10 11:53:45 -04:00
/* This type needs padding with 0. */
2016-08-09 06:08:41 -04:00
if ( ! smp_make_rw ( smp ) )
return NULL ;
2015-08-10 11:53:45 -04:00
if ( smp - > data . u . str . size < t - > key_size )
if ( ! smp_dup ( smp ) )
return NULL ;
if ( smp - > data . u . str . size < t - > key_size )
return NULL ;
2018-07-13 04:54:26 -04:00
memset ( smp - > data . u . str . area + smp - > data . u . str . data , 0 ,
t - > key_size - smp - > data . u . str . data ) ;
smp - > data . u . str . data = t - > key_size ;
2010-09-23 12:02:19 -04:00
}
2018-07-13 04:54:26 -04:00
static_table_key . key = smp - > data . u . str . area ;
static_table_key . key_len = smp - > data . u . str . data ;
2015-08-10 11:53:45 -04:00
break ;
2010-09-23 12:02:19 -04:00
2015-08-10 11:53:45 -04:00
default : /* impossible case. */
return NULL ;
2010-09-23 12:02:19 -04:00
}
2017-08-29 09:30:31 -04:00
return & static_table_key ;
2010-06-06 07:22:23 -04:00
}
2014-07-03 11:02:46 -04:00
/*
* Process a fetch + format conversion as defined by the sample expression < expr >
* on request or response considering the < opt > parameter . Returns either NULL if
* no key could be extracted , or a pointer to the converted result stored in
* static_table_key in format < table_type > . If < smp > is not NULL , it will be reset
* and its flags will be initialized so that the caller gets a copy of the input
2014-07-30 02:56:35 -04:00
* sample , and knows why it was not accepted ( eg : SMP_F_MAY_CHANGE is present
* without SMP_OPT_FINAL ) . The output will be usable like this :
*
* return MAY_CHANGE FINAL Meaning for the sample
* NULL 0 * Not present and will never be ( eg : header )
* NULL 1 0 Not present or unstable , could change ( eg : req_len )
* NULL 1 1 Not present , will not change anymore
* smp 0 * Present and will not change ( eg : header )
* smp 1 0 not possible
* smp 1 1 Present , last known value ( eg : request length )
2014-07-03 11:02:46 -04:00
*/
2015-04-03 19:47:55 -04:00
struct stktable_key * stktable_fetch_key ( struct stktable * t , struct proxy * px , struct session * sess , struct stream * strm ,
2014-07-03 11:02:46 -04:00
unsigned int opt , struct sample_expr * expr , struct sample * smp )
{
if ( smp )
memset ( smp , 0 , sizeof ( * smp ) ) ;
2015-04-03 19:47:55 -04:00
smp = sample_process ( px , sess , strm , opt , expr , smp ) ;
2014-07-03 11:02:46 -04:00
if ( ! smp )
return NULL ;
if ( ( smp - > flags & SMP_F_MAY_CHANGE ) & & ! ( opt & SMP_OPT_FINAL ) )
return NULL ; /* we can only use stable samples */
return smp_to_stkey ( smp , t ) ;
}
2010-06-06 07:22:23 -04:00
/*
2012-04-27 15:37:17 -04:00
* Returns 1 if sample expression < expr > result can be converted to table key of
2010-06-06 07:22:23 -04:00
* type < table_type > , otherwise zero . Used in configuration check .
*/
2012-04-27 15:37:17 -04:00
int stktable_compatible_sample ( struct sample_expr * expr , unsigned long table_type )
2010-06-06 07:22:23 -04:00
{
2013-11-27 09:30:55 -05:00
int out_type ;
2015-07-24 02:46:42 -04:00
if ( table_type > = SMP_TYPES | | ! stktable_types [ table_type ] . kw )
2010-06-06 07:22:23 -04:00
return 0 ;
2013-11-27 09:30:55 -05:00
out_type = smp_expr_output_type ( expr ) ;
2015-08-10 11:53:45 -04:00
/* Convert sample. */
if ( ! sample_casts [ out_type ] [ table_type ] )
2013-11-27 09:30:55 -05:00
return 0 ;
2010-06-06 07:22:23 -04:00
return 1 ;
}
2010-01-04 09:23:48 -05:00
2014-07-15 10:44:27 -04:00
/* Extra data types processing : after the last one, some room may remain
 * before STKTABLE_DATA_TYPES that may be used to register extra data types
 * at run time.
 */
struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = {
	/* NOTE(review): flag meanings inferred from usage in this file —
	 * .as_is: value is stored/forwarded verbatim (no arithmetic combination,
	 *   see the "write-to" handling above which only pushes such types);
	 * .is_array: entry holds several values, array size given at config time;
	 * .is_local: presumably never learned from remote peers (conn_cur only);
	 * .arg_type = ARG_T_DELAY: the store option takes a period argument
	 *   (eg: "conn_rate(10s)"). Confirm against stick_table.h.
	 */
	[STKTABLE_DT_SERVER_ID]      = { .name = "server_id",      .std_type = STD_T_SINT, .as_is = 1 },
	[STKTABLE_DT_GPT0]           = { .name = "gpt0",           .std_type = STD_T_UINT, .as_is = 1 },
	[STKTABLE_DT_GPC0]           = { .name = "gpc0",           .std_type = STD_T_UINT },
	[STKTABLE_DT_GPC0_RATE]      = { .name = "gpc0_rate",      .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_CONN_CNT]       = { .name = "conn_cnt",       .std_type = STD_T_UINT },
	[STKTABLE_DT_CONN_RATE]      = { .name = "conn_rate",      .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_CONN_CUR]       = { .name = "conn_cur",       .std_type = STD_T_UINT, .is_local = 1 },
	[STKTABLE_DT_SESS_CNT]       = { .name = "sess_cnt",       .std_type = STD_T_UINT },
	[STKTABLE_DT_SESS_RATE]      = { .name = "sess_rate",      .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_HTTP_REQ_CNT]   = { .name = "http_req_cnt",   .std_type = STD_T_UINT },
	[STKTABLE_DT_HTTP_REQ_RATE]  = { .name = "http_req_rate",  .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_HTTP_ERR_CNT]   = { .name = "http_err_cnt",   .std_type = STD_T_UINT },
	[STKTABLE_DT_HTTP_ERR_RATE]  = { .name = "http_err_rate",  .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_BYTES_IN_CNT]   = { .name = "bytes_in_cnt",   .std_type = STD_T_ULL },
	[STKTABLE_DT_BYTES_IN_RATE]  = { .name = "bytes_in_rate",  .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_BYTES_OUT_CNT]  = { .name = "bytes_out_cnt",  .std_type = STD_T_ULL },
	[STKTABLE_DT_BYTES_OUT_RATE] = { .name = "bytes_out_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_GPC1]           = { .name = "gpc1",           .std_type = STD_T_UINT },
	[STKTABLE_DT_GPC1_RATE]      = { .name = "gpc1_rate",      .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_SERVER_KEY]     = { .name = "server_key",     .std_type = STD_T_DICT, .as_is = 1 },
	[STKTABLE_DT_HTTP_FAIL_CNT]  = { .name = "http_fail_cnt",  .std_type = STD_T_UINT },
	[STKTABLE_DT_HTTP_FAIL_RATE] = { .name = "http_fail_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
	/* array variants of the general-purpose tags/counters; 'gpt'/'gpc'
	 * override 'gpt0'/'gpc[0/1]' and may not be mixed with them in a same
	 * table (checked in the config parser above).
	 */
	[STKTABLE_DT_GPT]            = { .name = "gpt",            .std_type = STD_T_UINT, .is_array = 1, .as_is = 1 },
	[STKTABLE_DT_GPC]            = { .name = "gpc",            .std_type = STD_T_UINT, .is_array = 1 },
	[STKTABLE_DT_GPC_RATE]       = { .name = "gpc_rate",       .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY },
	[STKTABLE_DT_GLITCH_CNT]     = { .name = "glitch_cnt",     .std_type = STD_T_UINT },
	[STKTABLE_DT_GLITCH_RATE]    = { .name = "glitch_rate",    .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
};
2014-07-15 10:44:27 -04:00
/* Registers stick-table extra data type with index <idx>, name <name>, type
* < std_type > and arg type < arg_type > . If the index is negative , the next free
* index is automatically allocated . The allocated index is returned , or - 1 if
* no free index was found or < name > was already registered . The < name > is used
* directly as a pointer , so if it ' s not stable , the caller must allocate it .
*/
int stktable_register_data_store ( int idx , const char * name , int std_type , int arg_type )
{
if ( idx < 0 ) {
for ( idx = 0 ; idx < STKTABLE_DATA_TYPES ; idx + + ) {
if ( ! stktable_data_types [ idx ] . name )
break ;
if ( strcmp ( stktable_data_types [ idx ] . name , name ) = = 0 )
return - 1 ;
}
}
if ( idx > = STKTABLE_DATA_TYPES )
return - 1 ;
if ( stktable_data_types [ idx ] . name ! = NULL )
return - 1 ;
stktable_data_types [ idx ] . name = name ;
stktable_data_types [ idx ] . std_type = std_type ;
stktable_data_types [ idx ] . arg_type = arg_type ;
return idx ;
}
2010-06-06 07:34:54 -04:00
/*
* Returns the data type number for the stktable_data_type whose name is < name > ,
* or < 0 if not found .
*/
int stktable_get_data_type ( char * name )
{
int type ;
for ( type = 0 ; type < STKTABLE_DATA_TYPES ; type + + ) {
2014-07-15 10:44:27 -04:00
if ( ! stktable_data_types [ type ] . name )
continue ;
2010-06-06 07:34:54 -04:00
if ( strcmp ( name , stktable_data_types [ type ] . name ) = = 0 )
return type ;
}
2020-11-20 03:28:26 -05:00
/* For backwards compatibility */
if ( strcmp ( name , " server_name " ) = = 0 )
return STKTABLE_DT_SERVER_KEY ;
2010-06-06 07:34:54 -04:00
return - 1 ;
}
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns true if found , false otherwise . The input
* type is STR so that input samples are converted to string ( since all types
* can be converted to strings ) , then the function casts the string again into
* the table ' s type . This is a double conversion , but in the future we might
* support automatic input types to perform the cast on the fly .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_in_table ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_BOOL ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = ! ! ts ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2018-06-27 00:25:57 -04:00
stktable_release ( t , ts ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
return 1 ;
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the data rate received from clients in bytes / s
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_bytes_in_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_BYTES_IN_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_BYTES_IN_RATE ] . u ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the cumulated number of connections for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_conn_cnt ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_CONN_CNT ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the number of concurrent connections for the
* key if the key is present in the table , otherwise zero , so that comparisons
* can be easily performed . If the inspected parameter is not stored in the
* table , < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_conn_cur ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_CONN_CUR ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the rate of incoming connections from the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_conn_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_CONN_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_CONN_RATE ] . u ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
2022-08-16 12:11:25 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the expiration delay for the key if the key is
* present in the table , otherwise the default value provided as second argument
* if any , if not ( no default value ) , < not found > is returned .
*/
static int sample_conv_table_expire ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
t = arg_p [ 0 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) { /* key not present */
if ( arg_p [ 1 ] . type = = ARGT_STOP )
return 0 ;
/* default value */
smp - > data . u . sint = arg_p [ 1 ] . data . sint ;
return 1 ;
}
smp - > data . u . sint = tick_remain ( now_ms , ts - > expire ) ;
stktable_release ( t , ts ) ;
return 1 ;
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the time the key remains unused if the key is
* present in the table , otherwise the default value provided as second argument
* if any , if not ( no default value ) , < not found > is returned .
*/
static int sample_conv_table_idle ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
t = arg_p [ 0 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) { /* key not present */
if ( arg_p [ 1 ] . type = = ARGT_STOP )
return 0 ;
/* default value */
smp - > data . u . sint = arg_p [ 1 ] . data . sint ;
return 1 ;
}
smp - > data . u . sint = tick_remain ( tick_remain ( now_ms , ts - > expire ) , t - > expire ) ;
stktable_release ( t , ts ) ;
return 1 ;
}
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the data rate sent to clients in bytes / s
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_bytes_out_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_BYTES_OUT_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_BYTES_OUT_RATE ] . u ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
2024-01-19 11:23:07 -05:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the cumulated number of front glitches for the
* key if the key is present in the table , otherwise zero , so that comparisons
* can be easily performed . If the inspected parameter is not stored in the
* table , < not found > is returned .
*/
static int sample_conv_table_glitch_cnt ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
t = arg_p [ 0 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GLITCH_CNT ) ;
if ( ptr )
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the front glitch rate the key if the key is
* present in the table , otherwise zero , so that comparisons can be easily
* performed . If the inspected parameter is not stored in the table , < not found >
* is returned .
*/
static int sample_conv_table_glitch_rate ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
t = arg_p [ 0 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GLITCH_RATE ) ;
if ( ptr )
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
t - > data_arg [ STKTABLE_DT_GLITCH_RATE ] . u ) ;
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
2021-06-30 12:57:49 -04:00
/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
* it up into this table . Returns the value of the GPT [ arg_p ( 0 ) ] tag for the key
* if the key is present in the table , otherwise false , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
static int sample_conv_table_gpt ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
unsigned int idx ;
idx = arg_p [ 0 ] . data . sint ;
t = arg_p [ 1 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPT , idx ) ;
if ( ptr )
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
2015-08-19 02:25:14 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the value of the GPT0 tag for the key
* if the key is present in the table , otherwise false , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
static int sample_conv_table_gpt0 ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
2015-08-19 02:25:14 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
2015-08-19 02:25:14 -04:00
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GPT0 ) ;
2021-06-30 12:58:22 -04:00
if ( ! ptr )
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPT , 0 ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2015-08-19 02:25:14 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
2015-08-19 02:25:14 -04:00
}
2021-06-30 13:04:16 -04:00
/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
* it up into this table . Returns the value of the GPC [ arg_p ( 0 ) ] counter for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
static int sample_conv_table_gpc ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
unsigned int idx ;
idx = arg_p [ 0 ] . data . sint ;
t = arg_p [ 1 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPC , idx ) ;
if ( ptr )
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
* it up into this table . Returns the event rate of the GPC [ arg_p ( 0 ) ] counter
* for the key if the key is present in the table , otherwise zero , so that
* comparisons can be easily performed . If the inspected parameter is not
* stored in the table , < not found > is returned .
*/
static int sample_conv_table_gpc_rate ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
unsigned int idx ;
idx = arg_p [ 0 ] . data . sint ;
t = arg_p [ 1 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPC_RATE , idx ) ;
if ( ptr )
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
t - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ) ;
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the value of the GPC0 counter for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_gpc0 ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GPC0 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr ) {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPC , 0 ) ;
}
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the event rate of the GPC0 counter for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_gpc0_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GPC0_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_GPC0_RATE ] . u ) ;
2021-06-30 13:06:43 -04:00
else {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPC_RATE , 0 ) ;
if ( ptr )
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
t - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ) ;
}
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
2018-01-29 09:22:53 -05:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the value of the GPC1 counter for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
static int sample_conv_table_gpc1 ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
2018-01-29 09:22:53 -05:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GPC1 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr ) {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPC , 1 ) ;
}
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2018-01-29 09:22:53 -05:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
2018-01-29 09:22:53 -05:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the event rate of the GPC1 counter for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
static int sample_conv_table_gpc1_rate ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
2018-01-29 09:22:53 -05:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_GPC1_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_GPC1_RATE ] . u ) ;
2021-06-30 13:06:43 -04:00
else {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( t , ts , STKTABLE_DT_GPC_RATE , 1 ) ;
if ( ptr )
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
t - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ) ;
}
2018-01-29 09:22:53 -05:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
2018-01-29 09:22:53 -05:00
}
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the cumulated number of HTTP request errors
* for the key if the key is present in the table , otherwise zero , so that
* comparisons can be easily performed . If the inspected parameter is not stored
* in the table , < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_http_err_cnt ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_HTTP_ERR_CNT ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the HTTP request error rate the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_http_err_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_HTTP_ERR_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_HTTP_ERR_RATE ] . u ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the cumulated number of HTTP response failures
* for the key if the key is present in the table , otherwise zero , so that
* comparisons can be easily performed . If the inspected parameter is not stored
* in the table , < not found > is returned .
*/
static int sample_conv_table_http_fail_cnt ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
t = arg_p [ 0 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_HTTP_FAIL_CNT ) ;
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the HTTP response failure rate for the key
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
static int sample_conv_table_http_fail_rate ( const struct arg * arg_p , struct sample * smp , void * private )
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
t = arg_p [ 0 ] . data . t ;
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
ts = stktable_lookup_key ( t , key ) ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_HTTP_FAIL_RATE ) ;
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
t - > data_arg [ STKTABLE_DT_HTTP_FAIL_RATE ] . u ) ;
stktable_release ( t , ts ) ;
return ! ! ptr ;
}
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the cumulated number of HTTP request for the
* key if the key is present in the table , otherwise zero , so that comparisons
* can be easily performed . If the inspected parameter is not stored in the
* table , < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_http_req_cnt ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_HTTP_REQ_CNT ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the HTTP request rate the key if the key is
* present in the table , otherwise zero , so that comparisons can be easily
* performed . If the inspected parameter is not stored in the table , < not found >
* is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_http_req_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_HTTP_REQ_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_HTTP_REQ_RATE ] . u ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the volume of datareceived from clients in kbytes
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_kbytes_in ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_BYTES_IN_CNT ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_ull ) > > 10 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the volume of data sent to clients in kbytes
* if the key is present in the table , otherwise zero , so that comparisons can
* be easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_kbytes_out ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_BYTES_OUT_CNT ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_ull ) > > 10 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the server ID associated with the key if the
* key is present in the table , otherwise zero , so that comparisons can be
* easily performed . If the inspected parameter is not stored in the table ,
* < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_server_id ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_SERVER_ID ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_sint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the cumulated number of sessions for the
* key if the key is present in the table , otherwise zero , so that comparisons
* can be easily performed . If the inspected parameter is not stored in the
* table , < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_sess_cnt ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_SESS_CNT ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the session rate the key if the key is
* present in the table , otherwise zero , so that comparisons can be easily
* performed . If the inspected parameter is not stored in the table , < not found >
* is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_sess_rate ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
void * ptr ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
if ( ! ts ) /* key not present */
return 1 ;
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_SESS_RATE ) ;
2018-05-27 09:47:12 -04:00
if ( ptr )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2018-05-27 09:47:12 -04:00
t - > data_arg [ STKTABLE_DT_SESS_RATE ] . u ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
return ! ! ptr ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
}
/* Casts sample <smp> to the type of the table specified in arg(0), and looks
* it up into this table . Returns the amount of concurrent connections tracking
* the same key if the key is present in the table , otherwise zero , so that
* comparisons can be easily performed . If the inspected parameter is not
* stored in the table , < not found > is returned .
*/
2015-05-11 09:20:49 -04:00
static int sample_conv_table_trackers ( const struct arg * arg_p , struct sample * smp , void * private )
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{
struct stktable * t ;
struct stktable_key * key ;
struct stksess * ts ;
2019-03-14 02:07:41 -04:00
t = arg_p [ 0 ] . data . t ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
key = smp_to_stkey ( smp , t ) ;
if ( ! key )
return 0 ;
2016-05-25 11:07:56 -04:00
ts = stktable_lookup_key ( t , key ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
smp - > flags = SMP_F_VOL_TEST ;
2015-08-19 03:00:18 -04:00
smp - > data . type = SMP_T_SINT ;
2015-08-19 03:07:19 -04:00
smp - > data . u . sint = 0 ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-06-26 09:57:29 -04:00
if ( ! ts )
return 1 ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
smp - > data . u . sint = HA_ATOMIC_LOAD ( & ts - > ref_cnt ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
2018-05-27 09:47:12 -04:00
stktable_release ( t , ts ) ;
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
return 1 ;
}
2021-06-30 13:04:16 -04:00
/* This function increments the gpc counter at index 'rule->arg.gpc.idx' of the
* array on the tracksc counter of index ' rule - > arg . gpc . sc ' stored into the
* < stream > or directly in the session < sess > if < stream > is set to NULL
*
* This function always returns ACT_RET_CONT and parameter flags is unused .
*/
static enum act_return action_inc_gpc ( struct act_rule * rule , struct proxy * px ,
struct session * sess , struct stream * s , int flags )
2015-08-04 02:20:33 -04:00
{
struct stksess * ts ;
struct stkctr * stkctr ;
/* Extract the stksess, return OK if no stksess available. */
if ( s )
stkctr = & s - > stkctr [ rule - > arg . gpc . sc ] ;
else
stkctr = & sess - > stkctr [ rule - > arg . gpc . sc ] ;
2016-01-25 08:54:45 -05:00
2015-08-04 02:20:33 -04:00
ts = stkctr_entry ( stkctr ) ;
2016-01-25 08:54:45 -05:00
if ( ts ) {
void * ptr1 , * ptr2 ;
2015-08-04 02:20:33 -04:00
2021-06-30 13:04:16 -04:00
/* First, update gpc_rate if it's tracked. Second, update its gpc if tracked. */
ptr1 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC_RATE , rule - > arg . gpc . idx ) ;
ptr2 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC , rule - > arg . gpc . idx ) ;
2017-06-13 13:37:32 -04:00
if ( ptr1 | | ptr2 ) {
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( ptr1 )
2021-06-30 11:18:28 -04:00
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
2021-06-30 13:04:16 -04:00
stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u , 1 ) ;
2016-01-25 08:54:45 -05:00
2017-06-13 13:37:32 -04:00
if ( ptr2 )
2021-06-30 11:18:28 -04:00
stktable_data_cast ( ptr2 , std_t_uint ) + + ;
2015-08-04 02:20:33 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
2017-06-13 13:37:32 -04:00
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , ts , 0 ) ;
}
2016-01-25 08:54:45 -05:00
}
2015-08-04 02:20:33 -04:00
return ACT_RET_CONT ;
}
2021-06-30 13:04:16 -04:00
/* Same as action_inc_gpc() but for gpc0 only */
static enum act_return action_inc_gpc0 ( struct act_rule * rule , struct proxy * px ,
struct session * sess , struct stream * s , int flags )
2015-08-04 02:20:33 -04:00
{
2021-06-30 13:04:16 -04:00
struct stksess * ts ;
struct stkctr * stkctr ;
2021-07-06 12:51:12 -04:00
unsigned int period = 0 ;
2015-08-04 02:20:33 -04:00
2021-06-30 13:04:16 -04:00
/* Extract the stksess, return OK if no stksess available. */
if ( s )
stkctr = & s - > stkctr [ rule - > arg . gpc . sc ] ;
else
stkctr = & sess - > stkctr [ rule - > arg . gpc . sc ] ;
2015-08-04 02:20:33 -04:00
2021-06-30 13:04:16 -04:00
ts = stkctr_entry ( stkctr ) ;
if ( ts ) {
void * ptr1 , * ptr2 ;
/* First, update gpc0_rate if it's tracked. Second, update its gpc0 if tracked. */
ptr1 = stktable_data_ptr ( stkctr - > table , ts , STKTABLE_DT_GPC0_RATE ) ;
2021-06-30 13:06:43 -04:00
if ( ptr1 ) {
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC0_RATE ] . u ;
}
else {
/* fallback on the gpc array */
ptr1 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC_RATE , 0 ) ;
if ( ptr1 )
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ;
}
2021-06-30 13:04:16 -04:00
ptr2 = stktable_data_ptr ( stkctr - > table , ts , STKTABLE_DT_GPC0 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr2 ) {
/* fallback on the gpc array */
ptr2 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC , 0 ) ;
}
2021-06-30 13:04:16 -04:00
if ( ptr1 | | ptr2 ) {
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
if ( ptr1 )
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
2021-06-30 13:06:43 -04:00
period , 1 ) ;
2021-06-30 13:04:16 -04:00
if ( ptr2 )
stktable_data_cast ( ptr2 , std_t_uint ) + + ;
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , ts , 0 ) ;
2015-08-04 02:20:33 -04:00
}
}
2021-06-30 13:04:16 -04:00
return ACT_RET_CONT ;
2015-08-04 02:20:33 -04:00
}
2021-06-30 13:04:16 -04:00
/* Same as action_inc_gpc() but for gpc1 only */
2018-01-29 09:22:53 -05:00
static enum act_return action_inc_gpc1 ( struct act_rule * rule , struct proxy * px ,
struct session * sess , struct stream * s , int flags )
{
struct stksess * ts ;
2023-01-06 10:09:58 -05:00
struct stkctr * stkctr = NULL ;
2021-07-06 12:51:12 -04:00
unsigned int period = 0 ;
2018-01-29 09:22:53 -05:00
/* Extract the stksess, return OK if no stksess available. */
2023-01-06 10:09:58 -05:00
if ( s & & s - > stkctr )
2018-01-29 09:22:53 -05:00
stkctr = & s - > stkctr [ rule - > arg . gpc . sc ] ;
2023-01-06 10:09:58 -05:00
else if ( sess - > stkctr )
2018-01-29 09:22:53 -05:00
stkctr = & sess - > stkctr [ rule - > arg . gpc . sc ] ;
2023-01-06 10:09:58 -05:00
else
return ACT_RET_CONT ;
2018-01-29 09:22:53 -05:00
ts = stkctr_entry ( stkctr ) ;
if ( ts ) {
void * ptr1 , * ptr2 ;
/* First, update gpc1_rate if it's tracked. Second, update its gpc1 if tracked. */
ptr1 = stktable_data_ptr ( stkctr - > table , ts , STKTABLE_DT_GPC1_RATE ) ;
2021-06-30 13:06:43 -04:00
if ( ptr1 ) {
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC1_RATE ] . u ;
}
else {
/* fallback on the gpc array */
ptr1 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC_RATE , 1 ) ;
if ( ptr1 )
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ;
}
2018-01-29 09:22:53 -05:00
ptr2 = stktable_data_ptr ( stkctr - > table , ts , STKTABLE_DT_GPC1 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr2 ) {
/* fallback on the gpc array */
ptr2 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC , 1 ) ;
}
2018-01-29 09:22:53 -05:00
if ( ptr1 | | ptr2 ) {
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
if ( ptr1 )
2021-06-30 11:18:28 -04:00
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
2021-06-30 13:06:43 -04:00
period , 1 ) ;
2018-01-29 09:22:53 -05:00
if ( ptr2 )
2021-06-30 11:18:28 -04:00
stktable_data_cast ( ptr2 , std_t_uint ) + + ;
2018-01-29 09:22:53 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , ts , 0 ) ;
}
}
return ACT_RET_CONT ;
}
2021-06-30 13:04:16 -04:00
/* This function is a common parser for actions incrementing the GPC
* ( General Purpose Counters ) . It understands the formats :
2018-01-29 09:22:53 -05:00
*
2021-06-30 13:04:16 -04:00
* sc - inc - gpc ( < gpc IDX > , < track ID > )
* sc - inc - gpc0 ( [ < track ID > ] )
* sc - inc - gpc1 ( [ < track ID > ] )
2018-01-29 09:22:53 -05:00
*
2021-06-30 13:04:16 -04:00
* It returns ACT_RET_PRS_ERR if fails and < err > is filled with an error
* message . Otherwise it returns ACT_RET_PRS_OK .
2018-01-29 09:22:53 -05:00
*/
2021-06-30 13:04:16 -04:00
static enum act_parse_ret parse_inc_gpc ( const char * * args , int * arg , struct proxy * px ,
struct act_rule * rule , char * * err )
2018-01-29 09:22:53 -05:00
{
const char * cmd_name = args [ * arg - 1 ] ;
char * error ;
2023-01-06 10:09:58 -05:00
if ( ! global . tune . nb_stk_ctr ) {
memprintf ( err , " Cannot use '%s', stick-counters are disabled via tune.stick-counters " , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
2021-06-30 13:04:16 -04:00
cmd_name + = strlen ( " sc-inc-gpc " ) ;
if ( * cmd_name = = ' ( ' ) {
cmd_name + + ; /* skip the '(' */
rule - > arg . gpc . idx = strtoul ( cmd_name , & error , 10 ) ; /* Convert stick table id. */
if ( * error ! = ' , ' ) {
memprintf ( err , " Missing gpc ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>) " , args [ * arg - 1 ] ) ;
2018-01-29 09:22:53 -05:00
return ACT_RET_PRS_ERR ;
}
2021-06-30 13:04:16 -04:00
else {
cmd_name = error + 1 ; /* skip the ',' */
rule - > arg . gpc . sc = strtol ( cmd_name , & error , 10 ) ; /* Convert stick table id. */
if ( * error ! = ' ) ' ) {
memprintf ( err , " invalid stick table track ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>) " , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
2023-01-06 10:09:58 -05:00
if ( rule - > arg . gpc . sc > = global . tune . nb_stk_ctr ) {
memprintf ( err , " invalid stick table track ID '%s'. The max allowed ID is %d (tune.stick-counters) " ,
args [ * arg - 1 ] , global . tune . nb_stk_ctr - 1 ) ;
2021-06-30 13:04:16 -04:00
return ACT_RET_PRS_ERR ;
}
2018-01-29 09:22:53 -05:00
}
2021-06-30 13:04:16 -04:00
rule - > action_ptr = action_inc_gpc ;
}
else if ( * cmd_name = = ' 0 ' | | * cmd_name = = ' 1 ' ) {
char c = * cmd_name ;
2018-01-29 09:22:53 -05:00
2021-06-30 13:04:16 -04:00
cmd_name + + ;
if ( * cmd_name = = ' \0 ' ) {
/* default stick table id. */
rule - > arg . gpc . sc = 0 ;
} else {
/* parse the stick table id. */
if ( * cmd_name ! = ' ( ' ) {
memprintf ( err , " invalid stick table track ID. Expects %s(<Track ID>) " , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
cmd_name + + ; /* jump the '(' */
rule - > arg . gpc . sc = strtol ( cmd_name , & error , 10 ) ; /* Convert stick table id. */
if ( * error ! = ' ) ' ) {
memprintf ( err , " invalid stick table track ID. Expects %s(<Track ID>) " , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
2023-01-06 10:09:58 -05:00
if ( rule - > arg . gpc . sc > = global . tune . nb_stk_ctr ) {
memprintf ( err , " invalid stick table track ID. The max allowed ID is %d (tune.stick-counters) " ,
global . tune . nb_stk_ctr - 1 ) ;
2021-06-30 13:04:16 -04:00
return ACT_RET_PRS_ERR ;
}
2018-01-29 09:22:53 -05:00
}
2021-06-30 13:04:16 -04:00
if ( c = = ' 1 ' )
rule - > action_ptr = action_inc_gpc1 ;
else
rule - > action_ptr = action_inc_gpc0 ;
}
else {
/* default stick table id. */
2023-01-02 11:35:50 -05:00
memprintf ( err , " invalid gpc ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>) " , args [ * arg - 1 ] ) ;
2021-06-30 13:04:16 -04:00
return ACT_RET_PRS_ERR ;
2018-01-29 09:22:53 -05:00
}
rule - > action = ACT_CUSTOM ;
return ACT_RET_PRS_OK ;
}
2021-06-30 12:57:49 -04:00
/* This function sets the gpt at index 'rule->arg.gpt.idx' of the array on the
* tracksc counter of index ' rule - > arg . gpt . sc ' stored into the < stream > or
* directly in the session < sess > if < stream > is set to NULL . This gpt is
* set to the value computed by the expression ' rule - > arg . gpt . expr ' or if
* ' rule - > arg . gpt . expr ' is null directly to the value of ' rule - > arg . gpt . value ' .
*
* This function always returns ACT_RET_CONT and parameter flags is unused .
*/
static enum act_return action_set_gpt ( struct act_rule * rule , struct proxy * px ,
struct session * sess , struct stream * s , int flags )
{
void * ptr ;
struct stksess * ts ;
2023-01-06 10:09:58 -05:00
struct stkctr * stkctr = NULL ;
2021-06-30 12:57:49 -04:00
unsigned int value = 0 ;
struct sample * smp ;
int smp_opt_dir ;
/* Extract the stksess, return OK if no stksess available. */
2023-01-06 10:09:58 -05:00
if ( s & & s - > stkctr )
2021-06-30 12:57:49 -04:00
stkctr = & s - > stkctr [ rule - > arg . gpt . sc ] ;
2023-01-06 10:09:58 -05:00
else if ( sess - > stkctr )
2021-06-30 12:57:49 -04:00
stkctr = & sess - > stkctr [ rule - > arg . gpt . sc ] ;
2023-01-06 10:09:58 -05:00
else
return ACT_RET_CONT ;
2021-06-30 12:57:49 -04:00
ts = stkctr_entry ( stkctr ) ;
if ( ! ts )
return ACT_RET_CONT ;
/* Store the sample in the required sc, and ignore errors. */
ptr = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPT , rule - > arg . gpt . idx ) ;
if ( ptr ) {
if ( ! rule - > arg . gpt . expr )
value = ( unsigned int ) ( rule - > arg . gpt . value ) ;
else {
switch ( rule - > from ) {
2023-08-09 11:23:32 -04:00
case ACT_F_TCP_REQ_CON : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
2021-06-30 12:57:49 -04:00
case ACT_F_TCP_REQ_SES : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_TCP_REQ_CNT : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_TCP_RES_CNT : smp_opt_dir = SMP_OPT_DIR_RES ; break ;
case ACT_F_HTTP_REQ : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_HTTP_RES : smp_opt_dir = SMP_OPT_DIR_RES ; break ;
default :
send_log ( px , LOG_ERR , " stick table: internal error while setting gpt%u. " , rule - > arg . gpt . idx ) ;
if ( ! ( global . mode & MODE_QUIET ) | | ( global . mode & MODE_VERBOSE ) )
ha_alert ( " stick table: internal error while executing setting gpt%u. \n " , rule - > arg . gpt . idx ) ;
return ACT_RET_CONT ;
}
/* Fetch and cast the expression. */
smp = sample_fetch_as_type ( px , sess , s , smp_opt_dir | SMP_OPT_FINAL , rule - > arg . gpt . expr , SMP_T_SINT ) ;
if ( ! smp ) {
send_log ( px , LOG_WARNING , " stick table: invalid expression or data type while setting gpt%u. " , rule - > arg . gpt . idx ) ;
if ( ! ( global . mode & MODE_QUIET ) | | ( global . mode & MODE_VERBOSE ) )
ha_alert ( " stick table: invalid expression or data type while setting gpt%u. \n " , rule - > arg . gpt . idx ) ;
return ACT_RET_CONT ;
}
value = ( unsigned int ) ( smp - > data . u . sint ) ;
}
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
stktable_data_cast ( ptr , std_t_uint ) = value ;
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
stktable_touch_local ( stkctr - > table , ts , 0 ) ;
}
return ACT_RET_CONT ;
}
2015-08-19 02:25:14 -04:00
/* Always returns 1. */
static enum act_return action_set_gpt0 ( struct act_rule * rule , struct proxy * px ,
2015-09-27 04:00:49 -04:00
struct session * sess , struct stream * s , int flags )
2015-08-19 02:25:14 -04:00
{
void * ptr ;
struct stksess * ts ;
2023-01-06 10:09:58 -05:00
struct stkctr * stkctr = NULL ;
2019-11-06 12:38:53 -05:00
unsigned int value = 0 ;
struct sample * smp ;
int smp_opt_dir ;
2015-08-19 02:25:14 -04:00
/* Extract the stksess, return OK if no stksess available. */
2023-01-06 10:09:58 -05:00
if ( s & & s - > stkctr )
2015-08-19 02:25:14 -04:00
stkctr = & s - > stkctr [ rule - > arg . gpt . sc ] ;
2023-01-06 10:09:58 -05:00
else if ( sess - > stkctr )
2015-08-19 02:25:14 -04:00
stkctr = & sess - > stkctr [ rule - > arg . gpt . sc ] ;
2023-01-06 10:09:58 -05:00
else
return ACT_RET_CONT ;
2016-01-25 08:54:45 -05:00
2015-08-19 02:25:14 -04:00
ts = stkctr_entry ( stkctr ) ;
if ( ! ts )
return ACT_RET_CONT ;
/* Store the sample in the required sc, and ignore errors. */
ptr = stktable_data_ptr ( stkctr - > table , ts , STKTABLE_DT_GPT0 ) ;
2021-06-30 12:58:22 -04:00
if ( ! ptr )
ptr = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPT , 0 ) ;
2016-01-25 08:54:45 -05:00
if ( ptr ) {
2019-11-06 12:38:53 -05:00
if ( ! rule - > arg . gpt . expr )
value = ( unsigned int ) ( rule - > arg . gpt . value ) ;
else {
switch ( rule - > from ) {
2023-08-09 11:23:32 -04:00
case ACT_F_TCP_REQ_CON : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
2019-11-06 12:38:53 -05:00
case ACT_F_TCP_REQ_SES : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_TCP_REQ_CNT : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_TCP_RES_CNT : smp_opt_dir = SMP_OPT_DIR_RES ; break ;
case ACT_F_HTTP_REQ : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_HTTP_RES : smp_opt_dir = SMP_OPT_DIR_RES ; break ;
default :
send_log ( px , LOG_ERR , " stick table: internal error while setting gpt0. " ) ;
if ( ! ( global . mode & MODE_QUIET ) | | ( global . mode & MODE_VERBOSE ) )
ha_alert ( " stick table: internal error while executing setting gpt0. \n " ) ;
return ACT_RET_CONT ;
}
/* Fetch and cast the expression. */
smp = sample_fetch_as_type ( px , sess , s , smp_opt_dir | SMP_OPT_FINAL , rule - > arg . gpt . expr , SMP_T_SINT ) ;
if ( ! smp ) {
send_log ( px , LOG_WARNING , " stick table: invalid expression or data type while setting gpt0. " ) ;
if ( ! ( global . mode & MODE_QUIET ) | | ( global . mode & MODE_VERBOSE ) )
ha_alert ( " stick table: invalid expression or data type while setting gpt0. \n " ) ;
return ACT_RET_CONT ;
}
value = ( unsigned int ) ( smp - > data . u . sint ) ;
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
stktable_data_cast ( ptr , std_t_uint ) = value ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
2017-06-13 13:37:32 -04:00
stktable_touch_local ( stkctr - > table , ts , 0 ) ;
2016-01-25 08:54:45 -05:00
}
2015-08-19 02:25:14 -04:00
return ACT_RET_CONT ;
}
2021-06-30 12:57:49 -04:00
/* This function is a parser for the "sc-set-gpt" and "sc-set-gpt0" actions.
 * It understands the formats:
 *
 *   sc-set-gpt(<gpt IDX>,<track ID>) <expression>
 *   sc-set-gpt0(<track ID>) <expression>
 *
 * It returns ACT_RET_PRS_ERR if fails and <err> is filled with an error message.
 * Otherwise, it returns ACT_RET_PRS_OK and the variable 'rule->arg.gpt.expr'
 * is filled with the pointer to the expression to execute or NULL if the arg
 * is directly an integer stored into 'rule->arg.gpt.value'.
 */
static enum act_parse_ret parse_set_gpt(const char **args, int *arg, struct proxy *px,
                                        struct act_rule *rule, char **err)
{
	const char *cmd_name = args[*arg-1];
	char *error;
	int smp_val;

	/* the action is meaningless when stick-counters are disabled */
	if (!global.tune.nb_stk_ctr) {
		memprintf(err, "Cannot use '%s', stick-counters are disabled via tune.stick-counters", args[*arg-1]);
		return ACT_RET_PRS_ERR;
	}

	/* skip the common "sc-set-gpt" prefix; the next character selects
	 * the variant: '(' for sc-set-gpt, '0' for legacy sc-set-gpt0
	 */
	cmd_name += strlen("sc-set-gpt");
	if (*cmd_name == '(') {
		cmd_name++; /* skip the '(' */
		rule->arg.gpt.idx = strtoul(cmd_name, &error, 10); /* Convert the gpt array index. */
		if (*error != ',') {
			memprintf(err, "Missing gpt ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
			return ACT_RET_PRS_ERR;
		}
		else {
			cmd_name = error + 1; /* skip the ',' */
			rule->arg.gpt.sc = strtol(cmd_name, &error, 10); /* Convert stick table id. */
			if (*error != ')') {
				memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
				return ACT_RET_PRS_ERR;
			}

			/* the per-stream/session counter arrays only hold
			 * global.tune.nb_stk_ctr entries
			 */
			if (rule->arg.gpt.sc >= global.tune.nb_stk_ctr) {
				memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
				          args[*arg-1], global.tune.nb_stk_ctr-1);
				return ACT_RET_PRS_ERR;
			}
		}
		rule->action_ptr = action_set_gpt;
	}
	else if (*cmd_name == '0') {
		/* legacy "sc-set-gpt0" form, with optional "(<Track ID>)" */
		cmd_name++;
		if (*cmd_name == '\0') {
			/* default stick table id. */
			rule->arg.gpt.sc = 0;
		} else {
			/* parse the stick table id. */
			if (*cmd_name != '(') {
				memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt0(<Track ID>)", args[*arg-1]);
				return ACT_RET_PRS_ERR;
			}
			cmd_name++; /* jump the '(' */
			rule->arg.gpt.sc = strtol(cmd_name, &error, 10); /* Convert stick table id. */
			if (*error != ')') {
				memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt0(<Track ID>)", args[*arg-1]);
				return ACT_RET_PRS_ERR;
			}

			if (rule->arg.gpt.sc >= global.tune.nb_stk_ctr) {
				memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
				          args[*arg-1], global.tune.nb_stk_ctr-1);
				return ACT_RET_PRS_ERR;
			}
		}
		rule->action_ptr = action_set_gpt0;
	}
	else {
		/* neither '(' nor '0' follows the prefix: unknown keyword */
		memprintf(err, "invalid gpt ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
		return ACT_RET_PRS_ERR;
	}

	/* value may be either an integer or an expression */
	rule->arg.gpt.expr = NULL;
	rule->arg.gpt.value = strtol(args[*arg], &error, 10);
	if (*error == '\0') {
		/* valid integer, skip it */
		(*arg)++;
	} else {
		/* not a plain integer: parse it as a sample expression */
		rule->arg.gpt.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
		                                       px->conf.args.line, err, &px->conf.args, NULL);
		if (!rule->arg.gpt.expr)
			return ACT_RET_PRS_ERR;

		/* map the rule set to the sample availability bit so we can
		 * verify the fetch can run where the rule is evaluated
		 */
		switch (rule->from) {
		case ACT_F_TCP_REQ_CON: smp_val = SMP_VAL_FE_CON_ACC; break;
		case ACT_F_TCP_REQ_SES: smp_val = SMP_VAL_FE_SES_ACC; break;
		case ACT_F_TCP_REQ_CNT: smp_val = SMP_VAL_FE_REQ_CNT; break;
		case ACT_F_TCP_RES_CNT: smp_val = SMP_VAL_BE_RES_CNT; break;
		case ACT_F_HTTP_REQ:    smp_val = SMP_VAL_FE_HRQ_HDR; break;
		case ACT_F_HTTP_RES:    smp_val = SMP_VAL_BE_HRS_HDR; break;
		default:
			memprintf(err, "internal error, unexpected rule->from=%d, please report this bug!", rule->from);
			return ACT_RET_PRS_ERR;
		}
		if (!(rule->arg.gpt.expr->fetch->val & smp_val)) {
			memprintf(err, "fetch method '%s' extracts information from '%s', none of which is available here", args[*arg-1],
			          sample_src_names(rule->arg.gpt.expr->fetch->use));
			free(rule->arg.gpt.expr);
			return ACT_RET_PRS_ERR;
		}
	}
	rule->action = ACT_CUSTOM;
	return ACT_RET_PRS_OK;
}
2023-01-02 12:15:20 -05:00
/* This function updates the gpc at index 'rule->arg.gpc.idx' of the array on
* the tracksc counter of index ' rule - > arg . gpc . sc ' stored into the < stream > or
* directly in the session < sess > if < stream > is set to NULL . This gpc is
* set to the value computed by the expression ' rule - > arg . gpc . expr ' or if
* ' rule - > arg . gpc . expr ' is null directly to the value of ' rule - > arg . gpc . value ' .
*
* This function always returns ACT_RET_CONT and parameter flags is unused .
*/
static enum act_return action_add_gpc ( struct act_rule * rule , struct proxy * px ,
struct session * sess , struct stream * s , int flags )
{
void * ptr1 , * ptr2 ;
struct stksess * ts ;
struct stkctr * stkctr ;
unsigned int value = 0 ;
struct sample * smp ;
int smp_opt_dir ;
/* Extract the stksess, return OK if no stksess available. */
if ( s )
stkctr = & s - > stkctr [ rule - > arg . gpc . sc ] ;
else
stkctr = & sess - > stkctr [ rule - > arg . gpc . sc ] ;
ts = stkctr_entry ( stkctr ) ;
if ( ! ts )
return ACT_RET_CONT ;
/* First, update gpc_rate if it's tracked. Second, update its gpc if tracked. */
ptr1 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC_RATE , rule - > arg . gpc . idx ) ;
ptr2 = stktable_data_ptr_idx ( stkctr - > table , ts , STKTABLE_DT_GPC , rule - > arg . gpc . idx ) ;
if ( ptr1 | | ptr2 ) {
if ( ! rule - > arg . gpc . expr )
value = ( unsigned int ) ( rule - > arg . gpc . value ) ;
else {
switch ( rule - > from ) {
2023-08-09 11:39:29 -04:00
case ACT_F_TCP_REQ_CON : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
2023-01-02 12:15:20 -05:00
case ACT_F_TCP_REQ_SES : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_TCP_REQ_CNT : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_TCP_RES_CNT : smp_opt_dir = SMP_OPT_DIR_RES ; break ;
case ACT_F_HTTP_REQ : smp_opt_dir = SMP_OPT_DIR_REQ ; break ;
case ACT_F_HTTP_RES : smp_opt_dir = SMP_OPT_DIR_RES ; break ;
default :
send_log ( px , LOG_ERR , " stick table: internal error while setting gpc%u. " , rule - > arg . gpc . idx ) ;
if ( ! ( global . mode & MODE_QUIET ) | | ( global . mode & MODE_VERBOSE ) )
ha_alert ( " stick table: internal error while executing setting gpc%u. \n " , rule - > arg . gpc . idx ) ;
return ACT_RET_CONT ;
}
/* Fetch and cast the expression. */
smp = sample_fetch_as_type ( px , sess , s , smp_opt_dir | SMP_OPT_FINAL , rule - > arg . gpc . expr , SMP_T_SINT ) ;
if ( ! smp ) {
send_log ( px , LOG_WARNING , " stick table: invalid expression or data type while setting gpc%u. " , rule - > arg . gpc . idx ) ;
if ( ! ( global . mode & MODE_QUIET ) | | ( global . mode & MODE_VERBOSE ) )
ha_alert ( " stick table: invalid expression or data type while setting gpc%u. \n " , rule - > arg . gpc . idx ) ;
return ACT_RET_CONT ;
}
value = ( unsigned int ) ( smp - > data . u . sint ) ;
}
if ( value ) {
/* only update the value if non-null increment */
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
if ( ptr1 )
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u , value ) ;
if ( ptr2 )
stktable_data_cast ( ptr2 , std_t_uint ) + = value ;
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
}
/* always touch the table so that it doesn't expire */
stktable_touch_local ( stkctr - > table , ts , 0 ) ;
}
return ACT_RET_CONT ;
}
/* This function is a parser for the "sc-add-gpc" action. It understands the
* format :
*
* sc - add - gpc ( < gpc IDX > , < track ID > ) < expression >
*
* It returns ACT_RET_PRS_ERR if fails and < err > is filled with an error message .
* Otherwise , it returns ACT_RET_PRS_OK and the variable ' rule - > arg . gpc . expr '
* is filled with the pointer to the expression to execute or NULL if the arg
* is directly an integer stored into ' rule - > arg . gpt . value ' .
*/
static enum act_parse_ret parse_add_gpc ( const char * * args , int * arg , struct proxy * px ,
struct act_rule * rule , char * * err )
{
const char * cmd_name = args [ * arg - 1 ] ;
char * error ;
int smp_val ;
cmd_name + = strlen ( " sc-add-gpc " ) ;
if ( * cmd_name ! = ' ( ' ) {
memprintf ( err , " Missing or invalid arguments for '%s'. Expects sc-add-gpc(<GPC ID>,<Track ID>) " , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
cmd_name + + ; /* skip the '(' */
rule - > arg . gpc . idx = strtoul ( cmd_name , & error , 10 ) ; /* Convert stick table id. */
if ( * error ! = ' , ' ) {
memprintf ( err , " Missing gpc ID. Expects %s(<GPC ID>,<Track ID>) " , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
else {
cmd_name = error + 1 ; /* skip the ',' */
rule - > arg . gpc . sc = strtol ( cmd_name , & error , 10 ) ; /* Convert stick table id. */
if ( * error ! = ' ) ' ) {
memprintf ( err , " invalid stick table track ID '%s'. Expects %s(<GPC ID>,<Track ID>) " , cmd_name , args [ * arg - 1 ] ) ;
return ACT_RET_PRS_ERR ;
}
if ( rule - > arg . gpc . sc > = MAX_SESS_STKCTR ) {
memprintf ( err , " invalid stick table track ID '%s' for '%s'. The max allowed ID is %d " ,
cmd_name , args [ * arg - 1 ] , MAX_SESS_STKCTR - 1 ) ;
return ACT_RET_PRS_ERR ;
}
}
rule - > action_ptr = action_add_gpc ;
/* value may be either an integer or an expression */
rule - > arg . gpc . expr = NULL ;
rule - > arg . gpc . value = strtol ( args [ * arg ] , & error , 10 ) ;
if ( * error = = ' \0 ' ) {
/* valid integer, skip it */
( * arg ) + + ;
} else {
rule - > arg . gpc . expr = sample_parse_expr ( ( char * * ) args , arg , px - > conf . args . file ,
px - > conf . args . line , err , & px - > conf . args , NULL ) ;
if ( ! rule - > arg . gpc . expr )
return ACT_RET_PRS_ERR ;
switch ( rule - > from ) {
2023-08-09 11:39:29 -04:00
case ACT_F_TCP_REQ_CON : smp_val = SMP_VAL_FE_CON_ACC ; break ;
2023-01-02 12:15:20 -05:00
case ACT_F_TCP_REQ_SES : smp_val = SMP_VAL_FE_SES_ACC ; break ;
case ACT_F_TCP_REQ_CNT : smp_val = SMP_VAL_FE_REQ_CNT ; break ;
case ACT_F_TCP_RES_CNT : smp_val = SMP_VAL_BE_RES_CNT ; break ;
case ACT_F_HTTP_REQ : smp_val = SMP_VAL_FE_HRQ_HDR ; break ;
case ACT_F_HTTP_RES : smp_val = SMP_VAL_BE_HRS_HDR ; break ;
default :
memprintf ( err , " internal error, unexpected rule->from=%d, please report this bug! " , rule - > from ) ;
return ACT_RET_PRS_ERR ;
}
if ( ! ( rule - > arg . gpc . expr - > fetch - > val & smp_val ) ) {
memprintf ( err , " fetch method '%s' extracts information from '%s', none of which is available here " , args [ * arg - 1 ] ,
sample_src_names ( rule - > arg . gpc . expr - > fetch - > use ) ) ;
free ( rule - > arg . gpc . expr ) ;
return ACT_RET_PRS_ERR ;
}
}
rule - > action = ACT_CUSTOM ;
return ACT_RET_PRS_OK ;
}
2016-11-25 10:10:05 -05:00
/* set temp integer to the number of used entries in the table pointed to by expr.
* Accepts exactly 1 argument of type table .
*/
static int
smp_fetch_table_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
2019-03-14 02:07:41 -04:00
smp - > data . u . sint = args - > data . t - > current ;
2016-11-25 10:10:05 -05:00
return 1 ;
}
/* set temp integer to the number of free entries in the table pointed to by expr.
* Accepts exactly 1 argument of type table .
*/
static int
smp_fetch_table_avl ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2019-03-14 02:07:41 -04:00
struct stktable * t ;
2016-11-25 10:10:05 -05:00
2019-03-14 02:07:41 -04:00
t = args - > data . t ;
2016-11-25 10:10:05 -05:00
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
2019-03-14 02:07:41 -04:00
smp - > data . u . sint = t - > size - t - > current ;
2016-11-25 10:10:05 -05:00
return 1 ;
}
/* Returns a pointer to a stkctr depending on the fetch keyword name.
 * It is designed to be called as sc[0-9]_*, sc_* or src_* exclusively.
 * sc[0-9]_* will return a pointer to the respective field in the
 * stream <l4>. sc_* requires an UINT argument specifying the stick
 * counter number. src_* will fill a locally allocated structure with
 * the table and entry corresponding to what is specified with src_*.
 * NULL may be returned if the designated stkctr is not tracked. For
 * the sc_* and sc[0-9]_* forms, an optional table argument may be
 * passed. When present, the currently tracked key is then looked up
 * in the specified table instead of the current table. The purpose is
 * to be able to convert multiple values per key (eg: have gpc0 from
 * multiple tables). <strm> is allowed to be NULL, in which case only
 * the session will be consulted.
 */
struct stkctr *
smp_fetch_sc_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr)
{
	struct stkctr *stkptr;
	struct stksess *stksess;
	/* kw[2] distinguishes the variants: a digit for "sc[0-9]_*", '_' for
	 * "sc_*", and anything else (e.g. 'c' in "src_*") yields num > 9.
	 */
	unsigned int num = kw[2] - '0';
	int arg = 0;

	if (num == '_' - '0') {
		/* sc_* variant, args[0] = ctr# (mandatory) */
		num = args[arg++].data.sint;
	}
	else if (num > 9) { /* src_* variant, args[0] = table */
		struct stktable_key *key;
		struct connection *conn = objt_conn(sess->origin);
		struct sample smp;

		/* src_* needs a connection to read the source address from */
		if (!conn)
			return NULL;

		/* Fetch source address in a sample. */
		smp.px = NULL;
		smp.sess = sess;
		smp.strm = strm;
		if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, &smp, "src", NULL))
			return NULL;

		/* Converts into key. */
		key = smp_to_stkey(&smp, args->data.t);
		if (!key)
			return NULL;

		/* Fill the caller-provided stkctr with the table and the entry
		 * found (possibly NULL) for this key and return it.
		 */
		stkctr->table = args->data.t;
		stkctr_set_entry(stkctr, stktable_lookup_key(stkctr->table, key));
		return stkctr;
	}

	/* Here, <num> contains the counter number from 0 to 9 for
	 * the sc[0-9]_ form, or even higher using sc_(num) if needed.
	 * args[arg] is the first optional argument. We first lookup the
	 * ctr form the stream, then from the session if it was not there.
	 * But we must be sure the counter does not exceed global.tune.nb_stk_ctr.
	 */
	if (num >= global.tune.nb_stk_ctr)
		return NULL;

	/* try the stream's counters first (its array may be NULL) */
	stkptr = NULL;
	if (strm && strm->stkctr)
		stkptr = &strm->stkctr[num];
	if (!strm || !stkptr || !stkctr_entry(stkptr)) {
		/* fall back to the session's counters */
		if (sess->stkctr)
			stkptr = &sess->stkctr[num];
		else
			return NULL;
		if (!stkctr_entry(stkptr))
			return NULL;
	}

	stksess = stkctr_entry(stkptr);
	if (!stksess)
		return NULL;

	if (unlikely(args[arg].type == ARGT_TAB)) {
		/* an alternate table was specified, let's look up the same key there */
		stkctr->table = args[arg].data.t;
		stkctr_set_entry(stkctr, stktable_lookup(stkctr->table, stksess));
		return stkctr;
	}
	/* no alternate table: return the tracked counter directly */
	return stkptr;
}
/* same as smp_fetch_sc_stkctr() but dedicated to src_* and can create
* the entry if it doesn ' t exist yet . This is needed for a few fetch
* functions which need to create an entry , such as src_inc_gpc * and
* src_clr_gpc * .
*/
struct stkctr *
2017-06-13 13:37:32 -04:00
smp_create_src_stkctr ( struct session * sess , struct stream * strm , const struct arg * args , const char * kw , struct stkctr * stkctr )
2016-11-25 10:10:05 -05:00
{
struct stktable_key * key ;
struct connection * conn = objt_conn ( sess - > origin ) ;
struct sample smp ;
if ( strncmp ( kw , " src_ " , 4 ) ! = 0 )
return NULL ;
if ( ! conn )
return NULL ;
2018-11-15 16:43:28 -05:00
/* Fetch source address in a sample. */
2016-11-25 10:10:05 -05:00
smp . px = NULL ;
smp . sess = sess ;
smp . strm = strm ;
2021-05-12 04:17:47 -04:00
if ( ! smp_fetch_src | | ! smp_fetch_src ( empty_arg_list , & smp , " src " , NULL ) )
2016-11-25 10:10:05 -05:00
return NULL ;
/* Converts into key. */
2019-03-14 02:07:41 -04:00
key = smp_to_stkey ( & smp , args - > data . t ) ;
2016-11-25 10:10:05 -05:00
if ( ! key )
return NULL ;
2019-03-14 02:07:41 -04:00
stkctr - > table = args - > data . t ;
2017-06-13 13:37:32 -04:00
stkctr_set_entry ( stkctr , stktable_get_entry ( stkctr - > table , key ) ) ;
return stkctr ;
2016-11-25 10:10:05 -05:00
}
/* set return a boolean indicating if the requested stream counter is
* currently being tracked or not .
* Supports being called as " sc[0-9]_tracked " only .
*/
static int
smp_fetch_sc_tracked ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
2016-11-25 10:10:05 -05:00
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_BOOL ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
smp - > data . u . sint = ! ! stkctr ;
/* release the ref count */
2018-09-14 08:31:22 -04:00
if ( stkctr = = & tmpstkctr )
2017-06-13 13:37:32 -04:00
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 1 ;
}
2021-06-30 12:57:49 -04:00
/* set <smp> to the General Purpose Tag of index set as first arg
* to value from the stream ' s tracked frontend counters or from the src .
* Supports being called as " sc_get_gpt(<gpt-idx>,<sc-idx>[,<table>]) " or
* " src_get_gpt(<gpt-idx>[,<table>]) " only . Value zero is returned if
* the key is new or gpt is not stored .
*/
static int
smp_fetch_sc_get_gpt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
unsigned int idx ;
idx = args [ 0 ] . data . sint ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args + 1 , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ) {
void * ptr ;
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPT , idx ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* set <smp> to the General Purpose Flag 0 value from the stream's tracked
* frontend counters or from the src .
* Supports being called as " sc[0-9]_get_gpc0 " or " src_get_gpt0 " only . Value
* zero is returned if the key is new .
*/
static int
smp_fetch_sc_get_gpt0 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
2017-06-13 13:37:32 -04:00
if ( stkctr_entry ( stkctr ) ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPT0 ) ;
2021-06-30 12:58:22 -04:00
if ( ! ptr )
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPT , 0 ) ;
2017-06-13 13:37:32 -04:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
2021-06-30 13:04:16 -04:00
/* set <smp> to the GPC[args(0)]'s value from the stream's tracked
* frontend counters or from the src .
* Supports being called as " sc_get_gpc(<gpc-idx>,<sc-idx>[,<table>]) " or
* " src_get_gpc(<gpc-idx>[,<table>]) " only . Value
* Value zero is returned if the key is new or gpc is not stored .
*/
static int
smp_fetch_sc_get_gpc ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
unsigned int idx ;
idx = args [ 0 ] . data . sint ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args + 1 , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
void * ptr ;
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , idx ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* set <smp> to the General Purpose Counter 0 value from the stream's tracked
* frontend counters or from the src .
* Supports being called as " sc[0-9]_get_gpc0 " or " src_get_gpc0 " only . Value
* zero is returned if the key is new .
*/
static int
smp_fetch_sc_get_gpc0 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC0 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr ) {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , 0 ) ;
}
2017-06-13 13:37:32 -04:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
2018-01-29 09:22:53 -05:00
/* set <smp> to the General Purpose Counter 1 value from the stream's tracked
* frontend counters or from the src .
* Supports being called as " sc[0-9]_get_gpc1 " or " src_get_gpc1 " only . Value
* zero is returned if the key is new .
*/
static int
smp_fetch_sc_get_gpc1 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC1 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr ) {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , 1 ) ;
}
2018-01-29 09:22:53 -05:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2018-01-29 09:22:53 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2021-06-30 13:04:16 -04:00
/* set <smp> to the GPC[args(0)]'s event rate from the stream's
* tracked frontend counters or from the src .
* Supports being called as " sc_gpc_rate(<gpc-idx>,<sc-idx>[,<table]) "
* or " src_gpc_rate(<gpc-idx>[,<table>]) " only .
* Value zero is returned if the key is new or gpc_rate is not stored .
*/
static int
smp_fetch_sc_gpc_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
unsigned int idx ;
idx = args [ 0 ] . data . sint ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args + 1 , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
void * ptr ;
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC_RATE , idx ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ) ;
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* set <smp> to the General Purpose Counter 0's event rate from the stream's
* tracked frontend counters or from the src .
* Supports being called as " sc[0-9]_gpc0_rate " or " src_gpc0_rate " only .
* Value zero is returned if the key is new .
*/
static int
smp_fetch_sc_gpc0_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2021-06-30 13:06:43 -04:00
unsigned int period ;
2016-11-25 10:10:05 -05:00
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC0_RATE ) ;
2021-06-30 13:06:43 -04:00
if ( ptr ) {
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC0_RATE ] . u ;
}
else {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC_RATE , 0 ) ;
if ( ptr )
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ;
}
2017-06-13 13:37:32 -04:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 13:06:43 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) , period ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
2018-01-29 09:22:53 -05:00
/* set <smp> to the General Purpose Counter 1's event rate from the stream's
* tracked frontend counters or from the src .
* Supports being called as " sc[0-9]_gpc1_rate " or " src_gpc1_rate " only .
* Value zero is returned if the key is new .
*/
static int
smp_fetch_sc_gpc1_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
2021-06-30 13:06:43 -04:00
unsigned int period ;
2018-01-29 09:22:53 -05:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC1_RATE ) ;
2021-06-30 13:06:43 -04:00
if ( ptr ) {
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC1_RATE ] . u ;
}
else {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC_RATE , 1 ) ;
if ( ptr )
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ;
}
2018-01-29 09:22:53 -05:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2021-06-30 13:06:43 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) , period ) ;
2018-01-29 09:22:53 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2021-06-30 13:04:16 -04:00
/* Increment the GPC[args(0)] value from the stream's tracked
* frontend counters and return it into temp integer .
* Supports being called as " sc_inc_gpc(<gpc-idx>,<sc-idx>[,<table>]) "
* or " src_inc_gpc(<gpc-idx>[,<table>]) " only .
*/
static int
smp_fetch_sc_inc_gpc ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
unsigned int idx ;
idx = args [ 0 ] . data . sint ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args + 1 , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! stkctr_entry ( stkctr ) )
stkctr = smp_create_src_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( stkctr & & stkctr_entry ( stkctr ) ) {
void * ptr1 , * ptr2 ;
/* First, update gpc0_rate if it's tracked. Second, update its
* gpc0 if tracked . Returns gpc0 ' s value otherwise the curr_ctr .
*/
ptr1 = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC_RATE , idx ) ;
ptr2 = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , idx ) ;
if ( ptr1 | | ptr2 ) {
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( ptr1 ) {
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u , 1 ) ;
smp - > data . u . sint = ( & stktable_data_cast ( ptr1 , std_t_frqp ) ) - > curr_ctr ;
}
if ( ptr2 )
smp - > data . u . sint = + + stktable_data_cast ( ptr2 , std_t_uint ) ;
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , stkctr_entry ( stkctr ) , ( stkctr = = & tmpstkctr ) ? 1 : 0 ) ;
}
else if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* Increment the General Purpose Counter 0 value from the stream's tracked
* frontend counters and return it into temp integer .
* Supports being called as " sc[0-9]_inc_gpc0 " or " src_inc_gpc0 " only .
*/
static int
smp_fetch_sc_inc_gpc0 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2021-07-06 12:51:12 -04:00
unsigned int period = 0 ;
2016-11-25 10:10:05 -05:00
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
2017-06-13 13:37:32 -04:00
if ( ! stkctr_entry ( stkctr ) )
stkctr = smp_create_src_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( stkctr & & stkctr_entry ( stkctr ) ) {
void * ptr1 , * ptr2 ;
2017-06-13 13:37:32 -04:00
2016-11-25 10:10:05 -05:00
/* First, update gpc0_rate if it's tracked. Second, update its
* gpc0 if tracked . Returns gpc0 ' s value otherwise the curr_ctr .
*/
ptr1 = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC0_RATE ) ;
2021-06-30 13:06:43 -04:00
if ( ptr1 ) {
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC0_RATE ] . u ;
}
else {
/* fallback on the gpc array */
ptr1 = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC_RATE , 0 ) ;
if ( ptr1 )
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ;
}
2016-11-25 10:10:05 -05:00
ptr2 = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC0 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr2 ) {
/* fallback on the gpc array */
ptr2 = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , 0 ) ;
}
2017-06-13 13:37:32 -04:00
if ( ptr1 | | ptr2 ) {
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2016-11-25 10:10:05 -05:00
2017-06-13 13:37:32 -04:00
if ( ptr1 ) {
2021-06-30 11:18:28 -04:00
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
2021-06-30 13:06:43 -04:00
period , 1 ) ;
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = ( & stktable_data_cast ( ptr1 , std_t_frqp ) ) - > curr_ctr ;
2017-06-13 13:37:32 -04:00
}
if ( ptr2 )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = + + stktable_data_cast ( ptr2 , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , stkctr_entry ( stkctr ) , ( stkctr = = & tmpstkctr ) ? 1 : 0 ) ;
}
else if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
2018-01-29 09:22:53 -05:00
/* Increment the General Purpose Counter 1 value from the stream's tracked
* frontend counters and return it into temp integer .
* Supports being called as " sc[0-9]_inc_gpc1 " or " src_inc_gpc1 " only .
*/
static int
smp_fetch_sc_inc_gpc1 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
2021-07-06 12:51:12 -04:00
unsigned int period = 0 ;
2018-01-29 09:22:53 -05:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! stkctr_entry ( stkctr ) )
stkctr = smp_create_src_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( stkctr & & stkctr_entry ( stkctr ) ) {
void * ptr1 , * ptr2 ;
/* First, update gpc1_rate if it's tracked. Second, update its
* gpc1 if tracked . Returns gpc1 ' s value otherwise the curr_ctr .
*/
ptr1 = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC1_RATE ) ;
2021-06-30 13:06:43 -04:00
if ( ptr1 ) {
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC1_RATE ] . u ;
}
else {
/* fallback on the gpc array */
ptr1 = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC_RATE , 1 ) ;
if ( ptr1 )
period = stkctr - > table - > data_arg [ STKTABLE_DT_GPC_RATE ] . u ;
}
2018-01-29 09:22:53 -05:00
ptr2 = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC1 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr2 ) {
/* fallback on the gpc array */
ptr2 = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , 1 ) ;
}
2018-01-29 09:22:53 -05:00
if ( ptr1 | | ptr2 ) {
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( ptr1 ) {
2021-06-30 11:18:28 -04:00
update_freq_ctr_period ( & stktable_data_cast ( ptr1 , std_t_frqp ) ,
2021-06-30 13:06:43 -04:00
period , 1 ) ;
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = ( & stktable_data_cast ( ptr1 , std_t_frqp ) ) - > curr_ctr ;
2018-01-29 09:22:53 -05:00
}
if ( ptr2 )
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = + + stktable_data_cast ( ptr2 , std_t_uint ) ;
2018-01-29 09:22:53 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , stkctr_entry ( stkctr ) , ( stkctr = = & tmpstkctr ) ? 1 : 0 ) ;
}
else if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2021-06-30 13:04:16 -04:00
/* Clear the GPC[args(0)] value from the stream's tracked
* frontend counters and return its previous value into temp integer .
* Supports being called as " sc_clr_gpc(<gpc-idx>,<sc-idx>[,<table>]) "
* or " src_clr_gpc(<gpc-idx>[,<table>]) " only .
*/
static int
smp_fetch_sc_clr_gpc ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
unsigned int idx ;
idx = args [ 0 ] . data . sint ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args + 1 , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! stkctr_entry ( stkctr ) )
stkctr = smp_create_src_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( stkctr & & stkctr_entry ( stkctr ) ) {
void * ptr ;
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , idx ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
stktable_data_cast ( ptr , std_t_uint ) = 0 ;
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , stkctr_entry ( stkctr ) , ( stkctr = = & tmpstkctr ) ? 1 : 0 ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* Clear the General Purpose Counter 0 value from the stream's tracked
* frontend counters and return its previous value into temp integer .
* Supports being called as " sc[0-9]_clr_gpc0 " or " src_clr_gpc0 " only .
*/
static int
smp_fetch_sc_clr_gpc0 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
2017-06-13 13:37:32 -04:00
if ( ! stkctr_entry ( stkctr ) )
stkctr = smp_create_src_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
2017-06-13 13:37:32 -04:00
if ( stkctr & & stkctr_entry ( stkctr ) ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC0 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr ) {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , 0 ) ;
}
2017-06-13 13:37:32 -04:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
stktable_data_cast ( ptr , std_t_uint ) = 0 ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2016-11-25 10:10:05 -05:00
/* If data was modified, we need to touch to re-schedule sync */
2017-06-13 13:37:32 -04:00
stktable_touch_local ( stkctr - > table , stkctr_entry ( stkctr ) , ( stkctr = = & tmpstkctr ) ? 1 : 0 ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
2018-01-29 09:22:53 -05:00
/* Clear the General Purpose Counter 1 value from the stream's tracked
* frontend counters and return its previous value into temp integer .
* Supports being called as " sc[0-9]_clr_gpc1 " or " src_clr_gpc1 " only .
*/
static int
smp_fetch_sc_clr_gpc1 ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( ! stkctr_entry ( stkctr ) )
stkctr = smp_create_src_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( stkctr & & stkctr_entry ( stkctr ) ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC1 ) ;
2021-06-30 13:06:43 -04:00
if ( ! ptr ) {
/* fallback on the gpc array */
ptr = stktable_data_ptr_idx ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GPC , 1 ) ;
}
2018-01-29 09:22:53 -05:00
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
stktable_data_cast ( ptr , std_t_uint ) = 0 ;
2018-01-29 09:22:53 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
/* If data was modified, we need to touch to re-schedule sync */
stktable_touch_local ( stkctr - > table , stkctr_entry ( stkctr ) , ( stkctr = = & tmpstkctr ) ? 1 : 0 ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* set <smp> to the cumulated number of connections from the stream's tracked
* frontend counters . Supports being called as " sc[0-9]_conn_cnt " or
* " src_conn_cnt " only .
*/
static int
smp_fetch_sc_conn_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_CONN_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the connection rate from the stream's tracked frontend
* counters . Supports being called as " sc[0-9]_conn_rate " or " src_conn_rate "
* only .
*/
static int
smp_fetch_sc_conn_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_CONN_RATE ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2016-11-25 10:10:05 -05:00
stkctr - > table - > data_arg [ STKTABLE_DT_CONN_RATE ] . u ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set temp integer to the number of connections from the stream's source address
* in the table pointed to by expr , after updating it .
* Accepts exactly 1 argument of type table .
*/
static int
smp_fetch_src_updt_conn_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct connection * conn = objt_conn ( smp - > sess - > origin ) ;
struct stksess * ts ;
struct stktable_key * key ;
void * ptr ;
2019-03-14 02:07:41 -04:00
struct stktable * t ;
2016-11-25 10:10:05 -05:00
if ( ! conn )
return 0 ;
2018-11-15 16:43:28 -05:00
/* Fetch source address in a sample. */
2021-05-12 04:17:47 -04:00
if ( ! smp_fetch_src | | ! smp_fetch_src ( empty_arg_list , smp , " src " , NULL ) )
2016-11-25 10:10:05 -05:00
return 0 ;
/* Converts into key. */
2019-03-14 02:07:41 -04:00
key = smp_to_stkey ( smp , args - > data . t ) ;
2016-11-25 10:10:05 -05:00
if ( ! key )
return 0 ;
2019-03-14 02:07:41 -04:00
t = args - > data . t ;
2016-11-25 10:10:05 -05:00
2019-03-14 02:07:41 -04:00
if ( ( ts = stktable_get_entry ( t , key ) ) = = NULL )
2016-11-25 10:10:05 -05:00
/* entry does not exist and could not be created */
return 0 ;
2019-03-14 02:07:41 -04:00
ptr = stktable_data_ptr ( t , ts , STKTABLE_DT_CONN_CNT ) ;
2017-06-13 13:37:32 -04:00
if ( ! ptr ) {
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored in this table */
2017-06-13 13:37:32 -04:00
}
2016-11-25 10:10:05 -05:00
smp - > data . type = SMP_T_SINT ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = + + stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_WRUNLOCK ( STK_SESS_LOCK , & ts - > lock ) ;
2017-06-13 13:37:32 -04:00
2016-11-25 10:10:05 -05:00
smp - > flags = SMP_F_VOL_TEST ;
2017-06-13 13:37:32 -04:00
2019-03-14 02:07:41 -04:00
stktable_touch_local ( t , ts , 1 ) ;
2017-06-13 13:37:32 -04:00
/* Touch was previously performed by stktable_update_key */
2016-11-25 10:10:05 -05:00
return 1 ;
}
/* set <smp> to the number of concurrent connections from the stream's tracked
* frontend counters . Supports being called as " sc[0-9]_conn_cur " or
* " src_conn_cur " only .
*/
static int
smp_fetch_sc_conn_cur ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_CONN_CUR ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
2024-01-19 11:23:07 -05:00
/* set <smp> to the cumulated number of glitches from the stream or session's
* tracked frontend counters . Supports being called as " sc[0-9]_glitch_cnt " or
* " src_glitch_cnt " only .
*/
static int
smp_fetch_sc_glitch_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GLITCH_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
/* set <smp> to the rate of glitches from the stream or session's tracked
* frontend counters . Supports being called as " sc[0-9]_glitch_rate " or
* " src_glitch_rate " only .
*/
static int
smp_fetch_sc_glitch_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
struct stkctr tmpstkctr ;
struct stkctr * stkctr ;
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_GLITCH_RATE ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
return 0 ; /* parameter not stored */
}
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
stkctr - > table - > data_arg [ STKTABLE_DT_GLITCH_RATE ] . u ) ;
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
}
return 1 ;
}
2016-11-25 10:10:05 -05:00
/* set <smp> to the cumulated number of streams from the stream's tracked
* frontend counters . Supports being called as " sc[0-9]_sess_cnt " or
* " src_sess_cnt " only .
*/
static int
smp_fetch_sc_sess_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_SESS_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the stream rate from the stream's tracked frontend counters.
* Supports being called as " sc[0-9]_sess_rate " or " src_sess_rate " only .
*/
static int
smp_fetch_sc_sess_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_SESS_RATE ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2016-11-25 10:10:05 -05:00
stkctr - > table - > data_arg [ STKTABLE_DT_SESS_RATE ] . u ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the cumulated number of HTTP requests from the stream's tracked
* frontend counters . Supports being called as " sc[0-9]_http_req_cnt " or
* " src_http_req_cnt " only .
*/
static int
smp_fetch_sc_http_req_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_HTTP_REQ_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the HTTP request rate from the stream's tracked frontend
* counters . Supports being called as " sc[0-9]_http_req_rate " or
* " src_http_req_rate " only .
*/
static int
smp_fetch_sc_http_req_rate ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_HTTP_REQ_RATE ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2016-11-25 10:10:05 -05:00
stkctr - > table - > data_arg [ STKTABLE_DT_HTTP_REQ_RATE ] . u ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the cumulated number of HTTP requests errors from the stream's
* tracked frontend counters . Supports being called as " sc[0-9]_http_err_cnt " or
* " src_http_err_cnt " only .
*/
static int
smp_fetch_sc_http_err_cnt ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_HTTP_ERR_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_uint ) ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the HTTP request error rate from the stream's tracked frontend
 * counters. Supports being called as "sc[0-9]_http_err_rate" or
 * "src_http_err_rate" only.
 */
static int
smp_fetch_sc_http_err_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct stkctr tmpstkctr;
	struct stkctr *stkctr;

	stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
	if (!stkctr)
		return 0;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;
	if (stkctr_entry(stkctr) != NULL) {
		void *ptr;

		ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_ERR_RATE);
		if (!ptr) {
			/* release the ref taken on a temporary ("src_*") lookup
			 * before bailing out, otherwise the entry would leak.
			 */
			if (stkctr == &tmpstkctr)
				stktable_release(stkctr->table, stkctr_entry(stkctr));

			return 0; /* parameter not stored */
		}

		/* the freq counter must be read under the entry's read lock */
		HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
		                                        stkctr->table->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u);

		HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		if (stkctr == &tmpstkctr)
			stktable_release(stkctr->table, stkctr_entry(stkctr));
	}
	return 1;
}
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
/* set <smp> to the cumulated number of HTTP response failures from the stream's
 * tracked frontend counters. Supports being called as "sc[0-9]_http_fail_cnt" or
 * "src_http_fail_cnt" only.
 */
static int
smp_fetch_sc_http_fail_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct stkctr tmpstkctr;
	struct stkctr *stkctr;

	stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
	if (!stkctr)
		return 0;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;
	if (stkctr_entry(stkctr) != NULL) {
		void *ptr;

		ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_FAIL_CNT);
		if (!ptr) {
			/* release the ref taken on a temporary ("src_*") lookup
			 * before bailing out, otherwise the entry would leak.
			 */
			if (stkctr == &tmpstkctr)
				stktable_release(stkctr->table, stkctr_entry(stkctr));

			return 0; /* parameter not stored */
		}

		/* read the counter under the entry's read lock for consistency */
		HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);

		HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		if (stkctr == &tmpstkctr)
			stktable_release(stkctr->table, stkctr_entry(stkctr));
	}
	return 1;
}
/* set <smp> to the HTTP response failure rate from the stream's tracked frontend
 * counters. Supports being called as "sc[0-9]_http_fail_rate" or
 * "src_http_fail_rate" only.
 */
static int
smp_fetch_sc_http_fail_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct stkctr tmpstkctr;
	struct stkctr *stkctr;

	stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
	if (!stkctr)
		return 0;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;
	if (stkctr_entry(stkctr) != NULL) {
		void *ptr;

		ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_FAIL_RATE);
		if (!ptr) {
			/* release the ref taken on a temporary ("src_*") lookup
			 * before bailing out, otherwise the entry would leak.
			 */
			if (stkctr == &tmpstkctr)
				stktable_release(stkctr->table, stkctr_entry(stkctr));

			return 0; /* parameter not stored */
		}

		/* the freq counter must be read under the entry's read lock */
		HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
		                                        stkctr->table->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u);

		HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		if (stkctr == &tmpstkctr)
			stktable_release(stkctr->table, stkctr_entry(stkctr));
	}
	return 1;
}
2016-11-25 10:10:05 -05:00
/* set <smp> to the number of kbytes received from clients, as found in the
* stream ' s tracked frontend counters . Supports being called as
* " sc[0-9]_kbytes_in " or " src_kbytes_in " only .
*/
static int
smp_fetch_sc_kbytes_in ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_BYTES_IN_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_ull ) > > 10 ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the data rate received from clients in bytes/s, as found
 * in the stream's tracked frontend counters. Supports being called as
 * "sc[0-9]_bytes_in_rate" or "src_bytes_in_rate" only.
 */
static int
smp_fetch_sc_bytes_in_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct stkctr tmpstkctr;
	struct stkctr *stkctr;

	stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
	if (!stkctr)
		return 0;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;
	if (stkctr_entry(stkctr) != NULL) {
		void *ptr;

		ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_IN_RATE);
		if (!ptr) {
			/* release the ref taken on a temporary ("src_*") lookup
			 * before bailing out, otherwise the entry would leak.
			 */
			if (stkctr == &tmpstkctr)
				stktable_release(stkctr->table, stkctr_entry(stkctr));

			return 0; /* parameter not stored */
		}

		/* the freq counter must be read under the entry's read lock */
		HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
		                                        stkctr->table->data_arg[STKTABLE_DT_BYTES_IN_RATE].u);

		HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		if (stkctr == &tmpstkctr)
			stktable_release(stkctr->table, stkctr_entry(stkctr));
	}
	return 1;
}
/* set <smp> to the number of kbytes sent to clients, as found in the
* stream ' s tracked frontend counters . Supports being called as
* " sc[0-9]_kbytes_out " or " src_kbytes_out " only .
*/
static int
smp_fetch_sc_kbytes_out ( const struct arg * args , struct sample * smp , const char * kw , void * private )
{
2017-06-13 13:37:32 -04:00
struct stkctr tmpstkctr ;
2016-11-25 10:10:05 -05:00
struct stkctr * stkctr ;
2017-06-13 13:37:32 -04:00
stkctr = smp_fetch_sc_stkctr ( smp - > sess , smp - > strm , args , kw , & tmpstkctr ) ;
2016-11-25 10:10:05 -05:00
if ( ! stkctr )
return 0 ;
smp - > flags = SMP_F_VOL_TEST ;
smp - > data . type = SMP_T_SINT ;
smp - > data . u . sint = 0 ;
if ( stkctr_entry ( stkctr ) ! = NULL ) {
2017-06-13 13:37:32 -04:00
void * ptr ;
ptr = stktable_data_ptr ( stkctr - > table , stkctr_entry ( stkctr ) , STKTABLE_DT_BYTES_OUT_CNT ) ;
if ( ! ptr ) {
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
return 0 ; /* parameter not stored */
2017-06-13 13:37:32 -04:00
}
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
2021-06-30 11:18:28 -04:00
smp - > data . u . sint = stktable_data_cast ( ptr , std_t_ull ) > > 10 ;
2017-06-13 13:37:32 -04:00
2017-11-07 04:42:54 -05:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & stkctr_entry ( stkctr ) - > lock ) ;
2017-06-13 13:37:32 -04:00
if ( stkctr = = & tmpstkctr )
stktable_release ( stkctr - > table , stkctr_entry ( stkctr ) ) ;
2016-11-25 10:10:05 -05:00
}
return 1 ;
}
/* set <smp> to the data rate sent to clients in bytes/s, as found in the
 * stream's tracked frontend counters. Supports being called as
 * "sc[0-9]_bytes_out_rate" or "src_bytes_out_rate" only.
 */
static int
smp_fetch_sc_bytes_out_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct stkctr tmpstkctr;
	struct stkctr *stkctr;

	stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
	if (!stkctr)
		return 0;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;
	if (stkctr_entry(stkctr) != NULL) {
		void *ptr;

		ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_OUT_RATE);
		if (!ptr) {
			/* release the ref taken on a temporary ("src_*") lookup
			 * before bailing out, otherwise the entry would leak.
			 */
			if (stkctr == &tmpstkctr)
				stktable_release(stkctr->table, stkctr_entry(stkctr));

			return 0; /* parameter not stored */
		}

		/* the freq counter must be read under the entry's read lock */
		HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
		                                        stkctr->table->data_arg[STKTABLE_DT_BYTES_OUT_RATE].u);

		HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);

		if (stkctr == &tmpstkctr)
			stktable_release(stkctr->table, stkctr_entry(stkctr));
	}
	return 1;
}
/* set <smp> to the number of active trackers on the SC entry in the stream's
 * tracked frontend counters. Supports being called as "sc[0-9]_trackers" only.
 */
static int
smp_fetch_sc_trackers(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct stkctr tmpstkctr;
	struct stkctr *stkctr;

	stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
	if (!stkctr)
		return 0;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	if (stkctr == &tmpstkctr) {
		/* temporary lookup: our own reference is counted in ref_cnt,
		 * so subtract it to report only the other trackers. The load
		 * is atomic since ref_cnt is updated outside of locks.
		 */
		smp->data.u.sint = stkctr_entry(stkctr) ? (HA_ATOMIC_LOAD(&stkctr_entry(stkctr)->ref_cnt) - 1) : 0;
		stktable_release(stkctr->table, stkctr_entry(stkctr));
	}
	else {
		/* tracked entry: we hold no extra ref here, report as-is */
		smp->data.u.sint = stkctr_entry(stkctr) ? HA_ATOMIC_LOAD(&stkctr_entry(stkctr)->ref_cnt) : 0;
	}

	return 1;
}
2016-11-22 12:00:53 -05:00
/* The functions below are used to manipulate table contents from the CLI.
* There are 3 main actions , " clear " , " set " and " show " . The code is shared
* between all actions , and the action is encoded in the void * private in
* the appctx as well as in the keyword registration , among one of the
* following values .
*/
enum {
STK_CLI_ACT_CLR ,
STK_CLI_ACT_SET ,
STK_CLI_ACT_SHOW ,
} ;
2022-05-17 13:07:51 -04:00
/* Dump the status of a table to a stream connector's
 * read buffer. It returns 0 if the output buffer is full
 * and needs to be called again, otherwise non-zero.
 */
static int table_dump_head_to_buffer(struct buffer *msg,
                                     struct appctx *appctx,
                                     struct stktable *t, struct stktable *target)
{
	struct stream *s = __sc_strm(appctx_sc(appctx));

	chunk_appendf(msg, "# table: %s, type: %s, size:%d, used:%d\n",
		      t->id, stktable_types[t->type].kw, t->size, t->current);

	/* any other information should be dumped here */

	/* when a specific table was requested (<target> set), refuse to dump
	 * its contents to users below the "operator" access level.
	 */
	if (target && (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) < ACCESS_LVL_OPER)
		chunk_appendf(msg, "# contents not dumped due to insufficient privileges\n");

	if (applet_putchk(appctx, msg) == -1)
		return 0;

	return 1;
}
2022-05-17 13:07:51 -04:00
/* Dump a table entry to a stream connector's
 * read buffer. It returns 0 if the output buffer is full
 * and needs to be called again, otherwise non-zero.
 */
static int table_dump_entry_to_buffer(struct buffer *msg,
                                      struct appctx *appctx,
                                      struct stktable *t, struct stksess *entry)
{
	int dt;

	chunk_appendf(msg, "%p:", entry);

	/* print the key in a form suited to the table's key type */
	if (t->type == SMP_T_IPV4) {
		char addr[INET_ADDRSTRLEN];
		inet_ntop(AF_INET, (const void *)&entry->key.key, addr, sizeof(addr));
		chunk_appendf(msg, " key=%s", addr);
	}
	else if (t->type == SMP_T_IPV6) {
		char addr[INET6_ADDRSTRLEN];
		inet_ntop(AF_INET6, (const void *)&entry->key.key, addr, sizeof(addr));
		chunk_appendf(msg, " key=%s", addr);
	}
	else if (t->type == SMP_T_SINT) {
		chunk_appendf(msg, " key=%u", read_u32(entry->key.key));
	}
	else if (t->type == SMP_T_STR) {
		chunk_appendf(msg, " key=");
		dump_text(msg, (const char *)entry->key.key, t->key_size);
	}
	else {
		/* unknown/binary key types are hex-dumped */
		chunk_appendf(msg, " key=");
		dump_binary(msg, (const char *)entry->key.key, t->key_size);
	}

	/* ref_cnt includes our own reference, hence the -1 for "use" */
	chunk_appendf(msg, " use=%d exp=%d shard=%d", HA_ATOMIC_LOAD(&entry->ref_cnt) - 1, tick_remain(now_ms, entry->expire), entry->shard);

	for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
		void *ptr;

		/* skip data types not stored in this table */
		if (t->data_ofs[dt] == 0)
			continue;

		if (stktable_data_types[dt].is_array) {
			char tmp[16] = {};
			const char *name_pfx = stktable_data_types[dt].name;
			const char *name_sfx = NULL;
			unsigned int idx = 0;
			int i = 0;

			/* split name to show index before first _ of the name
			 * for example: 'gpc3_rate' if array name is 'gpc_rate'.
			 */
			for (i = 0; i < (sizeof(tmp) - 1); i++) {
				if (!name_pfx[i])
					break;
				if (name_pfx[i] == '_') {
					name_pfx = &tmp[0];
					name_sfx = &stktable_data_types[dt].name[i];
					break;
				}
				tmp[i] = name_pfx[i];
			}

			/* dump every element of the array until none is left */
			ptr = stktable_data_ptr_idx(t, entry, dt, idx);
			while (ptr) {
				if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
					chunk_appendf(msg, " %s%u%s(%u)=", name_pfx, idx, name_sfx ? name_sfx : "", t->data_arg[dt].u);
				else
					chunk_appendf(msg, " %s%u%s=", name_pfx, idx, name_sfx ? name_sfx : "");
				switch (stktable_data_types[dt].std_type) {
				case STD_T_SINT:
					chunk_appendf(msg, "%d", stktable_data_cast(ptr, std_t_sint));
					break;
				case STD_T_UINT:
					chunk_appendf(msg, "%u", stktable_data_cast(ptr, std_t_uint));
					break;
				case STD_T_ULL:
					chunk_appendf(msg, "%llu", stktable_data_cast(ptr, std_t_ull));
					break;
				case STD_T_FRQP:
					chunk_appendf(msg, "%u",
						      read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
									   t->data_arg[dt].u));
					break;
				}
				ptr = stktable_data_ptr_idx(t, entry, dt, ++idx);
			}
			continue;
		}

		/* scalar data types: name, optional period argument, then value */
		if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
			chunk_appendf(msg, " %s(%u)=", stktable_data_types[dt].name, t->data_arg[dt].u);
		else
			chunk_appendf(msg, " %s=", stktable_data_types[dt].name);

		ptr = stktable_data_ptr(t, entry, dt);
		switch (stktable_data_types[dt].std_type) {
		case STD_T_SINT:
			chunk_appendf(msg, "%d", stktable_data_cast(ptr, std_t_sint));
			break;
		case STD_T_UINT:
			chunk_appendf(msg, "%u", stktable_data_cast(ptr, std_t_uint));
			break;
		case STD_T_ULL:
			chunk_appendf(msg, "%llu", stktable_data_cast(ptr, std_t_ull));
			break;
		case STD_T_FRQP:
			chunk_appendf(msg, "%u",
				      read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
							   t->data_arg[dt].u));
			break;
		case STD_T_DICT: {
			struct dict_entry *de;
			de = stktable_data_cast(ptr, std_t_dict);
			chunk_appendf(msg, "%s", de ? (char *)de->value.key : "-");
			break;
		}
		}
	}
	chunk_appendf(msg, "\n");

	if (applet_putchk(appctx, msg) == -1)
		return 0;

	return 1;
}
2022-05-03 05:35:07 -04:00
/* appctx context used by the "show table" command.
 * Holds the iteration state of a (possibly multi-call) table dump as well as
 * the optional data-based filters parsed from the command line.
 */
struct show_table_ctx {
	void *target;                               /* table we want to dump, or NULL for all */
	struct stktable *t;                         /* table being currently dumped (first if NULL) */
	struct stksess *entry;                      /* last entry we were trying to dump (or first if NULL) */
	int tree_head;                              /* tree head currently being visited */
	long long value[STKTABLE_FILTER_LEN];       /* value to compare against */
	signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */
	signed char data_op[STKTABLE_FILTER_LEN];   /* operator (STD_OP_*) when data_type set */
	enum {
		STATE_NEXT = 0,  /* px points to next table, entry=NULL */
		STATE_DUMP,      /* px points to curr table, entry is valid, refcount held */
		STATE_DONE,      /* done dumping */
	} state;
	char action;                                /* action on the table : one of STK_CLI_ACT_* */
};
2016-11-22 12:00:53 -05:00
2023-12-18 08:40:44 -05:00
/* Processes a single table entry <ts>.
 * Returns 0 if it wants to be called again, 1 if it has ended processing.
 * Depending on ctx->action this either dumps, clears or updates the entry.
 * NOTE(review): in the SHOW/CLR/SET paths below, <ts> carries a reference
 * taken by the caller; every exit path appears to drop it via
 * stktable_release() or stktable_touch_local(..., 1) — confirm against the
 * callers before touching the error paths.
 */
static int table_process_entry(struct appctx *appctx, struct stksess *ts, char **args)
{
	struct show_table_ctx *ctx = appctx->svcctx;
	struct stktable *t = ctx->target;
	long long value;
	int data_type;
	int cur_arg;
	void *ptr;
	struct freq_ctr *frqp;

	/* only simple key types can be addressed by a textual key */
	switch (t->type) {
	case SMP_T_IPV4:
	case SMP_T_IPV6:
	case SMP_T_SINT:
	case SMP_T_STR:
		break;
	default:
		switch (ctx->action) {
		case STK_CLI_ACT_SHOW:
			return cli_err(appctx, "Showing keys from tables of type other than ip, ipv6, string and integer is not supported\n");
		case STK_CLI_ACT_CLR:
			return cli_err(appctx, "Removing keys from tables of type other than ip, ipv6, string and integer is not supported\n");
		case STK_CLI_ACT_SET:
			return cli_err(appctx, "Inserting keys into tables of type other than ip, ipv6, string and integer is not supported\n");
		default:
			return cli_err(appctx, "Unknown action\n");
		}
	}

	/* check permissions */
	if (!cli_has_level(appctx, ACCESS_LVL_OPER))
		return 1;

	/* the lookup performed by the caller may legitimately find nothing */
	if (!ts)
		return 1;

	switch (ctx->action) {
	case STK_CLI_ACT_SHOW:
		chunk_reset(&trash);
		/* a 0 return from the dump helpers means the output buffer is
		 * full: release the entry and ask to be called again.
		 */
		if (!table_dump_head_to_buffer(&trash, appctx, t, t)) {
			stktable_release(t, ts);
			return 0;
		}
		/* hold the session's read lock while its data are formatted */
		HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
		if (!table_dump_entry_to_buffer(&trash, appctx, t, ts)) {
			HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
			stktable_release(t, ts);
			return 0;
		}
		HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
		stktable_release(t, ts);
		break;
	case STK_CLI_ACT_CLR:
		if (!stksess_kill(t, ts, 1)) {
			/* don't delete an entry which is currently referenced */
			return cli_err(appctx, "Entry currently in use, cannot remove\n");
		}
		break;
	case STK_CLI_ACT_SET:
		/* parse "data.<type> <value>" pairs starting at args[5] and
		 * store each one under the session's write lock.
		 */
		HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
		for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) {
			if (strncmp(args[cur_arg], "data.", 5) != 0) {
				cli_err(appctx, "\"data.<type>\" followed by a value expected\n");
				HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
				stktable_touch_local(t, ts, 1);
				return 1;
			}
			data_type = stktable_get_data_type(args[cur_arg] + 5);
			if (data_type < 0) {
				cli_err(appctx, "Unknown data type\n");
				HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
				stktable_touch_local(t, ts, 1);
				return 1;
			}
			if (!t->data_ofs[data_type]) {
				cli_err(appctx, "Data type not stored in this table\n");
				HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
				stktable_touch_local(t, ts, 1);
				return 1;
			}
			if (!*args[cur_arg + 1] || strl2llrc(args[cur_arg + 1], strlen(args[cur_arg + 1]), &value) != 0) {
				cli_err(appctx, "Require a valid integer value to store\n");
				HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
				stktable_touch_local(t, ts, 1);
				return 1;
			}
			ptr = __stktable_data_ptr(t, ts, data_type);

			switch (stktable_data_types[data_type].std_type) {
			case STD_T_SINT:
				stktable_data_cast(ptr, std_t_sint) = value;
				break;
			case STD_T_UINT:
				stktable_data_cast(ptr, std_t_uint) = value;
				break;
			case STD_T_ULL:
				stktable_data_cast(ptr, std_t_ull) = value;
				break;
			case STD_T_FRQP:
				/* We set both the current and previous values. That way
				 * the reported frequency is stable during all the period
				 * then slowly fades out. This allows external tools to
				 * push measures without having to update them too often.
				 */
				frqp = &stktable_data_cast(ptr, std_t_frqp);
				/* First bit is reserved for the freq_ctr lock
				   Note: here we're still protected by the stksess lock
				   so we don't need to update the freq_ctr
				   using its internal lock */
				frqp->curr_tick = now_ms & ~0x1;
				frqp->prev_ctr = 0;
				frqp->curr_ctr = value;
				break;
			}
		}
		HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
		/* schedule the update for propagation and drop our reference */
		stktable_touch_local(t, ts, 1);
		break;
	default:
		return cli_err(appctx, "Unknown action\n");
	}
	return 1;
}
/* Processes a single table entry matching a specific key passed in argument.
* returns 0 if wants to be called again , 1 if has ended processing .
*/
static int table_process_entry_per_key ( struct appctx * appctx , char * * args )
{
struct show_table_ctx * ctx = appctx - > svcctx ;
struct stktable * t = ctx - > target ;
struct stksess * ts ;
struct sample key ;
if ( ! * args [ 4 ] )
return cli_err ( appctx , " Key value expected \n " ) ;
memset ( & key , 0 , sizeof ( key ) ) ;
key . data . type = SMP_T_STR ;
key . data . u . str . area = args [ 4 ] ;
key . data . u . str . data = strlen ( args [ 4 ] ) ;
switch ( t - > type ) {
case SMP_T_IPV4 :
case SMP_T_IPV6 :
/* prefer input format over table type when parsing ip addresses,
* then let smp_to_stkey ( ) do the conversion for us when needed
*/
BUG_ON ( ! sample_casts [ key . data . type ] [ SMP_T_ADDR ] ) ;
if ( ! sample_casts [ key . data . type ] [ SMP_T_ADDR ] ( & key ) )
return cli_err ( appctx , " Invalid key \n " ) ;
break ;
default :
/* nothing to do */
break ;
}
/* try to convert key according to table type
* ( it will fill static_table_key on success )
*/
if ( ! smp_to_stkey ( & key , t ) )
return cli_err ( appctx , " Invalid key \n " ) ;
if ( ctx - > action = = STK_CLI_ACT_SET ) {
ts = stktable_get_entry ( t , & static_table_key ) ;
if ( ! ts )
return cli_err ( appctx , " Unable to allocate a new entry \n " ) ;
} else
ts = stktable_lookup_key ( t , & static_table_key ) ;
return table_process_entry ( appctx , ts , args ) ;
2016-11-22 12:00:53 -05:00
}
2023-12-18 09:37:25 -05:00
/* Processes a single table entry matching a specific ptr passed in argument.
* returns 0 if wants to be called again , 1 if has ended processing .
*/
static int table_process_entry_per_ptr ( struct appctx * appctx , char * * args )
{
struct show_table_ctx * ctx = appctx - > svcctx ;
struct stktable * t = ctx - > target ;
2024-01-21 02:21:35 -05:00
ulong ptr ;
2023-12-18 09:37:25 -05:00
char * error ;
struct stksess * ts ;
if ( ! * args [ 4 ] | | args [ 4 ] [ 0 ] ! = ' 0 ' | | args [ 4 ] [ 1 ] ! = ' x ' )
return cli_err ( appctx , " Pointer expected (0xffff notation) \ n " ) ;
/* Convert argument to integer value */
2024-01-21 02:21:35 -05:00
ptr = strtoul ( args [ 4 ] , & error , 16 ) ;
2023-12-18 09:37:25 -05:00
if ( * error ! = ' \0 ' )
return cli_err ( appctx , " Malformed ptr. \n " ) ;
ts = stktable_lookup_ptr ( t , ( void * ) ptr ) ;
if ( ! ts )
return cli_err ( appctx , " No entry can be found matching ptr. \n " ) ;
return table_process_entry ( appctx , ts , args ) ;
}
2016-11-22 12:00:53 -05:00
/* Prepares the appctx fields with the data-based filters from the command line.
* Returns 0 if the dump can proceed , 1 if has ended processing .
*/
static int table_prepare_data_request ( struct appctx * appctx , char * * args )
{
2022-05-03 05:35:07 -04:00
struct show_table_ctx * ctx = appctx - > svcctx ;
2020-01-16 09:19:29 -05:00
int i ;
2020-01-22 10:50:27 -05:00
char * err = NULL ;
2020-01-16 09:19:29 -05:00
2022-05-03 05:35:07 -04:00
if ( ctx - > action ! = STK_CLI_ACT_SHOW & & ctx - > action ! = STK_CLI_ACT_CLR )
2019-08-09 05:21:01 -04:00
return cli_err ( appctx , " content-based lookup is only supported with the \" show \" and \" clear \" actions \n " ) ;
2016-11-22 12:00:53 -05:00
2020-01-16 09:19:29 -05:00
for ( i = 0 ; i < STKTABLE_FILTER_LEN ; i + + ) {
if ( i > 0 & & ! * args [ 3 + 3 * i ] ) // number of filter entries can be less than STKTABLE_FILTER_LEN
break ;
/* condition on stored data value */
2022-05-03 05:35:07 -04:00
ctx - > data_type [ i ] = stktable_get_data_type ( args [ 3 + 3 * i ] + 5 ) ;
if ( ctx - > data_type [ i ] < 0 )
2020-01-22 10:50:27 -05:00
return cli_dynerr ( appctx , memprintf ( & err , " Filter entry #%i: Unknown data type \n " , i + 1 ) ) ;
2016-11-22 12:00:53 -05:00
2022-05-03 05:35:07 -04:00
if ( ! ( ( struct stktable * ) ctx - > target ) - > data_ofs [ ctx - > data_type [ i ] ] )
2020-01-22 10:50:27 -05:00
return cli_dynerr ( appctx , memprintf ( & err , " Filter entry #%i: Data type not stored in this table \n " , i + 1 ) ) ;
2016-11-22 12:00:53 -05:00
2022-05-03 05:35:07 -04:00
ctx - > data_op [ i ] = get_std_op ( args [ 4 + 3 * i ] ) ;
if ( ctx - > data_op [ i ] < 0 )
2020-01-22 10:50:27 -05:00
return cli_dynerr ( appctx , memprintf ( & err , " Filter entry #%i: Require and operator among \" eq \" , \" ne \" , \" le \" , \" ge \" , \" lt \" , \" gt \" \n " , i + 1 ) ) ;
2016-11-22 12:00:53 -05:00
2022-05-03 05:35:07 -04:00
if ( ! * args [ 5 + 3 * i ] | | strl2llrc ( args [ 5 + 3 * i ] , strlen ( args [ 5 + 3 * i ] ) , & ctx - > value [ i ] ) ! = 0 )
2020-01-22 10:50:27 -05:00
return cli_dynerr ( appctx , memprintf ( & err , " Filter entry #%i: Require a valid integer value to compare against \n " , i + 1 ) ) ;
}
if ( * args [ 3 + 3 * i ] ) {
return cli_dynerr ( appctx , memprintf ( & err , " Detected extra data in filter, %ith word of input, after '%s' \n " , 3 + 3 * i + 1 , args [ 2 + 3 * i ] ) ) ;
2020-01-16 09:19:29 -05:00
}
2016-11-22 12:00:53 -05:00
/* OK we're done, all the fields are set */
return 0 ;
}
/* returns 0 if wants to be called, 1 if has ended processing */
2018-04-18 07:26:46 -04:00
static int cli_parse_table_req ( char * * args , char * payload , struct appctx * appctx , void * private )
2016-11-22 12:00:53 -05:00
{
2022-05-03 05:35:07 -04:00
struct show_table_ctx * ctx = applet_reserve_svcctx ( appctx , sizeof ( * ctx ) ) ;
2020-01-16 09:19:29 -05:00
int i ;
for ( i = 0 ; i < STKTABLE_FILTER_LEN ; i + + )
2022-05-03 05:35:07 -04:00
ctx - > data_type [ i ] = - 1 ;
ctx - > target = NULL ;
ctx - > entry = NULL ;
ctx - > action = ( long ) private ; // keyword argument, one of STK_CLI_ACT_*
2016-11-22 12:00:53 -05:00
if ( * args [ 2 ] ) {
2022-05-03 06:02:36 -04:00
ctx - > t = ctx - > target = stktable_find_by_name ( args [ 2 ] ) ;
2022-05-03 05:35:07 -04:00
if ( ! ctx - > target )
2019-08-09 05:21:01 -04:00
return cli_err ( appctx , " No such table \n " ) ;
2016-11-22 12:00:53 -05:00
}
else {
2022-05-03 06:02:36 -04:00
ctx - > t = stktables_list ;
2022-05-03 05:35:07 -04:00
if ( ctx - > action ! = STK_CLI_ACT_SHOW )
2016-11-22 12:00:53 -05:00
goto err_args ;
return 0 ;
}
if ( strcmp ( args [ 3 ] , " key " ) = = 0 )
return table_process_entry_per_key ( appctx , args ) ;
2023-12-18 09:37:25 -05:00
if ( strcmp ( args [ 3 ] , " ptr " ) = = 0 )
return table_process_entry_per_ptr ( appctx , args ) ;
2016-11-22 12:00:53 -05:00
else if ( strncmp ( args [ 3 ] , " data. " , 5 ) = = 0 )
return table_prepare_data_request ( appctx , args ) ;
else if ( * args [ 3 ] )
goto err_args ;
return 0 ;
err_args :
2022-05-03 05:35:07 -04:00
switch ( ctx - > action ) {
2016-11-22 12:00:53 -05:00
case STK_CLI_ACT_SHOW :
2023-12-18 09:37:25 -05:00
return cli_err ( appctx , " Optional argument only supports \" data.<store_data_type> \" <operator> <value> or key <key> or ptr <ptr> \n " ) ;
2016-11-22 12:00:53 -05:00
case STK_CLI_ACT_CLR :
2023-12-18 09:37:25 -05:00
return cli_err ( appctx , " Required arguments: <table> \" data.<store_data_type> \" <operator> <value> or <table> key <key> or <table> ptr <ptr> \n " ) ;
2016-11-22 12:00:53 -05:00
case STK_CLI_ACT_SET :
2023-12-18 09:37:25 -05:00
return cli_err ( appctx , " Required arguments: <table> key <key> [data.<store_data_type> <value>]* or <table> ptr <ptr> [data.<store_data_type> <value>]* \n " ) ;
2016-11-22 12:00:53 -05:00
default :
2019-08-09 05:21:01 -04:00
return cli_err ( appctx , " Unknown action \n " ) ;
2016-11-22 12:00:53 -05:00
}
}
/* This function is used to deal with table operations (dump or clear depending
* on the action stored in appctx - > private ) . It returns 0 if the output buffer is
* full and it needs to be called again , otherwise non - zero .
*/
static int cli_io_handler_table ( struct appctx * appctx )
{
2022-05-03 05:35:07 -04:00
struct show_table_ctx * ctx = appctx - > svcctx ;
2022-05-27 05:08:15 -04:00
struct stconn * sc = appctx_sc ( appctx ) ;
2022-05-27 04:26:46 -04:00
struct stream * s = __sc_strm ( sc ) ;
2016-11-22 12:00:53 -05:00
struct ebmb_node * eb ;
int skip_entry ;
2022-05-03 05:35:07 -04:00
int show = ctx - > action = = STK_CLI_ACT_SHOW ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
int shard = ctx - > tree_head ;
2016-11-22 12:00:53 -05:00
/*
2022-05-03 06:02:36 -04:00
* We have 3 possible states in ctx - > state :
2022-05-03 05:45:02 -04:00
* - STATE_NEXT : the proxy pointer points to the next table to
2016-11-22 12:00:53 -05:00
* dump , the entry pointer is NULL ;
2022-05-03 05:45:02 -04:00
* - STATE_DUMP : the proxy pointer points to the current table
2016-11-22 12:00:53 -05:00
* and the entry pointer points to the next entry to be dumped ,
* and the refcount on the next entry is held ;
2022-05-03 05:45:02 -04:00
* - STATE_DONE : nothing left to dump , the buffer may contain some
2016-11-22 12:00:53 -05:00
* data though .
*/
chunk_reset ( & trash ) ;
2022-05-03 05:45:02 -04:00
while ( ctx - > state ! = STATE_DONE ) {
switch ( ctx - > state ) {
case STATE_NEXT :
2022-05-03 05:35:07 -04:00
if ( ! ctx - > t | |
( ctx - > target & &
ctx - > t ! = ctx - > target ) ) {
2022-05-03 05:45:02 -04:00
ctx - > state = STATE_DONE ;
2016-11-22 12:00:53 -05:00
break ;
}
2022-05-03 05:35:07 -04:00
if ( ctx - > t - > size ) {
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
if ( show & & ! shard & & ! table_dump_head_to_buffer ( & trash , appctx , ctx - > t , ctx - > target ) )
2016-11-22 12:00:53 -05:00
return 0 ;
2022-05-03 05:35:07 -04:00
if ( ctx - > target & &
2017-05-23 18:57:40 -04:00
( strm_li ( s ) - > bind_conf - > level & ACCESS_LVL_MASK ) > = ACCESS_LVL_OPER ) {
2016-11-22 12:00:53 -05:00
/* dump entries only if table explicitly requested */
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & ctx - > t - > shards [ shard ] . sh_lock ) ;
eb = ebmb_first ( & ctx - > t - > shards [ shard ] . keys ) ;
2016-11-22 12:00:53 -05:00
if ( eb ) {
2022-05-03 05:35:07 -04:00
ctx - > entry = ebmb_entry ( eb , struct stksess , key ) ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
HA_ATOMIC_INC ( & ctx - > entry - > ref_cnt ) ;
2022-05-03 05:45:02 -04:00
ctx - > state = STATE_DUMP ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & ctx - > t - > shards [ shard ] . sh_lock ) ;
2016-11-22 12:00:53 -05:00
break ;
}
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
is not very clear yet, so at this point it's certain this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & ctx - > t - > shards [ shard ] . sh_lock ) ;
/* we come here if we didn't find any entry in this shard */
shard = + + ctx - > tree_head ;
if ( shard < CONFIG_HAP_TBL_BUCKETS )
break ; // try again on new shard
/* fall through next table */
shard = ctx - > tree_head = 0 ;
2016-11-22 12:00:53 -05:00
}
}
2022-05-03 05:35:07 -04:00
ctx - > t = ctx - > t - > next ;
2016-11-22 12:00:53 -05:00
break ;
2022-05-03 05:45:02 -04:00
case STATE_DUMP :
2016-11-22 12:00:53 -05:00
skip_entry = 0 ;
2022-05-03 05:35:07 -04:00
HA_RWLOCK_RDLOCK ( STK_SESS_LOCK , & ctx - > entry - > lock ) ;
2017-06-13 13:37:32 -04:00
2022-05-03 05:35:07 -04:00
if ( ctx - > data_type [ 0 ] > = 0 ) {
2016-11-22 12:00:53 -05:00
/* we're filtering on some data contents */
void * ptr ;
2020-01-22 11:09:47 -05:00
int dt , i ;
2020-01-16 09:19:29 -05:00
signed char op ;
long long data , value ;
2016-11-22 12:00:53 -05:00
2017-06-13 13:37:32 -04:00
2020-01-22 11:09:47 -05:00
for ( i = 0 ; i < STKTABLE_FILTER_LEN ; i + + ) {
2022-05-03 05:35:07 -04:00
if ( ctx - > data_type [ i ] = = - 1 )
2020-01-16 09:19:29 -05:00
break ;
2022-05-03 05:35:07 -04:00
dt = ctx - > data_type [ i ] ;
ptr = stktable_data_ptr ( ctx - > t ,
ctx - > entry ,
2020-01-16 09:19:29 -05:00
dt ) ;
data = 0 ;
switch ( stktable_data_types [ dt ] . std_type ) {
case STD_T_SINT :
data = stktable_data_cast ( ptr , std_t_sint ) ;
break ;
case STD_T_UINT :
data = stktable_data_cast ( ptr , std_t_uint ) ;
break ;
case STD_T_ULL :
data = stktable_data_cast ( ptr , std_t_ull ) ;
break ;
case STD_T_FRQP :
data = read_freq_ctr_period ( & stktable_data_cast ( ptr , std_t_frqp ) ,
2022-05-03 05:35:07 -04:00
ctx - > t - > data_arg [ dt ] . u ) ;
2020-01-16 09:19:29 -05:00
break ;
}
2016-11-22 12:00:53 -05:00
2022-05-03 05:35:07 -04:00
op = ctx - > data_op [ i ] ;
value = ctx - > value [ i ] ;
2020-01-16 09:19:29 -05:00
/* skip the entry if the data does not match the test and the value */
if ( ( data < value & &
( op = = STD_OP_EQ | | op = = STD_OP_GT | | op = = STD_OP_GE ) ) | |
( data = = value & &
( op = = STD_OP_NE | | op = = STD_OP_GT | | op = = STD_OP_LT ) ) | |
( data > value & &
( op = = STD_OP_EQ | | op = = STD_OP_LT | | op = = STD_OP_LE ) ) ) {
skip_entry = 1 ;
break ;
}
2016-11-22 12:00:53 -05:00
}
}
if ( show & & ! skip_entry & &
2022-05-18 09:07:19 -04:00
! table_dump_entry_to_buffer ( & trash , appctx , ctx - > t , ctx - > entry ) ) {
2022-05-03 05:35:07 -04:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & ctx - > entry - > lock ) ;
2017-06-13 13:37:32 -04:00
return 0 ;
}
2016-11-22 12:00:53 -05:00
2022-05-03 05:35:07 -04:00
HA_RWLOCK_RDUNLOCK ( STK_SESS_LOCK , & ctx - > entry - > lock ) ;
2017-06-13 13:37:32 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRLOCK ( STK_TABLE_LOCK , & ctx - > t - > shards [ shard ] . sh_lock ) ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
HA_ATOMIC_DEC ( & ctx - > entry - > ref_cnt ) ;
2016-11-22 12:00:53 -05:00
2022-05-03 05:35:07 -04:00
eb = ebmb_next ( & ctx - > entry - > key ) ;
2016-11-22 12:00:53 -05:00
if ( eb ) {
2022-05-03 05:35:07 -04:00
struct stksess * old = ctx - > entry ;
ctx - > entry = ebmb_entry ( eb , struct stksess , key ) ;
2016-11-22 12:00:53 -05:00
if ( show )
2022-05-03 05:35:07 -04:00
__stksess_kill_if_expired ( ctx - > t , old ) ;
else if ( ! skip_entry & & ! ctx - > entry - > ref_cnt )
__stksess_kill ( ctx - > t , old ) ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
HA_ATOMIC_INC ( & ctx - > entry - > ref_cnt ) ;
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & ctx - > t - > shards [ shard ] . sh_lock ) ;
2016-11-22 12:00:53 -05:00
break ;
}
if ( show )
2022-05-03 05:35:07 -04:00
__stksess_kill_if_expired ( ctx - > t , ctx - > entry ) ;
MEDIUM: stick-table: change the ref_cnt atomically
Due to the ts->ref_cnt being manipulated and checked inside wrlocks,
we continue to have it updated under plenty of read locks, which have
an important cost on many-thread machines.
This patch turns them all to atomic ops and carefully moves them outside
of locks every time this is possible:
- the ref_cnt is incremented before write-unlocking on creation otherwise
the element could vanish before we can do it
- the ref_cnt is decremented after write-locking on release
- for all other cases it's updated out of locks since it's guaranteed by
the sequence that it cannot vanish
- checks are done before locking every time it's used to decide
whether we're going to release the element (saves several write locks)
- expiration tests are just done using atomic loads, since there's no
particular ordering constraint there, we just want consistent values.
For Lua, the loop that is used to dump stick-tables could switch to read
locks only, but this was not done.
For peers, the loop that builds updates in peer_send_teachmsgs is extremely
expensive in write locks and it doesn't seem this is really needed since
the only updated variables are last_pushed and commitupdate, the first
one being on the shared table (thus not used by other threads) and the
commitupdate could likely be changed using a CAS. Thus all of this could
theoretically move under a read lock, but that was not done here.
On a 80-thread machine with a peers section enabled, the request rate
increased from 415 to 520k rps.
2023-05-27 12:55:48 -04:00
else if ( ! skip_entry & & ! HA_ATOMIC_LOAD ( & ctx - > entry - > ref_cnt ) )
2022-05-03 05:35:07 -04:00
__stksess_kill ( ctx - > t , ctx - > entry ) ;
2017-06-13 13:37:32 -04:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
HA_RWLOCK_WRUNLOCK ( STK_TABLE_LOCK , & ctx - > t - > shards [ shard ] . sh_lock ) ;
2016-11-22 12:00:53 -05:00
MAJOR: stktable: split the keys across multiple shards to reduce contention
In order to reduce the contention on the table when keys expire quickly,
we're spreading the load over multiple trees. That counts for keys and
expiration dates. The shard number is calculated from the key value
itself, both when looking up and when setting it.
The "show table" dump on the CLI iterates over all shards so that the
output is not fully sorted, it's only sorted within each shard. The Lua
table dump just does the same. It was verified with a Lua program to
count stick-table entries that it works as intended (the test case is
reproduced here as it's clearly not easy to automate as a vtc):
function dump_stk()
local dmp = core.proxies['tbl'].stktable:dump({});
local count = 0
for _, __ in pairs(dmp) do
count = count + 1
end
core.Info('Total entries: ' .. count)
end
core.register_action("dump_stk", {'tcp-req', 'http-req'}, dump_stk, 0);
##
global
tune.lua.log.stderr on
lua-load-per-thread lua-cnttbl.lua
listen front
bind :8001
http-request lua.dump_stk if { path_beg /stk }
http-request track-sc1 rand(),upper,hex table tbl
http-request redirect location /
backend tbl
stick-table size 100k type string len 12 store http_req_cnt
##
$ h2load -c 16 -n 10000 0:8001/
$ curl 0:8001/stk
## A count close to 100k appears on haproxy's stderr
## On the CLI, "show table tbl" | wc will show the same.
Some large parts were reindented only to add a top-level loop to iterate
over shards (e.g. process_table_expire()). Better check the diff using
git show -b.
The number of shards is decided just like for the pools, at build time
based on the max number of threads, so that we can keep a constant. Maybe
this should be done differently. For now CONFIG_HAP_TBL_BUCKETS is used,
and defaults to CONFIG_HAP_POOL_BUCKETS to keep the benefits of all the
measurements made for the pools. It turns out that this value seems to
be the most reasonable one without inflating the struct stktable too
much. By default for 1024 threads the value is 32 and delivers 980k RPS
in a test involving 80 threads, while adding 1kB to the struct stktable
(roughly doubling it). The same test at 64 gives 1008 kRPS and at 128
it gives 1040 kRPS for 8 times the initial size. 16 would be too low
however, with 675k RPS.
The stksess already have a shard number, it's the one used to decide which
peer connection to send the entry. Maybe we should also store the one
associated with the entry itself instead of recalculating it, though it
does not happen that often. The operation is done by hashing the key using
XXH32().
The peers also take and release the table's lock but the way it's used
it not very clear yet, so at this point it's sure this will not work.
At this point, this allowed to completely unlock the performance on a
80-thread setup:
before: 5.4 Gbps, 150k RPS, 80 cores
52.71% haproxy [.] stktable_lookup_key
36.90% haproxy [.] stktable_get_entry.part.0
0.86% haproxy [.] ebmb_lookup
0.18% haproxy [.] process_stream
0.12% haproxy [.] process_table_expire
0.11% haproxy [.] fwrr_get_next_server
0.10% haproxy [.] eb32_insert
0.10% haproxy [.] run_tasks_from_lists
after: 36 Gbps, 980k RPS, 80 cores
44.92% haproxy [.] stktable_get_entry
5.47% haproxy [.] ebmb_lookup
2.50% haproxy [.] fwrr_get_next_server
0.97% haproxy [.] eb32_insert
0.92% haproxy [.] process_stream
0.52% haproxy [.] run_tasks_from_lists
0.45% haproxy [.] conn_backend_get
0.44% haproxy [.] __pool_alloc
0.35% haproxy [.] process_table_expire
0.35% haproxy [.] connect_server
0.35% haproxy [.] h1_headers_to_hdr_list
0.34% haproxy [.] eb_delete
0.31% haproxy [.] srv_add_to_idle_list
0.30% haproxy [.] h1_snd_buf
WIP: uint64_t -> long
WIP: ulong -> uint
code is much smaller
2024-03-04 11:09:28 -05:00
shard = + + ctx - > tree_head ;
if ( shard > = CONFIG_HAP_TBL_BUCKETS ) {
shard = ctx - > tree_head = 0 ;
ctx - > t = ctx - > t - > next ;
}
2022-05-03 05:45:02 -04:00
ctx - > state = STATE_NEXT ;
2016-11-22 12:00:53 -05:00
break ;
2022-05-03 05:45:02 -04:00
default :
2016-11-22 12:00:53 -05:00
break ;
}
}
return 1 ;
}
static void cli_release_show_table ( struct appctx * appctx )
{
2022-05-03 05:35:07 -04:00
struct show_table_ctx * ctx = appctx - > svcctx ;
2022-05-03 05:45:02 -04:00
if ( ctx - > state = = STATE_DUMP ) {
2022-05-03 05:35:07 -04:00
stksess_kill_if_expired ( ctx - > t , ctx - > entry , 1 ) ;
2016-11-22 12:00:53 -05:00
}
}
2023-01-06 10:09:58 -05:00
/* Config parser for the number of stick-counters ("tune.stick-counters").
 * Parses args[1] as a non-negative decimal integer and stores it in
 * global.tune.nb_stk_ctr. Returns 0 on success, -1 on error with <err>
 * filled with an allocated message (caller frees it).
 */
static int stk_parse_stick_counters(char **args, int section_type, struct proxy *curpx,
                                    const struct proxy *defpx, const char *file, int line,
                                    char **err)
{
	char *error;
	long value;
	int counters;

	/* strtol() alone cannot distinguish "0" from an empty argument, so
	 * also require that at least one digit was consumed (error != args[1])
	 * and that the whole argument was parsed. errno/ERANGE and the
	 * long->int round-trip comparison catch values that overflow the
	 * storage type.
	 */
	errno = 0;
	value = strtol(args[1], &error, 10);
	counters = value;
	if (error == args[1] || *error != 0 || errno == ERANGE || (long)counters != value) {
		memprintf(err, "%s: '%s' is an invalid number", args[0], args[1]);
		return -1;
	}

	if (counters < 0) {
		memprintf(err, "%s: the number of stick-counters may not be negative (was %d)", args[0], counters);
		return -1;
	}

	global.tune.nb_stk_ctr = counters;

	return 0;
}
/* This function creates the stk_ctr pools after the configuration parsing. It
* returns 0 on success otherwise ERR_ * . If nb_stk_ctr is 0 , the pool remains
* NULL .
*/
static int stkt_create_stk_ctr_pool ( void )
{
if ( ! global . tune . nb_stk_ctr )
return 0 ;
pool_head_stk_ctr = create_pool ( " stk_ctr " , sizeof ( * ( ( struct session * ) 0 ) - > stkctr ) * global . tune . nb_stk_ctr , MEM_F_SHARED ) ;
if ( ! pool_head_stk_ctr ) {
ha_alert ( " out of memory while creating the stick-counters pool. \n " ) ;
return ERR_ABORT ;
}
return 0 ;
}
2020-08-28 05:31:31 -04:00
static void stkt_late_init ( void )
{
struct sample_fetch * f ;
f = find_sample_fetch ( " src " , strlen ( " src " ) ) ;
if ( f )
smp_fetch_src = f - > process ;
2023-01-06 10:09:58 -05:00
hap_register_post_check ( stkt_create_stk_ctr_pool ) ;
2020-08-28 05:31:31 -04:00
}
INITCALL0 ( STG_INIT , stkt_late_init ) ;
2016-11-22 12:00:53 -05:00
/* Register the "clear table", "set table" and "show table" CLI keywords.
 * Each entry maps a keyword pair to its request parser, I/O handler, an
 * optional release handler, and a private STK_CLI_ACT_* action code.
 */
static struct cli_kw_list cli_kws = { { } , {
2021-05-07 05:38:37 -04:00
{ { " clear " , " table " , NULL } , " clear table <table> [<filter>]* : remove an entry from a table (filter: data/key) " , cli_parse_table_req , cli_io_handler_table , cli_release_show_table , ( void * ) STK_CLI_ACT_CLR } ,
{ { " set " , " table " , NULL } , " set table <table> key <k> [data.* <v>]* : update or create a table entry's data " , cli_parse_table_req , cli_io_handler_table , NULL , ( void * ) STK_CLI_ACT_SET } ,
{ { " show " , " table " , NULL } , " show table <table> [<filter>]* : report table usage stats or dump this table's contents (filter: data/key) " , cli_parse_table_req , cli_io_handler_table , cli_release_show_table , ( void * ) STK_CLI_ACT_SHOW } ,
2016-11-22 12:00:53 -05:00
{ { } , }
} } ;

2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , cli_register_kw , & cli_kws ) ;
2016-11-22 12:00:53 -05:00
2015-08-19 02:25:14 -04:00
static struct action_kw_list tcp_conn_kws = { { } , {
2023-01-02 12:15:20 -05:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 13:04:16 -04:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 12:57:49 -04:00
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
2015-08-19 02:25:14 -04:00
{ /* END */ }
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , tcp_req_conn_keywords_register , & tcp_conn_kws ) ;
2016-10-21 10:37:51 -04:00
static struct action_kw_list tcp_sess_kws = { { } , {
2023-01-02 12:15:20 -05:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 13:04:16 -04:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 12:57:49 -04:00
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
2016-10-21 10:37:51 -04:00
{ /* END */ }
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , tcp_req_sess_keywords_register , & tcp_sess_kws ) ;
2015-08-19 02:25:14 -04:00
static struct action_kw_list tcp_req_kws = { { } , {
2023-01-02 12:15:20 -05:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 13:04:16 -04:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 12:57:49 -04:00
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
2015-08-19 02:25:14 -04:00
{ /* END */ }
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , tcp_req_cont_keywords_register , & tcp_req_kws ) ;
2015-08-19 02:25:14 -04:00
static struct action_kw_list tcp_res_kws = { { } , {
2023-01-02 12:15:20 -05:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 13:04:16 -04:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 12:57:49 -04:00
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
2015-08-19 02:25:14 -04:00
{ /* END */ }
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , tcp_res_cont_keywords_register , & tcp_res_kws ) ;
2015-08-19 02:25:14 -04:00
static struct action_kw_list http_req_kws = { { } , {
2023-01-02 12:15:20 -05:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 13:04:16 -04:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 12:57:49 -04:00
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
2015-08-19 02:25:14 -04:00
{ /* END */ }
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , http_req_keywords_register , & http_req_kws ) ;
2015-08-19 02:25:14 -04:00
static struct action_kw_list http_res_kws = { { } , {
2023-01-02 12:15:20 -05:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 13:04:16 -04:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
2021-06-30 12:57:49 -04:00
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
2015-08-19 02:25:14 -04:00
{ /* END */ }
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , http_res_keywords_register , & http_res_kws ) ;
2023-01-05 05:17:38 -05:00
static struct action_kw_list http_after_res_kws = { { } , {
2023-03-17 06:28:58 -04:00
{ " sc-add-gpc " , parse_add_gpc , KWF_MATCH_PREFIX } ,
2023-01-05 05:17:38 -05:00
{ " sc-inc-gpc " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc0 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-inc-gpc1 " , parse_inc_gpc , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ " sc-set-gpt0 " , parse_set_gpt , KWF_MATCH_PREFIX } ,
{ /* END */ }
} } ;
INITCALL1 ( STG_REGISTER , http_after_res_keywords_register , & http_after_res_kws ) ;
2016-11-25 10:10:05 -05:00
/* Note: must not be declared <const> as its list will be overwritten.
* Please take care of keeping this list alphabetically sorted .
*/
static struct sample_fetch_kw_list smp_fetch_keywords = { ILH , {
{ " sc_bytes_in_rate " , smp_fetch_sc_bytes_in_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_bytes_out_rate " , smp_fetch_sc_bytes_out_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2021-06-30 13:04:16 -04:00
{ " sc_clr_gpc " , smp_fetch_sc_clr_gpc , ARG3 ( 2 , SINT , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_clr_gpc0 " , smp_fetch_sc_clr_gpc0 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc_clr_gpc1 " , smp_fetch_sc_clr_gpc1 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN } ,
2016-11-25 10:10:05 -05:00
{ " sc_conn_cnt " , smp_fetch_sc_conn_cnt , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_conn_cur " , smp_fetch_sc_conn_cur , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_conn_rate " , smp_fetch_sc_conn_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2021-06-30 12:57:49 -04:00
{ " sc_get_gpt " , smp_fetch_sc_get_gpt , ARG3 ( 2 , SINT , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2017-01-05 05:44:09 -05:00
{ " sc_get_gpt0 " , smp_fetch_sc_get_gpt0 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2021-06-30 13:04:16 -04:00
{ " sc_get_gpc " , smp_fetch_sc_get_gpc , ARG3 ( 2 , SINT , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_get_gpc0 " , smp_fetch_sc_get_gpc0 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc_get_gpc1 " , smp_fetch_sc_get_gpc1 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN } ,
2024-01-19 11:23:07 -05:00
{ " sc_glitch_cnt " , smp_fetch_sc_glitch_cnt , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_glitch_rate " , smp_fetch_sc_glitch_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2021-06-30 13:04:16 -04:00
{ " sc_gpc_rate " , smp_fetch_sc_gpc_rate , ARG3 ( 2 , SINT , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_gpc0_rate " , smp_fetch_sc_gpc0_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc_gpc1_rate " , smp_fetch_sc_gpc1_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_http_err_cnt " , smp_fetch_sc_http_err_cnt , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_http_err_rate " , smp_fetch_sc_http_err_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
{ " sc_http_fail_cnt " , smp_fetch_sc_http_fail_cnt , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_http_fail_rate " , smp_fetch_sc_http_fail_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_http_req_cnt " , smp_fetch_sc_http_req_cnt , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_http_req_rate " , smp_fetch_sc_http_req_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2021-06-30 13:04:16 -04:00
{ " sc_inc_gpc " , smp_fetch_sc_inc_gpc , ARG3 ( 2 , SINT , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_inc_gpc0 " , smp_fetch_sc_inc_gpc0 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc_inc_gpc1 " , smp_fetch_sc_inc_gpc1 , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc_kbytes_in " , smp_fetch_sc_kbytes_in , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc_kbytes_out " , smp_fetch_sc_kbytes_out , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc_sess_cnt " , smp_fetch_sc_sess_cnt , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_sess_rate " , smp_fetch_sc_sess_rate , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc_tracked " , smp_fetch_sc_tracked , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_BOOL , SMP_USE_INTRN , } ,
{ " sc_trackers " , smp_fetch_sc_trackers , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_bytes_in_rate " , smp_fetch_sc_bytes_in_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_bytes_out_rate " , smp_fetch_sc_bytes_out_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_clr_gpc0 " , smp_fetch_sc_clr_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc0_clr_gpc1 " , smp_fetch_sc_clr_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc0_conn_cnt " , smp_fetch_sc_conn_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_conn_cur " , smp_fetch_sc_conn_cur , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_conn_rate " , smp_fetch_sc_conn_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2017-01-05 05:44:09 -05:00
{ " sc0_get_gpt0 " , smp_fetch_sc_get_gpt0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc0_get_gpc0 " , smp_fetch_sc_get_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc0_get_gpc1 " , smp_fetch_sc_get_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2024-01-19 11:23:07 -05:00
{ " sc0_glitch_cnt " , smp_fetch_sc_glitch_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_glitch_rate " , smp_fetch_sc_glitch_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc0_gpc0_rate " , smp_fetch_sc_gpc0_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc0_gpc1_rate " , smp_fetch_sc_gpc1_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc0_http_err_cnt " , smp_fetch_sc_http_err_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_http_err_rate " , smp_fetch_sc_http_err_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
{ " sc0_http_fail_cnt " , smp_fetch_sc_http_fail_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_http_fail_rate " , smp_fetch_sc_http_fail_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc0_http_req_cnt " , smp_fetch_sc_http_req_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_http_req_rate " , smp_fetch_sc_http_req_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_inc_gpc0 " , smp_fetch_sc_inc_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc0_inc_gpc1 " , smp_fetch_sc_inc_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc0_kbytes_in " , smp_fetch_sc_kbytes_in , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc0_kbytes_out " , smp_fetch_sc_kbytes_out , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc0_sess_cnt " , smp_fetch_sc_sess_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_sess_rate " , smp_fetch_sc_sess_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc0_tracked " , smp_fetch_sc_tracked , ARG1 ( 0 , TAB ) , NULL , SMP_T_BOOL , SMP_USE_INTRN , } ,
{ " sc0_trackers " , smp_fetch_sc_trackers , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_bytes_in_rate " , smp_fetch_sc_bytes_in_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_bytes_out_rate " , smp_fetch_sc_bytes_out_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2021-06-30 13:04:16 -04:00
{ " sc1_clr_gpc " , smp_fetch_sc_clr_gpc , ARG2 ( 1 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_clr_gpc0 " , smp_fetch_sc_clr_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc1_clr_gpc1 " , smp_fetch_sc_clr_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_conn_cnt " , smp_fetch_sc_conn_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_conn_cur " , smp_fetch_sc_conn_cur , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_conn_rate " , smp_fetch_sc_conn_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2017-01-05 05:44:09 -05:00
{ " sc1_get_gpt0 " , smp_fetch_sc_get_gpt0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_get_gpc0 " , smp_fetch_sc_get_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc1_get_gpc1 " , smp_fetch_sc_get_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2024-01-19 11:23:07 -05:00
{ " sc1_glitch_cnt " , smp_fetch_sc_glitch_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_glitch_rate " , smp_fetch_sc_glitch_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_gpc0_rate " , smp_fetch_sc_gpc0_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc1_gpc1_rate " , smp_fetch_sc_gpc1_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_http_err_cnt " , smp_fetch_sc_http_err_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_http_err_rate " , smp_fetch_sc_http_err_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
{ " sc1_http_fail_cnt " , smp_fetch_sc_http_fail_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_http_fail_rate " , smp_fetch_sc_http_fail_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_http_req_cnt " , smp_fetch_sc_http_req_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_http_req_rate " , smp_fetch_sc_http_req_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_inc_gpc0 " , smp_fetch_sc_inc_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc1_inc_gpc1 " , smp_fetch_sc_inc_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc1_kbytes_in " , smp_fetch_sc_kbytes_in , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc1_kbytes_out " , smp_fetch_sc_kbytes_out , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc1_sess_cnt " , smp_fetch_sc_sess_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_sess_rate " , smp_fetch_sc_sess_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc1_tracked " , smp_fetch_sc_tracked , ARG1 ( 0 , TAB ) , NULL , SMP_T_BOOL , SMP_USE_INTRN , } ,
{ " sc1_trackers " , smp_fetch_sc_trackers , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_bytes_in_rate " , smp_fetch_sc_bytes_in_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_bytes_out_rate " , smp_fetch_sc_bytes_out_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_clr_gpc0 " , smp_fetch_sc_clr_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc2_clr_gpc1 " , smp_fetch_sc_clr_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc2_conn_cnt " , smp_fetch_sc_conn_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_conn_cur " , smp_fetch_sc_conn_cur , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_conn_rate " , smp_fetch_sc_conn_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2017-01-05 05:44:09 -05:00
{ " sc2_get_gpt0 " , smp_fetch_sc_get_gpt0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc2_get_gpc0 " , smp_fetch_sc_get_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc2_get_gpc1 " , smp_fetch_sc_get_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2024-01-19 11:23:07 -05:00
{ " sc2_glitch_cnt " , smp_fetch_sc_glitch_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_glitch_rate " , smp_fetch_sc_glitch_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc2_gpc0_rate " , smp_fetch_sc_gpc0_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc2_gpc1_rate " , smp_fetch_sc_gpc1_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc2_http_err_cnt " , smp_fetch_sc_http_err_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_http_err_rate " , smp_fetch_sc_http_err_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
{ " sc2_http_fail_cnt " , smp_fetch_sc_http_fail_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_http_fail_rate " , smp_fetch_sc_http_fail_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc2_http_req_cnt " , smp_fetch_sc_http_req_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_http_req_rate " , smp_fetch_sc_http_req_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_inc_gpc0 " , smp_fetch_sc_inc_gpc0 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2018-01-29 09:22:53 -05:00
{ " sc2_inc_gpc1 " , smp_fetch_sc_inc_gpc1 , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
2016-11-25 10:10:05 -05:00
{ " sc2_kbytes_in " , smp_fetch_sc_kbytes_in , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc2_kbytes_out " , smp_fetch_sc_kbytes_out , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " sc2_sess_cnt " , smp_fetch_sc_sess_cnt , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_sess_rate " , smp_fetch_sc_sess_rate , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " sc2_tracked " , smp_fetch_sc_tracked , ARG1 ( 0 , TAB ) , NULL , SMP_T_BOOL , SMP_USE_INTRN , } ,
{ " sc2_trackers " , smp_fetch_sc_trackers , ARG1 ( 0 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " src_bytes_in_rate " , smp_fetch_sc_bytes_in_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_bytes_out_rate " , smp_fetch_sc_bytes_out_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2021-06-30 13:04:16 -04:00
{ " src_clr_gpc " , smp_fetch_sc_clr_gpc , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_clr_gpc0 " , smp_fetch_sc_clr_gpc0 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2018-01-29 09:22:53 -05:00
{ " src_clr_gpc1 " , smp_fetch_sc_clr_gpc1 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_conn_cnt " , smp_fetch_sc_conn_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_conn_cur " , smp_fetch_sc_conn_cur , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_conn_rate " , smp_fetch_sc_conn_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2021-06-30 12:57:49 -04:00
{ " src_get_gpt " , smp_fetch_sc_get_gpt , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2017-01-05 05:44:09 -05:00
{ " src_get_gpt0 " , smp_fetch_sc_get_gpt0 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2021-06-30 13:04:16 -04:00
{ " src_get_gpc " , smp_fetch_sc_get_gpc , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_get_gpc0 " , smp_fetch_sc_get_gpc0 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2018-01-29 09:22:53 -05:00
{ " src_get_gpc1 " , smp_fetch_sc_get_gpc1 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2024-01-19 11:23:07 -05:00
{ " src_glitch_cnt " , smp_fetch_sc_glitch_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_glitch_rate " , smp_fetch_sc_glitch_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2021-06-30 13:04:16 -04:00
{ " src_gpc_rate " , smp_fetch_sc_gpc_rate , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_gpc0_rate " , smp_fetch_sc_gpc0_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2018-01-29 09:22:53 -05:00
{ " src_gpc1_rate " , smp_fetch_sc_gpc1_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_http_err_cnt " , smp_fetch_sc_http_err_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_http_err_rate " , smp_fetch_sc_http_err_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
{ " src_http_fail_cnt " , smp_fetch_sc_http_fail_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_http_fail_rate " , smp_fetch_sc_http_fail_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_http_req_cnt " , smp_fetch_sc_http_req_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_http_req_rate " , smp_fetch_sc_http_req_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2021-06-30 13:04:16 -04:00
{ " src_inc_gpc " , smp_fetch_sc_inc_gpc , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_inc_gpc0 " , smp_fetch_sc_inc_gpc0 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2018-01-29 09:22:53 -05:00
{ " src_inc_gpc1 " , smp_fetch_sc_inc_gpc1 , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
2016-11-25 10:10:05 -05:00
{ " src_kbytes_in " , smp_fetch_sc_kbytes_in , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_kbytes_out " , smp_fetch_sc_kbytes_out , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_sess_cnt " , smp_fetch_sc_sess_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_sess_rate " , smp_fetch_sc_sess_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " src_updt_conn_cnt " , smp_fetch_src_updt_conn_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_L4CLI , } ,
{ " table_avl " , smp_fetch_table_avl , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ " table_cnt " , smp_fetch_table_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_SINT , SMP_USE_INTRN , } ,
{ /* END */ } ,
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , sample_register_fetches , & smp_fetch_keywords ) ;
2016-11-25 10:10:05 -05:00
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
/* Note: must not be declared <const> as its list will be overwritten */
static struct sample_conv_kw_list sample_conv_kws = { ILH , {
2016-05-25 11:16:38 -04:00
{ " in_table " , sample_conv_in_table , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_BOOL } ,
{ " table_bytes_in_rate " , sample_conv_table_bytes_in_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_bytes_out_rate " , sample_conv_table_bytes_out_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_conn_cnt " , sample_conv_table_conn_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_conn_cur " , sample_conv_table_conn_cur , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_conn_rate " , sample_conv_table_conn_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2022-08-16 12:11:25 -04:00
{ " table_expire " , sample_conv_table_expire , ARG2 ( 1 , TAB , SINT ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2021-06-30 12:57:49 -04:00
{ " table_gpt " , sample_conv_table_gpt , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2016-05-25 11:16:38 -04:00
{ " table_gpt0 " , sample_conv_table_gpt0 , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2021-06-30 13:04:16 -04:00
{ " table_gpc " , sample_conv_table_gpc , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2016-05-25 11:16:38 -04:00
{ " table_gpc0 " , sample_conv_table_gpc0 , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2018-01-29 09:22:53 -05:00
{ " table_gpc1 " , sample_conv_table_gpc1 , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2021-06-30 13:04:16 -04:00
{ " table_gpc_rate " , sample_conv_table_gpc_rate , ARG2 ( 2 , SINT , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2016-05-25 11:16:38 -04:00
{ " table_gpc0_rate " , sample_conv_table_gpc0_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2018-01-29 09:22:53 -05:00
{ " table_gpc1_rate " , sample_conv_table_gpc1_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2024-01-19 11:23:07 -05:00
{ " table_glitch_cnt " , sample_conv_table_glitch_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_glitch_rate " , sample_conv_table_glitch_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2016-05-25 11:16:38 -04:00
{ " table_http_err_cnt " , sample_conv_table_http_err_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_http_err_rate " , sample_conv_table_http_err_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
MINOR: stick-tables/counters: add http_fail_cnt and http_fail_rate data types
Historically we've been counting lots of client-triggered events in stick
tables to help detect misbehaving ones, but we've been missing the same on
the server side, and there's been repeated requests for being able to count
the server errors per URL in order to precisely monitor the quality of
service or even to avoid routing requests to certain dead services, which
is also called "circuit breaking" nowadays.
This commit introduces http_fail_cnt and http_fail_rate, which work like
http_err_cnt and http_err_rate in that they respectively count events and
their frequency, but they only consider server-side issues such as network
errors, unparsable and truncated responses, and 5xx status codes other
than 501 and 505 (since these ones are usually triggered by the client).
Note that retryable errors are purposely not accounted for, so that only
what the client really sees is considered.
With this it becomes very simple to put some protective measures in place
to perform a redirect or return an excuse page when the error rate goes
beyond a certain threshold for a given URL, and give more chances to the
server to recover from this condition. Typically it could look like this
to bypass a URL causing more than 10 requests per second:
stick-table type string len 80 size 4k expire 1m store http_fail_rate(1m)
http-request track-sc0 base # track host+path, ignore query string
http-request return status 503 content-type text/html \
lf-file excuse.html if { sc0_http_fail_rate gt 10 }
A more advanced mechanism using gpt0 could even implement high/low rates
to disable/enable the service.
Reg-test converteers_ref_cnt_never_dec.vtc was updated to test it.
2021-02-10 06:07:15 -05:00
{ " table_http_fail_cnt " , sample_conv_table_http_fail_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_http_fail_rate " , sample_conv_table_http_fail_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2016-05-25 11:16:38 -04:00
{ " table_http_req_cnt " , sample_conv_table_http_req_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_http_req_rate " , sample_conv_table_http_req_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2022-08-16 12:11:25 -04:00
{ " table_idle " , sample_conv_table_idle , ARG2 ( 1 , TAB , SINT ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
2016-05-25 11:16:38 -04:00
{ " table_kbytes_in " , sample_conv_table_kbytes_in , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_kbytes_out " , sample_conv_table_kbytes_out , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_server_id " , sample_conv_table_server_id , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_sess_cnt " , sample_conv_table_sess_cnt , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_sess_rate " , sample_conv_table_sess_rate , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
{ " table_trackers " , sample_conv_table_trackers , ARG1 ( 1 , TAB ) , NULL , SMP_T_ANY , SMP_T_SINT } ,
MEDIUM: stick-table: add new converters to fetch table data
These new converters make it possible to look up any sample expression
in a table, and check whether an equivalent key exists or not, and if it
exists, to retrieve the associated data (eg: gpc0, request rate, etc...).
Till now it was only possible using tracking, but sometimes tracking is
not suited to only retrieving such counters, either because it's done too
early or because too many items need to be checked without necessarily
being tracked.
These converters all take a string on input, and then convert it again to
the table's type. This means that if an input sample is of type IPv4 and
the table is of type IP, it will first be converted to a string, then back
to an IP address. This is a limitation of the current design which does not
allow converters to declare that "any" type is supported on input. Since
strings are the only types which can be cast to any other one, this method
always works.
The following converters were added :
in_table, table_bytes_in_rate, table_bytes_out_rate, table_conn_cnt,
table_conn_cur, table_conn_rate, table_gpc0, table_gpc0_rate,
table_http_err_cnt, table_http_err_rate, table_http_req_cnt,
table_http_req_rate, table_kbytes_in, table_kbytes_out,
table_server_id, table_sess_cnt, table_sess_rate, table_trackers.
2014-07-10 08:03:38 -04:00
{ /* END */ } ,
} } ;
2018-11-25 13:14:37 -05:00
INITCALL1 ( STG_REGISTER , sample_register_convs , & sample_conv_kws ) ;
2023-01-06 10:09:58 -05:00
/* Global configuration keywords: "tune.stick-counters" sets the number of
 * stick-counters available per session (parsed by stk_parse_stick_counters). */
static struct cfg_kw_list cfg_kws = { { } , {
{ CFG_GLOBAL , " tune.stick-counters " , stk_parse_stick_counters } ,
{ /* END */ }
} } ;
INITCALL1 ( STG_REGISTER , cfg_register_keywords , & cfg_kws ) ;
2024-01-31 04:33:55 -05:00
# if defined(USE_PROMEX)
/* Promex callback: describe the stick-table metric identified by <id>.
 * Fills <metric> (name, type, flags) and <desc> (human description).
 * Returns 1 when <id> is known, -1 otherwise.
 */
static int stk_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc)
{
	if (id == STICKTABLE_SIZE) {
		*metric = (struct promex_metric){ .n = ist("size"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC };
		*desc = ist("Stick table size.");
	}
	else if (id == STICKTABLE_USED) {
		*metric = (struct promex_metric){ .n = ist("used"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC };
		*desc = ist("Number of entries used in this stick table.");
	}
	else
		return -1;
	return 1;
}
/* Promex callback: return the first stick-table to iterate over when
 * dumping time-series. <unused> and <id> are ignored; the head of the
 * global stktables_list is always the starting point.
 */
static void * stk_promex_start_ts ( void * unused , unsigned int id )
{
return stktables_list ;
}
2024-02-22 04:12:27 -05:00
static void * stk_promex_next_ts ( void * unused , void * metric_ctx , unsigned int id )
2024-01-31 04:33:55 -05:00
{
struct stktable * t = metric_ctx ;
return t - > next ;
}
static int stk_promex_fill_ts ( void * unused , void * metric_ctx , unsigned int id , struct promex_label * labels , struct field * field )
{
struct stktable * t = metric_ctx ;
if ( ! t - > size )
return 0 ;
labels [ 0 ] . name = ist ( " name " ) ;
2024-03-29 13:21:50 -04:00
labels [ 0 ] . value = ist ( t - > id ) ;
2024-01-31 04:33:55 -05:00
labels [ 1 ] . name = ist ( " type " ) ;
2024-03-29 13:21:50 -04:00
labels [ 1 ] . value = ist ( stktable_types [ t - > type ] . kw ) ;
2024-01-31 04:33:55 -05:00
switch ( id ) {
case STICKTABLE_SIZE :
* field = mkf_u32 ( FN_GAUGE , t - > size ) ;
break ;
case STICKTABLE_USED :
* field = mkf_u32 ( FN_GAUGE , t - > current ) ;
break ;
default :
return - 1 ;
}
return 1 ;
}
/* Prometheus exporter module descriptor for stick-tables: wires the
 * metric-description and time-series iteration callbacks above, and
 * declares how many metrics the module exposes. Registered at startup. */
static struct promex_module promex_sticktable_module = {
. name = IST ( " sticktable " ) ,
. metric_info = stk_promex_metric_info ,
. start_ts = stk_promex_start_ts ,
. next_ts = stk_promex_next_ts ,
. fill_ts = stk_promex_fill_ts ,
. nb_metrics = STICKTABLE_TOTAL_FIELDS ,
} ;
INITCALL1 ( STG_REGISTER , promex_register_module , & promex_sticktable_module ) ;
# endif