2013-11-28 05:05:19 -05:00
|
|
|
/*
|
|
|
|
|
* Pattern management functions.
|
|
|
|
|
*
|
|
|
|
|
* Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <stdio.h>
|
2020-01-17 12:01:20 -05:00
|
|
|
#include <errno.h>
|
2013-11-28 05:05:19 -05:00
|
|
|
|
MEDIUM: migrate the patterns reference to cebs_tree
cebs_tree are 24 bytes smaller than ebst_tree (16B vs 40B), and pattern
references are only used during map/acl updates, so their storage is
pure loss between updates (which most of the time never happen). By
switching their indexing to compact trees, we can save 16 to 24 bytes
per entry depending on alightment (here it's 24 per struct but 16
practical as malloc's alignment keeps 8 unused).
Tested on core i7-8650U running at 3.0 GHz, with a file containing
17.7M IP addresses (16.7M different):
$ time ./haproxy -c -f acl-ip.cfg
Save 280 MB RAM for 17.7M IP addresses, and slightly speeds up the
startup (5.8%, from 19.2s to 18.2s), a part of which possible being
attributed to having to write less memory. Note that this is on small
strings. On larger ones such as user-agents, ebtree doesn't reread
the whole key and might be more efficient.
Before:
RAM (VSZ/RSS): 4443912 3912444
real 0m19.211s
user 0m18.138s
sys 0m1.068s
Overhead Command Shared Object Symbol
44.79% haproxy haproxy [.] ebst_insert
25.07% haproxy haproxy [.] ebmb_insert_prefix
3.44% haproxy libc-2.33.so [.] __libc_calloc
2.71% haproxy libc-2.33.so [.] _int_malloc
2.33% haproxy haproxy [.] free_pattern_tree
1.78% haproxy libc-2.33.so [.] inet_pton4
1.62% haproxy libc-2.33.so [.] _IO_fgets
1.58% haproxy libc-2.33.so [.] _int_free
1.56% haproxy haproxy [.] pat_ref_push
1.35% haproxy libc-2.33.so [.] malloc_consolidate
1.16% haproxy libc-2.33.so [.] __strlen_avx2
0.79% haproxy haproxy [.] pat_idx_tree_ip
0.76% haproxy haproxy [.] pat_ref_read_from_file
0.60% haproxy libc-2.33.so [.] __strrchr_avx2
0.55% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.54% haproxy libc-2.33.so [.] __memchr_avx2
0.46% haproxy haproxy [.] pat_ref_append
After:
RAM (VSZ/RSS): 4166108 3634768
real 0m18.114s
user 0m17.113s
sys 0m0.996s
Overhead Command Shared Object Symbol
38.99% haproxy haproxy [.] cebs_insert
27.09% haproxy haproxy [.] ebmb_insert_prefix
3.63% haproxy libc-2.33.so [.] __libc_calloc
3.18% haproxy libc-2.33.so [.] _int_malloc
2.69% haproxy haproxy [.] free_pattern_tree
1.99% haproxy libc-2.33.so [.] inet_pton4
1.74% haproxy libc-2.33.so [.] _IO_fgets
1.73% haproxy libc-2.33.so [.] _int_free
1.57% haproxy haproxy [.] pat_ref_push
1.48% haproxy libc-2.33.so [.] malloc_consolidate
1.22% haproxy libc-2.33.so [.] __strlen_avx2
1.05% haproxy libc-2.33.so [.] __strcmp_avx2
0.80% haproxy haproxy [.] pat_idx_tree_ip
0.74% haproxy libc-2.33.so [.] __memchr_avx2
0.69% haproxy libc-2.33.so [.] __strrchr_avx2
0.69% haproxy libc-2.33.so [.] _IO_getline_info
0.62% haproxy haproxy [.] pat_ref_read_from_file
0.56% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.56% haproxy libc-2.33.so [.] cfree@GLIBC_2.2.5
0.46% haproxy haproxy [.] pat_ref_append
If the addresses are totally disordered (via "shuf" on the input file),
we see both implementations reach exactly 68.0s (slower due to much
higher cache miss ratio).
On large strings such as user agents (1 million here), it's now slightly
slower (+9%):
Before:
real 0m2.475s
user 0m2.316s
sys 0m0.155s
After:
real 0m2.696s
user 0m2.544s
sys 0m0.147s
But such patterns are much less common than short ones, and the memory
savings do still count.
Note that while it could be tempting to get rid of the list that chains
all these pat_ref_elt together and only enumerate them by walking along
the tree to save 16 extra bytes per entry, that's not possible due to
the problem that insertion ordering is critical (think overlapping regex
such as /index.* and /index.html). Currently it's not possible to proceed
differently because patterns are first pre-loaded into the pat_ref via
pat_ref_read_from_file_smp() and later indexed by pattern_read_from_file(),
which has to only redo the second part anyway for maps/acls declared
multiple times.
2025-01-12 13:38:28 -05:00
|
|
|
#include <import/cebs_tree.h>
|
2025-12-17 23:31:29 -05:00
|
|
|
#include <import/ceb32_tree.h>
|
2023-08-22 10:52:47 -04:00
|
|
|
#include <import/ebistree.h>
|
|
|
|
|
#include <import/ebpttree.h>
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <import/ebsttree.h>
|
|
|
|
|
#include <import/lru.h>
|
|
|
|
|
|
2020-05-27 06:58:42 -04:00
|
|
|
#include <haproxy/api.h>
|
2020-06-04 11:05:57 -04:00
|
|
|
#include <haproxy/global.h>
|
2020-06-04 16:01:04 -04:00
|
|
|
#include <haproxy/log.h>
|
2020-06-02 10:48:09 -04:00
|
|
|
#include <haproxy/net_helper.h>
|
2020-06-04 09:06:28 -04:00
|
|
|
#include <haproxy/pattern.h>
|
2020-06-02 11:32:26 -04:00
|
|
|
#include <haproxy/regex.h>
|
2020-06-04 09:33:47 -04:00
|
|
|
#include <haproxy/sample.h>
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <haproxy/tools.h>
|
2021-09-11 11:51:13 -04:00
|
|
|
#include <haproxy/xxhash.h>
|
2013-11-28 05:05:19 -05:00
|
|
|
|
|
|
|
|
|
2025-12-17 23:31:29 -05:00
|
|
|
/* Convenience macros for iterating over generations.
 *
 * Iterates over every generation attached to pattern reference <ref>.
 * <gen> is the "struct pat_ref_gen *" iterator variable. The current
 * entry must not be removed from the tree inside the loop body; use
 * pat_ref_gen_foreach_safe() for that.
 */
#define pat_ref_gen_foreach(gen, ref) \
	for (gen = cebu32_item_first(&ref->gen_root, gen_node, gen_id, struct pat_ref_gen); \
	     gen; \
	     gen = cebu32_item_next(&ref->gen_root, gen_node, gen_id, gen))
|
|
|
|
|
|
|
|
|
|
/* Safe variant that allows deleting an entry in the body of the loop: the
 * successor <next> (a "struct pat_ref_gen *" scratch variable) is looked up
 * before the body runs, so removing <gen> itself from the tree is allowed.
 */
#define pat_ref_gen_foreach_safe(gen, next, ref) \
	for (gen = cebu32_item_first(&ref->gen_root, gen_node, gen_id, struct pat_ref_gen); \
	     gen && (next = cebu32_item_next(&ref->gen_root, gen_node, gen_id, gen), 1); \
	     gen = next)
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Human-readable name of each matching method, indexed by PAT_MATCH_*. */
const char *const pat_match_names[PAT_MATCH_NUM] = {
	[PAT_MATCH_FOUND] = "found",
	[PAT_MATCH_BOOL] = "bool",
	[PAT_MATCH_INT] = "int",
	[PAT_MATCH_IP] = "ip",
	[PAT_MATCH_BIN] = "bin",
	[PAT_MATCH_LEN] = "len",
	[PAT_MATCH_STR] = "str",
	[PAT_MATCH_BEG] = "beg",
	[PAT_MATCH_SUB] = "sub",
	[PAT_MATCH_DIR] = "dir",
	[PAT_MATCH_DOM] = "dom",
	[PAT_MATCH_END] = "end",
	[PAT_MATCH_REG] = "reg",
	[PAT_MATCH_REGM] = "regm",
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Parsing function used to turn the text form of a pattern into a struct
 * pattern, indexed by PAT_MATCH_*. See the pat_parse_*() functions.
 */
int (*const pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = {
	[PAT_MATCH_FOUND] = pat_parse_nothing,
	[PAT_MATCH_BOOL] = pat_parse_nothing,
	[PAT_MATCH_INT] = pat_parse_int,
	[PAT_MATCH_IP] = pat_parse_ip,
	[PAT_MATCH_BIN] = pat_parse_bin,
	[PAT_MATCH_LEN] = pat_parse_int,
	[PAT_MATCH_STR] = pat_parse_str,
	[PAT_MATCH_BEG] = pat_parse_str,
	[PAT_MATCH_SUB] = pat_parse_str,
	[PAT_MATCH_DIR] = pat_parse_str,
	[PAT_MATCH_DOM] = pat_parse_str,
	[PAT_MATCH_END] = pat_parse_str,
	[PAT_MATCH_REG] = pat_parse_reg,
	[PAT_MATCH_REGM] = pat_parse_reg,
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Indexing function used to insert a parsed pattern into a pattern
 * expression, indexed by PAT_MATCH_*. Depending on the method, patterns
 * are stored either in a list (pat_idx_list_*) or in a tree
 * (pat_idx_tree_*).
 */
int (*const pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = {
	[PAT_MATCH_FOUND] = pat_idx_list_val,
	[PAT_MATCH_BOOL] = pat_idx_list_val,
	[PAT_MATCH_INT] = pat_idx_list_val,
	[PAT_MATCH_IP] = pat_idx_tree_ip,
	[PAT_MATCH_BIN] = pat_idx_list_ptr,
	[PAT_MATCH_LEN] = pat_idx_list_val,
	[PAT_MATCH_STR] = pat_idx_tree_str,
	[PAT_MATCH_BEG] = pat_idx_tree_pfx,
	[PAT_MATCH_SUB] = pat_idx_list_str,
	[PAT_MATCH_DIR] = pat_idx_list_str,
	[PAT_MATCH_DOM] = pat_idx_list_str,
	[PAT_MATCH_END] = pat_idx_list_str,
	[PAT_MATCH_REG] = pat_idx_list_reg,
	[PAT_MATCH_REGM] = pat_idx_list_regm,
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Pruning function used to release all patterns of an expression, indexed
 * by PAT_MATCH_*. All methods currently share the generic pat_prune_gen().
 */
void (*const pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
	[PAT_MATCH_FOUND] = pat_prune_gen,
	[PAT_MATCH_BOOL] = pat_prune_gen,
	[PAT_MATCH_INT] = pat_prune_gen,
	[PAT_MATCH_IP] = pat_prune_gen,
	[PAT_MATCH_BIN] = pat_prune_gen,
	[PAT_MATCH_LEN] = pat_prune_gen,
	[PAT_MATCH_STR] = pat_prune_gen,
	[PAT_MATCH_BEG] = pat_prune_gen,
	[PAT_MATCH_SUB] = pat_prune_gen,
	[PAT_MATCH_DIR] = pat_prune_gen,
	[PAT_MATCH_DOM] = pat_prune_gen,
	[PAT_MATCH_END] = pat_prune_gen,
	[PAT_MATCH_REG] = pat_prune_gen,
	[PAT_MATCH_REGM] = pat_prune_gen,
};
|
|
|
|
|
|
2021-04-10 11:44:27 -04:00
|
|
|
/* Runtime matching function for each method, indexed by PAT_MATCH_*.
 * Each returns the matched pattern, or NULL if the sample does not match.
 * PAT_MATCH_FOUND requires no matching function (mere existence matches).
 */
struct pattern *(*const pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = {
	[PAT_MATCH_FOUND] = NULL,
	[PAT_MATCH_BOOL] = pat_match_nothing,
	[PAT_MATCH_INT] = pat_match_int,
	[PAT_MATCH_IP] = pat_match_ip,
	[PAT_MATCH_BIN] = pat_match_bin,
	[PAT_MATCH_LEN] = pat_match_len,
	[PAT_MATCH_STR] = pat_match_str,
	[PAT_MATCH_BEG] = pat_match_beg,
	[PAT_MATCH_SUB] = pat_match_sub,
	[PAT_MATCH_DIR] = pat_match_dir,
	[PAT_MATCH_DOM] = pat_match_dom,
	[PAT_MATCH_END] = pat_match_end,
	[PAT_MATCH_REG] = pat_match_reg,
	[PAT_MATCH_REGM] = pat_match_regm,
};
|
|
|
|
|
|
2013-12-06 09:36:54 -05:00
|
|
|
/* Just used for checking configuration compatibility: the sample type
 * (SMP_T_*) each matching method expects, indexed by PAT_MATCH_*.
 */
int const pat_match_types[PAT_MATCH_NUM] = {
	[PAT_MATCH_FOUND] = SMP_T_SINT,
	[PAT_MATCH_BOOL] = SMP_T_SINT,
	[PAT_MATCH_INT] = SMP_T_SINT,
	[PAT_MATCH_IP] = SMP_T_ADDR,
	[PAT_MATCH_BIN] = SMP_T_BIN,
	[PAT_MATCH_LEN] = SMP_T_STR,
	[PAT_MATCH_STR] = SMP_T_STR,
	[PAT_MATCH_BEG] = SMP_T_STR,
	[PAT_MATCH_SUB] = SMP_T_STR,
	[PAT_MATCH_DIR] = SMP_T_STR,
	[PAT_MATCH_DOM] = SMP_T_STR,
	[PAT_MATCH_END] = SMP_T_STR,
	[PAT_MATCH_REG] = SMP_T_STR,
	[PAT_MATCH_REGM] = SMP_T_STR,
};
|
|
|
|
|
|
2014-01-17 09:25:13 -05:00
|
|
|
/* this struct is used to return information from the pat_match_*()
 * functions without allocating (see pat_match_nothing() for an example);
 * thread-local because lookups may run concurrently on several threads
 */
static THREAD_LOCAL struct pattern static_pattern;
static THREAD_LOCAL struct sample_data static_sample_data;

/* This is the root of the list of all available pattern_ref values. */
struct list pattern_reference = LIST_HEAD_INIT(pattern_reference);

/* per-thread LRU tree and its hashing seed; NOTE(review): presumably used
 * to cache recent pattern lookup results — confirm at the match sites
 */
static THREAD_LOCAL struct lru64_head *pat_lru_tree;
static unsigned long long pat_lru_seed __read_mostly;

/* global counters of pattern entries added and freed since startup */
unsigned long long patterns_added = 0;
unsigned long long patterns_freed = 0;
|
|
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
/*
|
2014-01-21 05:25:41 -05:00
|
|
|
*
|
|
|
|
|
* The following functions are not exported and are used by internals process
|
|
|
|
|
* of pattern matching
|
|
|
|
|
*
|
2013-11-28 05:05:19 -05:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* Background: Fast way to find a zero byte in a word
|
|
|
|
|
* http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
|
|
|
|
|
* hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL;
|
|
|
|
|
*
|
|
|
|
|
* To look for 4 different byte values, xor the word with those bytes and
|
|
|
|
|
* then check for zero bytes:
|
|
|
|
|
*
|
|
|
|
|
* v = (((unsigned char)c * 0x1010101U) ^ delimiter)
|
|
|
|
|
* where <delimiter> is the 4 byte values to look for (as an uint)
|
|
|
|
|
* and <c> is the character that is being tested
|
|
|
|
|
*/
|
|
|
|
|
/* Check whether byte <c> equals any of the 4 delimiter bytes packed into
 * <mask> (as built by make_4delim()). Returns non-zero if so, 0 otherwise.
 */
static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
{
	/* replicate <c> into all 4 bytes, then xor against the delimiters:
	 * a byte that matched becomes zero
	 */
	unsigned int word = mask ^ (c * 0x01010101U);

	/* classic "has zero byte" trick: a high bit remains set for every
	 * zero byte present in <word>
	 */
	return (word - 0x01010101U) & ~word & 0x80808080U;
}
|
|
|
|
|
|
|
|
|
|
/* Pack the 4 delimiter bytes <d1>..<d4> into one word, <d1> being the most
 * significant byte, for use with is_delimiter().
 *
 * The bytes are widened to unsigned int before shifting: a plain
 * "d1 << 24" promotes d1 to (signed) int, and left-shifting a bit into
 * the sign bit is undefined behavior when d1 >= 0x80.
 */
static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4)
{
	return (unsigned int)d1 << 24 | (unsigned int)d2 << 16 | (unsigned int)d3 << 8 | d4;
}
|
|
|
|
|
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* These functions are exported and may be used by any other component.
|
|
|
|
|
*
|
2014-08-29 09:19:33 -04:00
|
|
|
* The following functions are used for parsing pattern matching input value.
|
|
|
|
|
* The <text> contain the string to be parsed. <pattern> must be a preallocated
|
|
|
|
|
* pattern. The pat_parse_* functions fill this structure with the parsed value.
|
|
|
|
|
* <err> is filled with an error message built with memprintf() function. It is
|
|
|
|
|
* allowed to use a trash as a temporary storage for the returned pattern, as
|
|
|
|
|
* the next call after these functions will be pat_idx_*.
|
2014-01-21 05:25:41 -05:00
|
|
|
*
|
2014-08-29 09:19:33 -04:00
|
|
|
* In success case, the pat_parse_* function returns 1. If the function
|
|
|
|
|
* fails, it returns 0 and <err> is filled.
|
2014-01-21 05:25:41 -05:00
|
|
|
*/
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Parser used by methods which take no pattern value: the current line is
 * ignored and the call always succeeds.
 */
int pat_parse_nothing(const char *text, struct pattern *pattern, int mflags, char **err)
{
	/* nothing to extract: unconditional success */
	return 1;
}
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Parse a string. It is allocated and duplicated. */
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_str(const char *text, struct pattern *pattern, int mflags, char **err)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 18:20:33 -05:00
|
|
|
pattern->type = SMP_T_STR;
|
2013-12-13 09:36:59 -05:00
|
|
|
pattern->ptr.str = (char *)text;
|
2014-01-24 04:58:12 -05:00
|
|
|
pattern->len = strlen(text);
|
2014-01-21 05:25:41 -05:00
|
|
|
return 1;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Parse a binary written in hexa. It is allocated. */
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_bin(const char *text, struct pattern *pattern, int mflags, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2018-07-13 05:56:34 -04:00
|
|
|
struct buffer *trash;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-16 18:20:33 -05:00
|
|
|
pattern->type = SMP_T_BIN;
|
2014-01-21 05:25:41 -05:00
|
|
|
trash = get_trash_chunk();
|
|
|
|
|
pattern->len = trash->size;
|
2018-07-13 04:54:26 -04:00
|
|
|
pattern->ptr.str = trash->area;
|
2014-01-24 04:58:12 -05:00
|
|
|
return !!parse_binary(text, &pattern->ptr.str, &pattern->len, err);
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse a regex. It is allocated. */
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_reg(const char *text, struct pattern *pattern, int mflags, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2014-01-29 13:35:16 -05:00
|
|
|
pattern->ptr.str = (char *)text;
|
2013-11-28 05:05:19 -05:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse a range of positive integers delimited by either ':' or '-'. If only
|
|
|
|
|
* one integer is read, it is set as both min and max. An operator may be
|
|
|
|
|
* specified as the prefix, among this list of 5 :
|
|
|
|
|
*
|
|
|
|
|
* 0:eq, 1:gt, 2:ge, 3:lt, 4:le
|
|
|
|
|
*
|
|
|
|
|
* The default operator is "eq". It supports range matching. Ranges are
|
|
|
|
|
* rejected for other operators. The operator may be changed at any time.
|
|
|
|
|
* The operator is stored in the 'opaque' argument.
|
|
|
|
|
*
|
|
|
|
|
* If err is non-NULL, an error message will be returned there on errors and
|
2014-01-23 11:40:34 -05:00
|
|
|
* the caller will have to free it. The function returns zero on error, and
|
|
|
|
|
* non-zero on success.
|
2013-11-28 05:05:19 -05:00
|
|
|
*
|
|
|
|
|
*/
|
2014-04-28 05:18:57 -04:00
|
|
|
/* Parse an integer or an integer range "min:max" / "min-max" into
 * <pattern> (see the block comment above for operator semantics).
 * Returns 1 on success; on failure returns 0 and fills <err> via
 * memprintf().
 */
int pat_parse_int(const char *text, struct pattern *pattern, int mflags, char **err)
{
	const char *ptr = text;

	pattern->type = SMP_T_SINT;

	/* Empty string is not valid */
	if (!*text)
		goto not_valid_range;

	/* Search ':' or '-' separator. */
	while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
		ptr++;

	/* If separator not found: a single value acts as both bounds. */
	if (!*ptr) {
		if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) {
			memprintf(err, "'%s' is not a number", text);
			return 0;
		}
		pattern->val.range.max = pattern->val.range.min;
		pattern->val.range.min_set = 1;
		pattern->val.range.max_set = 1;
		return 1;
	}

	/* If the separator is the first character: ":N" means "<= N",
	 * the lower bound is left open.
	 */
	if (ptr == text && *(ptr + 1) != '\0') {
		if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
			goto not_valid_range;

		pattern->val.range.min_set = 0;
		pattern->val.range.max_set = 1;
		return 1;
	}

	/* If separator is the last character: "N:" means ">= N",
	 * the upper bound is left open.
	 */
	if (*(ptr + 1) == '\0') {
		if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
			goto not_valid_range;

		pattern->val.range.min_set = 1;
		pattern->val.range.max_set = 0;
		return 1;
	}

	/* Else, parse two numbers. */
	if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
		goto not_valid_range;

	if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
		goto not_valid_range;

	/* an inverted range is rejected */
	if (pattern->val.range.min > pattern->val.range.max)
		goto not_valid_range;

	pattern->val.range.min_set = 1;
	pattern->val.range.max_set = 1;
	return 1;

 not_valid_range:
	memprintf(err, "'%s' is not a valid number range", text);
	return 0;
}
|
|
|
|
|
|
|
|
|
|
/* Parse a range of positive 2-component versions delimited by either ':' or
|
|
|
|
|
* '-'. The version consists in a major and a minor, both of which must be
|
|
|
|
|
* smaller than 65536, because internally they will be represented as a 32-bit
|
|
|
|
|
* integer.
|
|
|
|
|
* If only one version is read, it is set as both min and max. Just like for
|
|
|
|
|
* pure integers, an operator may be specified as the prefix, among this list
|
|
|
|
|
* of 5 :
|
|
|
|
|
*
|
|
|
|
|
* 0:eq, 1:gt, 2:ge, 3:lt, 4:le
|
|
|
|
|
*
|
|
|
|
|
* The default operator is "eq". It supports range matching. Ranges are
|
|
|
|
|
* rejected for other operators. The operator may be changed at any time.
|
|
|
|
|
* The operator is stored in the 'opaque' argument. This allows constructs
|
|
|
|
|
* such as the following one :
|
|
|
|
|
*
|
|
|
|
|
* acl obsolete_ssl ssl_req_proto lt 3
|
|
|
|
|
* acl unsupported_ssl ssl_req_proto gt 3.1
|
|
|
|
|
* acl valid_ssl ssl_req_proto 3.0-3.1
|
|
|
|
|
*
|
|
|
|
|
*/
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_dotted_ver(const char *text, struct pattern *pattern, int mflags, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2014-01-24 04:58:12 -05:00
|
|
|
const char *ptr = text;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2015-07-06 17:43:03 -04:00
|
|
|
pattern->type = SMP_T_SINT;
|
2014-01-23 11:40:34 -05:00
|
|
|
|
|
|
|
|
/* Search ':' or '-' separator. */
|
|
|
|
|
while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
|
|
|
|
|
ptr++;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2014-01-23 11:40:34 -05:00
|
|
|
/* If separator not found. */
|
2014-01-24 04:58:12 -05:00
|
|
|
if (*ptr == '\0' && ptr > text) {
|
|
|
|
|
if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
|
|
|
|
|
memprintf(err, "'%s' is not a dotted number", text);
|
2013-11-28 05:05:19 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
2014-01-23 11:40:34 -05:00
|
|
|
pattern->val.range.max = pattern->val.range.min;
|
|
|
|
|
pattern->val.range.min_set = 1;
|
|
|
|
|
pattern->val.range.max_set = 1;
|
|
|
|
|
return 1;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
2014-01-23 11:40:34 -05:00
|
|
|
/* If the separator is the first character. */
|
2014-01-24 04:58:12 -05:00
|
|
|
if (ptr == text && *(ptr+1) != '\0') {
|
2014-01-23 11:40:34 -05:00
|
|
|
if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
|
2014-01-24 04:58:12 -05:00
|
|
|
memprintf(err, "'%s' is not a valid dotted number range", text);
|
2014-01-23 11:40:34 -05:00
|
|
|
return 0;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
2014-01-23 11:40:34 -05:00
|
|
|
pattern->val.range.min_set = 0;
|
|
|
|
|
pattern->val.range.max_set = 1;
|
|
|
|
|
return 1;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
2014-01-23 11:40:34 -05:00
|
|
|
/* If separator is the last character. */
|
2014-01-24 04:58:12 -05:00
|
|
|
if (ptr == &text[strlen(text)-1]) {
|
|
|
|
|
if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
|
|
|
|
|
memprintf(err, "'%s' is not a valid dotted number range", text);
|
2014-01-23 11:40:34 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
2013-11-28 05:05:19 -05:00
|
|
|
pattern->val.range.min_set = 1;
|
|
|
|
|
pattern->val.range.max_set = 0;
|
2014-01-23 11:40:34 -05:00
|
|
|
return 1;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
2014-01-23 11:40:34 -05:00
|
|
|
|
|
|
|
|
/* Else, parse two numbers. */
|
2014-01-24 04:58:12 -05:00
|
|
|
if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
|
|
|
|
|
memprintf(err, "'%s' is not a valid dotted number range", text);
|
2014-01-23 11:40:34 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
|
2014-01-24 04:58:12 -05:00
|
|
|
memprintf(err, "'%s' is not a valid dotted number range", text);
|
2014-01-23 11:40:34 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
if (pattern->val.range.min > pattern->val.range.max) {
|
2014-01-24 04:58:12 -05:00
|
|
|
memprintf(err, "'%s' is not a valid dotted number range", text);
|
2014-01-23 11:40:34 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
pattern->val.range.min_set = 1;
|
|
|
|
|
pattern->val.range.max_set = 1;
|
|
|
|
|
return 1;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse an IP address and an optional mask in the form addr[/mask].
|
|
|
|
|
* The addr may either be an IPv4 address or a hostname. The mask
|
|
|
|
|
* may either be a dotted mask or a number of bits. Returns 1 if OK,
|
|
|
|
|
* otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
|
|
|
|
|
*/
|
2014-04-28 05:18:57 -04:00
|
|
|
int pat_parse_ip(const char *text, struct pattern *pattern, int mflags, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2014-04-28 05:18:57 -04:00
|
|
|
if (str2net(text, !(mflags & PAT_MF_NO_DNS) && (global.mode & MODE_STARTING),
|
2014-02-11 09:23:04 -05:00
|
|
|
&pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
|
2013-11-28 05:05:19 -05:00
|
|
|
pattern->type = SMP_T_IPV4;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2014-01-24 04:58:12 -05:00
|
|
|
else if (str62net(text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
|
2013-11-28 05:05:19 -05:00
|
|
|
pattern->type = SMP_T_IPV6;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
else {
|
2014-01-24 04:58:12 -05:00
|
|
|
memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", text);
|
2013-11-28 05:05:19 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* These functions are exported and may be used by any other component.
|
|
|
|
|
*
|
2018-11-15 13:22:31 -05:00
|
|
|
* This function just takes a sample <smp> and checks if this sample matches
|
|
|
|
|
* with the pattern <pattern>. This function returns only PAT_MATCH or
|
2014-01-21 05:25:41 -05:00
|
|
|
* PAT_NOMATCH.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* always return false */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_nothing(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2015-08-19 03:07:19 -04:00
|
|
|
if (smp->data.u.sint) {
|
2014-03-17 14:53:10 -04:00
|
|
|
if (fill) {
|
2015-08-19 02:35:43 -04:00
|
|
|
static_pattern.data = NULL;
|
2014-03-17 14:53:10 -04:00
|
|
|
static_pattern.ref = NULL;
|
|
|
|
|
static_pattern.type = 0;
|
|
|
|
|
static_pattern.ptr.str = NULL;
|
|
|
|
|
}
|
|
|
|
|
return &static_pattern;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
BUG/MEDIUM: pattern: prevent uninitialized reads in pat_match_{str,beg}
Using valgrind when running map_beg or map_str, the following error is
reported:
==242644== Conditional jump or move depends on uninitialised value(s)
==242644== at 0x2E4AB1: pat_match_str (pattern.c:457)
==242644== by 0x2E81ED: pattern_exec_match (pattern.c:2560)
==242644== by 0x343176: sample_conv_map (map.c:211)
==242644== by 0x27522F: sample_process_cnv (sample.c:1330)
==242644== by 0x2752DB: sample_process (sample.c:1373)
==242644== by 0x319917: action_store (vars.c:814)
==242644== by 0x24D451: http_req_get_intercept_rule (http_ana.c:2697)
In fact, the error is legit, because in pat_match_{beg,str}, we
dereference the buffer on len+1 to check if a value was previously set,
and then decide to force NULL-byte if it wasn't set.
But the approach is no longer compatible with current architecture:
data past str.data is not guaranteed to be initialized in the buffer.
Thus we cannot dereference the value, else we expose us to uninitialized
read errors. Moreover, the check is useless, because we systematically
set the ending byte to 0 when the conditions are met.
Finally, restoring the older value after the lookup is not relevant:
indeed, either the sample is marked as const and in such case it
is already duplicated, or the sample is not const and we forcefully add
a terminating NULL byte outside from the actual string bytes (since we're
past str.data), so as we didn't alter effective string data and that data
past str.data cannot be dereferenced anyway as it isn't guaranteed to be
initialized, there's no point in restoring previous uninitialized data.
It could be backported in all stable versions. But since this was only
detected by valgrind and isn't known to cause issues in existing
deployments, it's probably better to wait a bit before backporting it
to avoid any breakage.. although the fix should be theoretically harmless.
2024-09-06 10:33:15 -04:00
|
|
|
/* ensure the input sample can be read as a string without knowing its size,
|
|
|
|
|
* that is, ensure the terminating null byte is there
|
|
|
|
|
*
|
|
|
|
|
* The function may fail. Returns 1 on success and 0 on failure
|
|
|
|
|
*/
|
|
|
|
|
static inline int pat_match_ensure_str(struct sample *smp)
|
|
|
|
|
{
|
|
|
|
|
if (smp->data.u.str.data < smp->data.u.str.size) {
|
|
|
|
|
/* we have to force a trailing zero on the test pattern and
|
|
|
|
|
* the buffer is large enough to accommodate it. If the flag
|
|
|
|
|
* CONST is set, duplicate the string
|
|
|
|
|
*/
|
|
|
|
|
if (smp->flags & SMP_F_CONST) {
|
|
|
|
|
if (!smp_dup(smp))
|
|
|
|
|
return 0;
|
|
|
|
|
} else
|
|
|
|
|
smp->data.u.str.area[smp->data.u.str.data] = '\0';
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* Otherwise, the sample is duplicated. A trailing zero
|
|
|
|
|
* is automatically added to the string.
|
|
|
|
|
*/
|
|
|
|
|
if (!smp_dup(smp))
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2018-11-15 13:22:31 -05:00
|
|
|
/* NB: For two strings to be identical, it is required that their length match */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
|
|
|
|
int icase;
|
2013-12-16 08:22:13 -05:00
|
|
|
struct ebmb_node *node;
|
|
|
|
|
struct pattern_tree *elt;
|
|
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
2013-12-16 08:22:13 -05:00
|
|
|
|
|
|
|
|
/* Lookup a string in the expression's pattern tree. */
|
|
|
|
|
if (!eb_is_empty(&expr->pattern_tree)) {
|
BUG/MEDIUM: pattern: prevent uninitialized reads in pat_match_{str,beg}
Using valgrind when running map_beg or map_str, the following error is
reported:
==242644== Conditional jump or move depends on uninitialised value(s)
==242644== at 0x2E4AB1: pat_match_str (pattern.c:457)
==242644== by 0x2E81ED: pattern_exec_match (pattern.c:2560)
==242644== by 0x343176: sample_conv_map (map.c:211)
==242644== by 0x27522F: sample_process_cnv (sample.c:1330)
==242644== by 0x2752DB: sample_process (sample.c:1373)
==242644== by 0x319917: action_store (vars.c:814)
==242644== by 0x24D451: http_req_get_intercept_rule (http_ana.c:2697)
In fact, the error is legit, because in pat_match_{beg,str}, we
dereference the buffer on len+1 to check if a value was previously set,
and then decide to force NULL-byte if it wasn't set.
But the approach is no longer compatible with current architecture:
data past str.data is not guaranteed to be initialized in the buffer.
Thus we cannot dereference the value, else we expose us to uninitialized
read errors. Moreover, the check is useless, because we systematically
set the ending byte to 0 when the conditions are met.
Finally, restoring the older value after the lookup is not relevant:
indeed, either the sample is marked as const and in such case it
is already duplicated, or the sample is not const and we forcefully add
a terminating NULL byte outside from the actual string bytes (since we're
past str.data), so as we didn't alter effective string data and that data
past str.data cannot be dereferenced anyway as it isn't guaranteed to be
initialized, there's no point in restoring previous uninitialized data.
It could be backported in all stable versions. But since this was only
detected by valgrind and isn't known to cause issues in existing
deployments, it's probably better to wait a bit before backporting it
to avoid any breakage.. although the fix should be theoretically harmless.
2024-09-06 10:33:15 -04:00
|
|
|
if (!pat_match_ensure_str(smp))
|
|
|
|
|
return NULL;
|
2020-06-30 12:52:32 -04:00
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
node = ebst_lookup(&expr->pattern_tree, smp->data.u.str.area);
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
while (node) {
|
|
|
|
|
elt = ebmb_entry(node, struct pattern_tree, node);
|
|
|
|
|
if (elt->ref->gen_id != expr->ref->curr_gen) {
|
BUG/MEDIUM: pattern: only visit equivalent nodes when skipping versions
Miroslav reported in issue #1802 a problem that affects atomic map/acl
updates. During an update, incorrect versions are properly skipped, but
in order to do so, we rely on ebmb_next() instead of ebmb_next_dup().
This means that if a new matching entry is in the process of being
added and is the first one to succeed in the lookup, we'll skip it due
to its version and use the next entry regardless of its value provided
that it has the correct version. For IP addresses and string prefixes
it's particularly visible because a lookup may match a new longer prefix
that's not yet committed (e.g. 11.0.0.1 would match 11/8 when 10/7 was
the only committed one), and skipping it could end up on 12/8 for
example. As soon as a commit for the last version happens, the issue
disappears.
This problem only affects tree-based matches: the "str", "ip", and "beg"
matches.
Here we replace the ebmb_next() values with ebmb_next_dup() for exact
string matches, and with ebmb_lookup_shorter() for longest matches,
which will first visit duplicates, then look for shorter prefixes. This
relies on previous commit:
MINOR: ebtree: add ebmb_lookup_shorter() to pursue lookups
Both need to be backported to 2.4, where the generation ID was added.
Note that nowadays a simpler and more efficient approach might be employed,
by having a single version in the current tree, and a list of trees per
version. Manipulations would look up the tree version and work (and lock)
only in the relevant trees, while normal operations would be performed on
the current tree only. Committing would just be a matter of swapping tree
roots and deleting old trees contents.
2022-08-01 05:46:27 -04:00
|
|
|
node = ebmb_next_dup(node);
|
2020-10-29 04:41:34 -04:00
|
|
|
continue;
|
|
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
if (fill) {
|
2015-08-19 02:35:43 -04:00
|
|
|
static_pattern.data = elt->data;
|
2014-01-28 09:54:36 -05:00
|
|
|
static_pattern.ref = elt->ref;
|
2014-04-28 05:18:57 -04:00
|
|
|
static_pattern.sflags = PAT_SF_TREE;
|
2013-12-16 08:22:13 -05:00
|
|
|
static_pattern.type = SMP_T_STR;
|
|
|
|
|
static_pattern.ptr.str = (char *)elt->node.key;
|
|
|
|
|
}
|
|
|
|
|
return &static_pattern;
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
/* look in the list */
|
2024-11-03 12:42:26 -05:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns) && expr->ref->entry_cnt >= 20) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
|
|
|
|
|
2017-07-03 05:34:05 -04:00
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pattern->len != smp->data.u.str.data)
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
icase = expr->mflags & PAT_MF_IGNORE_CASE;
|
2018-07-13 04:54:26 -04:00
|
|
|
if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) ||
|
|
|
|
|
(!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0)) {
|
2015-04-29 10:24:50 -04:00
|
|
|
ret = pattern;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
|
|
|
|
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NB: For two binaries buf to be identical, it is required that their lengths match */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
|
|
|
|
|
2024-11-03 12:42:26 -05:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns) && expr->ref->entry_cnt >= 20) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pattern->len != smp->data.u.str.data)
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (memcmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) {
|
2015-04-29 10:24:50 -04:00
|
|
|
ret = pattern;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
|
|
|
|
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
2016-02-10 16:55:20 -05:00
|
|
|
/* Executes a regex. It temporarily changes the data to add a trailing zero,
|
|
|
|
|
* and restores the previous character when leaving. This function fills
|
|
|
|
|
* a matching array.
|
|
|
|
|
*/
|
|
|
|
|
struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill)
|
|
|
|
|
{
|
|
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data,
|
2016-02-10 16:55:20 -05:00
|
|
|
MAX_MATCH, pmatch, 0)) {
|
|
|
|
|
ret = pattern;
|
|
|
|
|
smp->ctx.a[0] = pmatch;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-21 05:25:41 -05:00
|
|
|
/* Executes a regex. It temporarily changes the data to add a trailing zero,
|
|
|
|
|
* and restores the previous character when leaving.
|
|
|
|
|
*/
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
|
|
|
|
|
2024-11-03 12:42:26 -05:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns) && expr->ref->entry_cnt >= 5) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (regex_exec2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data)) {
|
2015-04-29 10:24:50 -04:00
|
|
|
ret = pattern;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern matches the beginning of the tested string. */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
|
|
|
|
int icase;
|
2014-05-10 02:53:48 -04:00
|
|
|
struct ebmb_node *node;
|
|
|
|
|
struct pattern_tree *elt;
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2014-05-10 02:53:48 -04:00
|
|
|
/* Lookup a string in the expression's pattern tree. */
|
|
|
|
|
if (!eb_is_empty(&expr->pattern_tree)) {
|
BUG/MEDIUM: pattern: prevent uninitialized reads in pat_match_{str,beg}
Using valgrind when running map_beg or map_str, the following error is
reported:
==242644== Conditional jump or move depends on uninitialised value(s)
==242644== at 0x2E4AB1: pat_match_str (pattern.c:457)
==242644== by 0x2E81ED: pattern_exec_match (pattern.c:2560)
==242644== by 0x343176: sample_conv_map (map.c:211)
==242644== by 0x27522F: sample_process_cnv (sample.c:1330)
==242644== by 0x2752DB: sample_process (sample.c:1373)
==242644== by 0x319917: action_store (vars.c:814)
==242644== by 0x24D451: http_req_get_intercept_rule (http_ana.c:2697)
In fact, the error is legit, because in pat_match_{beg,str}, we
dereference the buffer on len+1 to check if a value was previously set,
and then decide to force NULL-byte if it wasn't set.
But the approach is no longer compatible with current architecture:
data past str.data is not guaranteed to be initialized in the buffer.
Thus we cannot dereference the value, else we expose us to uninitialized
read errors. Moreover, the check is useless, because we systematically
set the ending byte to 0 when the conditions are met.
Finally, restoring the older value after the lookup is not relevant:
indeed, either the sample is marked as const and in such case it
is already duplicated, or the sample is not const and we forcefully add
a terminating NULL byte outside from the actual string bytes (since we're
past str.data), so as we didn't alter effective string data and that data
past str.data cannot be dereferenced anyway as it isn't guaranteed to be
initialized, there's no point in restoring previous uninitialized data.
It could be backported in all stable versions. But since this was only
detected by valgrind and isn't known to cause issues in existing
deployments, it's probably better to wait a bit before backporting it
to avoid any breakage.. although the fix should be theoretically harmless.
2024-09-06 10:33:15 -04:00
|
|
|
if (!pat_match_ensure_str(smp))
|
|
|
|
|
return NULL;
|
2020-06-30 12:52:32 -04:00
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
node = ebmb_lookup_longest(&expr->pattern_tree,
|
|
|
|
|
smp->data.u.str.area);
|
2014-05-10 02:53:48 -04:00
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
while (node) {
|
|
|
|
|
elt = ebmb_entry(node, struct pattern_tree, node);
|
|
|
|
|
if (elt->ref->gen_id != expr->ref->curr_gen) {
|
BUG/MEDIUM: pattern: only visit equivalent nodes when skipping versions
Miroslav reported in issue #1802 a problem that affects atomic map/acl
updates. During an update, incorrect versions are properly skipped, but
in order to do so, we rely on ebmb_next() instead of ebmb_next_dup().
This means that if a new matching entry is in the process of being
added and is the first one to succeed in the lookup, we'll skip it due
to its version and use the next entry regardless of its value provided
that it has the correct version. For IP addresses and string prefixes
it's particularly visible because a lookup may match a new longer prefix
that's not yet committed (e.g. 11.0.0.1 would match 11/8 when 10/7 was
the only committed one), and skipping it could end up on 12/8 for
example. As soon as a commit for the last version happens, the issue
disappears.
This problem only affects tree-based matches: the "str", "ip", and "beg"
matches.
Here we replace the ebmb_next() values with ebmb_next_dup() for exact
string matches, and with ebmb_lookup_shorter() for longest matches,
which will first visit duplicates, then look for shorter prefixes. This
relies on previous commit:
MINOR: ebtree: add ebmb_lookup_shorter() to pursue lookups
Both need to be backported to 2.4, where the generation ID was added.
Note that nowadays a simpler and more efficient approach might be employed,
by having a single version in the current tree, and a list of trees per
version. Manipulations would look up the tree version and work (and lock)
only in the relevant trees, while normal operations would be performed on
the current tree only. Committing would just be a matter of swapping tree
roots and deleting old trees contents.
2022-08-01 05:46:27 -04:00
|
|
|
node = ebmb_lookup_shorter(node);
|
2020-10-29 04:41:34 -04:00
|
|
|
continue;
|
|
|
|
|
}
|
2014-05-10 02:53:48 -04:00
|
|
|
if (fill) {
|
2015-08-19 02:35:43 -04:00
|
|
|
static_pattern.data = elt->data;
|
2014-05-10 02:53:48 -04:00
|
|
|
static_pattern.ref = elt->ref;
|
|
|
|
|
static_pattern.sflags = PAT_SF_TREE;
|
|
|
|
|
static_pattern.type = SMP_T_STR;
|
|
|
|
|
static_pattern.ptr.str = (char *)elt->node.key;
|
|
|
|
|
}
|
|
|
|
|
return &static_pattern;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* look in the list */
|
2024-11-03 12:42:26 -05:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns) && expr->ref->entry_cnt >= 20) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
|
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pattern->len > smp->data.u.str.data)
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
icase = expr->mflags & PAT_MF_IGNORE_CASE;
|
2018-07-13 04:54:26 -04:00
|
|
|
if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0) ||
|
|
|
|
|
(!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0))
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2015-04-29 10:24:50 -04:00
|
|
|
ret = pattern;
|
|
|
|
|
break;
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern matches the end of the tested string. */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
|
|
|
|
int icase;
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
|
|
|
|
|
2024-11-03 12:42:26 -05:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns) && expr->ref->entry_cnt >= 20) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pattern->len > smp->data.u.str.data)
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
icase = expr->mflags & PAT_MF_IGNORE_CASE;
|
2018-07-13 04:54:26 -04:00
|
|
|
if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0) ||
|
|
|
|
|
(!icase && strncmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0))
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2015-04-29 10:24:50 -04:00
|
|
|
ret = pattern;
|
|
|
|
|
break;
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern is included inside the tested string.
|
|
|
|
|
* NB: Suboptimal, should be rewritten using a Boyer-Moore method.
|
|
|
|
|
*/
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
|
|
|
|
int icase;
|
|
|
|
|
char *end;
|
|
|
|
|
char *c;
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
2015-04-29 10:24:50 -04:00
|
|
|
struct pattern *ret = NULL;
|
|
|
|
|
struct lru64 *lru = NULL;
|
|
|
|
|
|
2024-11-03 12:42:26 -05:00
|
|
|
if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns) && expr->ref->entry_cnt >= 20) {
|
2015-05-04 11:18:42 -04:00
|
|
|
unsigned long long seed = pat_lru_seed ^ (long)expr;
|
2015-04-29 10:24:50 -04:00
|
|
|
|
2020-12-22 07:22:34 -05:00
|
|
|
lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
|
2020-11-02 09:26:51 -05:00
|
|
|
pat_lru_tree, expr, expr->ref->revision);
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru && lru->domain) {
|
2017-07-03 05:34:05 -04:00
|
|
|
ret = lru->data;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
|
2013-12-16 08:22:13 -05:00
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
|
|
|
|
|
2020-10-29 04:41:34 -04:00
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pattern->len > smp->data.u.str.data)
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
end = smp->data.u.str.area + smp->data.u.str.data - pattern->len;
|
2014-04-28 05:18:57 -04:00
|
|
|
icase = expr->mflags & PAT_MF_IGNORE_CASE;
|
2013-12-16 08:22:13 -05:00
|
|
|
if (icase) {
|
2018-07-13 04:54:26 -04:00
|
|
|
for (c = smp->data.u.str.area; c <= end; c++) {
|
2020-07-05 15:46:32 -04:00
|
|
|
if (tolower((unsigned char)*c) != tolower((unsigned char)*pattern->ptr.str))
|
2013-12-16 08:22:13 -05:00
|
|
|
continue;
|
2015-04-29 10:24:50 -04:00
|
|
|
if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) {
|
|
|
|
|
ret = pattern;
|
|
|
|
|
goto leave;
|
|
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
|
|
|
|
} else {
|
2018-07-13 04:54:26 -04:00
|
|
|
for (c = smp->data.u.str.area; c <= end; c++) {
|
2013-12-16 08:22:13 -05:00
|
|
|
if (*c != *pattern->ptr.str)
|
|
|
|
|
continue;
|
2015-04-29 10:24:50 -04:00
|
|
|
if (strncmp(pattern->ptr.str, c, pattern->len) == 0) {
|
|
|
|
|
ret = pattern;
|
|
|
|
|
goto leave;
|
|
|
|
|
}
|
2013-12-16 08:22:13 -05:00
|
|
|
}
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-04-29 10:24:50 -04:00
|
|
|
leave:
|
2019-10-23 00:59:31 -04:00
|
|
|
if (lru)
|
2020-11-02 09:26:51 -05:00
|
|
|
lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
|
2015-04-29 10:24:50 -04:00
|
|
|
|
|
|
|
|
return ret;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* This one is used by other real functions. It checks that the pattern is
|
|
|
|
|
* included inside the tested string, but enclosed between the specified
|
|
|
|
|
* delimiters or at the beginning or end of the string. The delimiters are
|
|
|
|
|
* provided as an unsigned int made by make_4delim() and match up to 4 different
|
|
|
|
|
* delimiters. Delimiters are stripped at the beginning and end of the pattern.
|
|
|
|
|
*/
|
2014-04-28 05:18:57 -04:00
|
|
|
static int match_word(struct sample *smp, struct pattern *pattern, int mflags, unsigned int delimiters)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
|
|
|
|
int may_match, icase;
|
|
|
|
|
char *c, *end;
|
|
|
|
|
char *ps;
|
|
|
|
|
int pl;
|
|
|
|
|
|
|
|
|
|
pl = pattern->len;
|
|
|
|
|
ps = pattern->ptr.str;
|
|
|
|
|
|
|
|
|
|
while (pl > 0 && is_delimiter(*ps, delimiters)) {
|
|
|
|
|
pl--;
|
|
|
|
|
ps++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (pl > 0 && is_delimiter(ps[pl - 1], delimiters))
|
|
|
|
|
pl--;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if (pl > smp->data.u.str.data)
|
2014-01-21 05:25:41 -05:00
|
|
|
return PAT_NOMATCH;
|
|
|
|
|
|
|
|
|
|
may_match = 1;
|
2014-04-28 05:18:57 -04:00
|
|
|
icase = mflags & PAT_MF_IGNORE_CASE;
|
2018-07-13 04:54:26 -04:00
|
|
|
end = smp->data.u.str.area + smp->data.u.str.data - pl;
|
|
|
|
|
for (c = smp->data.u.str.area; c <= end; c++) {
|
2014-01-21 05:25:41 -05:00
|
|
|
if (is_delimiter(*c, delimiters)) {
|
|
|
|
|
may_match = 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!may_match)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (icase) {
|
2020-07-05 15:46:32 -04:00
|
|
|
if ((tolower((unsigned char)*c) == tolower((unsigned char)*ps)) &&
|
2014-01-21 05:25:41 -05:00
|
|
|
(strncasecmp(ps, c, pl) == 0) &&
|
|
|
|
|
(c == end || is_delimiter(c[pl], delimiters)))
|
|
|
|
|
return PAT_MATCH;
|
|
|
|
|
} else {
|
|
|
|
|
if ((*c == *ps) &&
|
|
|
|
|
(strncmp(ps, c, pl) == 0) &&
|
|
|
|
|
(c == end || is_delimiter(c[pl], delimiters)))
|
|
|
|
|
return PAT_MATCH;
|
|
|
|
|
}
|
|
|
|
|
may_match = 0;
|
|
|
|
|
}
|
|
|
|
|
return PAT_NOMATCH;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern is included inside the tested string, but enclosed
|
|
|
|
|
* between the delimiters '?' or '/' or at the beginning or end of the string.
|
|
|
|
|
* Delimiters at the beginning or end of the pattern are ignored.
|
|
|
|
|
*/
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_dir(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '?', '?')))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the pattern is included inside the tested string, but enclosed
|
|
|
|
|
* between the delmiters '/', '?', '.' or ":" or at the beginning or end of
|
|
|
|
|
* the string. Delimiters at the beginning or end of the pattern are ignored.
|
|
|
|
|
*/
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_dom(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2014-04-28 05:18:57 -04:00
|
|
|
if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '.', ':')))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the integer in <test> is included between min and max */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_int(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2015-08-19 03:07:19 -04:00
|
|
|
if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.sint) &&
|
|
|
|
|
(!pattern->val.range.max_set || smp->data.u.sint <= pattern->val.range.max))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks that the length of the pattern in <test> is included between min and max */
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern *pat_match_len(struct sample *smp, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct pattern_list *lst;
|
|
|
|
|
struct pattern *pattern;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(lst, &expr->patterns, list) {
|
|
|
|
|
pattern = &lst->pat;
|
2020-10-29 04:41:34 -04:00
|
|
|
|
|
|
|
|
if (pattern->ref->gen_id != expr->ref->curr_gen)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-07-13 04:54:26 -04:00
|
|
|
if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.str.data) &&
|
|
|
|
|
(!pattern->val.range.max_set || smp->data.u.str.data <= pattern->val.range.max))
|
2013-12-16 08:22:13 -05:00
|
|
|
return pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
2014-01-21 05:25:41 -05:00
|
|
|
}
|
|
|
|
|
|
2023-09-06 05:32:54 -04:00
|
|
|
/* Performs ipv4 key lookup in <expr> ipv4 tree
|
|
|
|
|
* Returns NULL on failure
|
|
|
|
|
*/
|
|
|
|
|
static struct pattern *_pat_match_tree_ipv4(struct in_addr *key, struct pattern_expr *expr, int fill)
|
2014-01-21 05:25:41 -05:00
|
|
|
{
|
2013-12-16 08:22:13 -05:00
|
|
|
struct ebmb_node *node;
|
|
|
|
|
struct pattern_tree *elt;
|
2023-09-06 05:32:54 -04:00
|
|
|
|
|
|
|
|
/* Lookup an IPv4 address in the expression's pattern tree using
|
|
|
|
|
* the longest match method.
|
|
|
|
|
*/
|
|
|
|
|
node = ebmb_lookup_longest(&expr->pattern_tree, key);
|
|
|
|
|
while (node) {
|
|
|
|
|
elt = ebmb_entry(node, struct pattern_tree, node);
|
|
|
|
|
if (elt->ref->gen_id != expr->ref->curr_gen) {
|
|
|
|
|
node = ebmb_lookup_shorter(node);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (fill) {
|
|
|
|
|
static_pattern.data = elt->data;
|
|
|
|
|
static_pattern.ref = elt->ref;
|
|
|
|
|
static_pattern.sflags = PAT_SF_TREE;
|
|
|
|
|
static_pattern.type = SMP_T_IPV4;
|
|
|
|
|
static_pattern.val.ipv4.addr.s_addr = read_u32(elt->node.key);
|
|
|
|
|
if (!cidr2dotted(elt->node.node.pfx, &static_pattern.val.ipv4.mask))
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
return &static_pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Performs ipv6 key lookup in <expr> ipv6 tree
|
|
|
|
|
* Returns NULL on failure
|
|
|
|
|
*/
|
|
|
|
|
static struct pattern *_pat_match_tree_ipv6(struct in6_addr *key, struct pattern_expr *expr, int fill)
|
|
|
|
|
{
|
|
|
|
|
struct ebmb_node *node;
|
|
|
|
|
struct pattern_tree *elt;
|
|
|
|
|
|
|
|
|
|
/* Lookup an IPv6 address in the expression's pattern tree using
|
|
|
|
|
* the longest match method.
|
|
|
|
|
*/
|
|
|
|
|
node = ebmb_lookup_longest(&expr->pattern_tree_2, key);
|
|
|
|
|
while (node) {
|
|
|
|
|
elt = ebmb_entry(node, struct pattern_tree, node);
|
|
|
|
|
if (elt->ref->gen_id != expr->ref->curr_gen) {
|
|
|
|
|
node = ebmb_lookup_shorter(node);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (fill) {
|
|
|
|
|
static_pattern.data = elt->data;
|
|
|
|
|
static_pattern.ref = elt->ref;
|
|
|
|
|
static_pattern.sflags = PAT_SF_TREE;
|
|
|
|
|
static_pattern.type = SMP_T_IPV6;
|
|
|
|
|
memcpy(&static_pattern.val.ipv6.addr, elt->node.key, 16);
|
|
|
|
|
static_pattern.val.ipv6.mask = elt->node.node.pfx;
|
|
|
|
|
}
|
|
|
|
|
return &static_pattern;
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Looks up the IP sample <smp> in expression <expr>. Lookup order: the
 * address-family tree matching the sample, then the other family's tree
 * after conversion (v4->v6 always works, v6->v4 only for mapped/compatible
 * addresses), and finally the pattern list, which holds the IPv4 entries
 * with non-contiguous masks. When <fill> is set, tree matches fill the
 * shared static_pattern. Returns the matching pattern or NULL.
 */
struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int fill)
{
	struct in_addr v4;
	struct in6_addr v6;
	struct pattern_list *lst;
	struct pattern *pattern;

	/* The input sample is IPv4. Try to match in the trees. */
	if (smp->data.type == SMP_T_IPV4) {
		pattern = _pat_match_tree_ipv4(&smp->data.u.ipv4, expr, fill);
		if (pattern)
			return pattern;
		/* The IPv4 sample doesn't match the IPv4 tree. Convert the
		 * IPv4 sample address to IPv6 and try to look it up in the
		 * IPv6 tree.
		 */
		v4tov6(&v6, &smp->data.u.ipv4);
		pattern = _pat_match_tree_ipv6(&v6, expr, fill);
		if (pattern)
			return pattern;
		/* eligible for list lookup using IPv4 address */
		v4 = smp->data.u.ipv4;
		goto list_lookup;
	}

	/* The input sample is IPv6. Try to match in the trees. */
	if (smp->data.type == SMP_T_IPV6) {
		pattern = _pat_match_tree_ipv6(&smp->data.u.ipv6, expr, fill);
		if (pattern)
			return pattern;
		/* No match in the IPv6 tree. Try to convert 6 to 4 to lookup in
		 * the IPv4 tree. Conversion only succeeds for v4-mapped or
		 * v4-compatible v6 addresses.
		 */
		if (v6tov4(&v4, &smp->data.u.ipv6)) {
			pattern = _pat_match_tree_ipv4(&v4, expr, fill);
			if (pattern)
				return pattern;
			/* eligible for list lookup using IPv4 address */
			goto list_lookup;
		}
	}

 not_found:
	return NULL;

 list_lookup:
	/* No match in the trees, but we still have a valid IPv4 address: lookup
	 * in the IPv4 list (non-contiguous masks list). This is our last resort
	 */
	list_for_each_entry(lst, &expr->patterns, list) {
		pattern = &lst->pat;

		/* skip entries from older/newer generations */
		if (pattern->ref->gen_id != expr->ref->curr_gen)
			continue;

		/* Check if the input sample matches the current pattern:
		 * masked XOR is zero when all masked bits are equal.
		 */
		if (((v4.s_addr ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0)
			return pattern;
	}
	goto not_found;
}
|
|
|
|
|
|
2020-11-03 05:22:04 -05:00
|
|
|
/* Finds the cell <list> in the singly-linked chain starting at <head> and
 * unlinks it. Each cell is a void* slot whose value points to the next
 * cell (NULL terminates the chain), so removal simply copies the victim's
 * next pointer into its predecessor's slot. Made for pattern removal
 * within an expression; does nothing when <list> is absent.
 */
static void pat_unlink_from_head(void **head, void **list)
{
	void **cell;

	for (cell = head; *cell; cell = (void **)*cell) {
		if (*cell == (void *)list) {
			*cell = *list;
			return;
		}
	}
}
|
|
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
void free_pattern_tree(struct eb_root *root)
|
|
|
|
|
{
|
|
|
|
|
struct eb_node *node, *next;
|
2013-12-13 10:09:50 -05:00
|
|
|
struct pattern_tree *elt;
|
2013-12-09 05:29:46 -05:00
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
node = eb_first(root);
|
|
|
|
|
while (node) {
|
|
|
|
|
next = eb_next(node);
|
|
|
|
|
eb_delete(node);
|
2013-12-13 10:09:50 -05:00
|
|
|
elt = container_of(node, struct pattern_tree, node);
|
2020-11-03 05:22:04 -05:00
|
|
|
pat_unlink_from_head(&elt->ref->tree_head, &elt->from_ref);
|
2015-08-19 02:35:43 -04:00
|
|
|
free(elt->data);
|
2013-12-09 05:29:46 -05:00
|
|
|
free(elt);
|
2013-11-28 05:05:19 -05:00
|
|
|
node = next;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-02 13:26:02 -05:00
|
|
|
void pat_prune_gen(struct pattern_expr *expr)
|
2013-11-28 05:41:23 -05:00
|
|
|
{
|
2014-01-14 10:24:51 -05:00
|
|
|
struct pattern_list *pat, *tmp;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry_safe(pat, tmp, &expr->patterns, list) {
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&pat->list);
|
2020-11-03 05:22:04 -05:00
|
|
|
pat_unlink_from_head(&pat->pat.ref->list_head, &pat->from_ref);
|
2020-11-02 13:26:02 -05:00
|
|
|
if (pat->pat.sflags & PAT_SF_REGFREE)
|
|
|
|
|
regex_free(pat->pat.ptr.ptr);
|
|
|
|
|
else
|
|
|
|
|
free(pat->pat.ptr.ptr);
|
2015-08-19 02:35:43 -04:00
|
|
|
free(pat->pat.data);
|
2014-01-14 10:24:51 -05:00
|
|
|
free(pat);
|
|
|
|
|
}
|
|
|
|
|
|
2013-11-28 05:41:23 -05:00
|
|
|
free_pattern_tree(&expr->pattern_tree);
|
2013-12-19 17:54:54 -05:00
|
|
|
free_pattern_tree(&expr->pattern_tree_2);
|
2013-11-28 05:41:23 -05:00
|
|
|
LIST_INIT(&expr->patterns);
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt = 0;
|
2013-11-28 05:41:23 -05:00
|
|
|
}
|
|
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* The following functions are used for the pattern indexation
|
|
|
|
|
*
|
2013-11-28 05:05:19 -05:00
|
|
|
*/
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err)
|
2013-11-28 05:05:19 -05:00
|
|
|
{
|
2013-12-13 09:12:32 -05:00
|
|
|
struct pattern_list *patl;
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* allocate pattern */
|
|
|
|
|
patl = calloc(1, sizeof(*patl));
|
|
|
|
|
if (!patl) {
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
2014-01-23 11:53:31 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* duplicate pattern */
|
|
|
|
|
memcpy(&patl->pat, pat, sizeof(*pat));
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* chain pattern in the expression */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&expr->patterns, &patl->list);
|
2023-08-23 09:58:26 -04:00
|
|
|
patl->expr = expr;
|
2020-11-02 06:10:48 -05:00
|
|
|
/* and from the reference */
|
2020-11-03 08:50:29 -05:00
|
|
|
patl->from_ref = pat->ref->list_head;
|
|
|
|
|
pat->ref->list_head = &patl->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
struct pattern_list *patl;
|
|
|
|
|
|
|
|
|
|
/* allocate pattern */
|
|
|
|
|
patl = calloc(1, sizeof(*patl));
|
2015-02-06 11:50:55 -05:00
|
|
|
if (!patl) {
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
2014-01-23 11:53:31 -05:00
|
|
|
return 0;
|
2015-02-06 11:50:55 -05:00
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* duplicate pattern */
|
|
|
|
|
memcpy(&patl->pat, pat, sizeof(*pat));
|
|
|
|
|
patl->pat.ptr.ptr = malloc(patl->pat.len);
|
|
|
|
|
if (!patl->pat.ptr.ptr) {
|
|
|
|
|
free(patl);
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
|
2013-11-28 05:05:19 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* chain pattern in the expression */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&expr->patterns, &patl->list);
|
2023-08-23 09:58:26 -04:00
|
|
|
patl->expr = expr;
|
2020-11-02 06:10:48 -05:00
|
|
|
/* and from the reference */
|
2020-11-03 08:50:29 -05:00
|
|
|
patl->from_ref = pat->ref->list_head;
|
|
|
|
|
pat->ref->list_head = &patl->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
struct pattern_list *patl;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* allocate pattern */
|
|
|
|
|
patl = calloc(1, sizeof(*patl));
|
|
|
|
|
if (!patl) {
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* duplicate pattern */
|
|
|
|
|
memcpy(&patl->pat, pat, sizeof(*pat));
|
|
|
|
|
patl->pat.ptr.str = malloc(patl->pat.len + 1);
|
|
|
|
|
if (!patl->pat.ptr.str) {
|
|
|
|
|
free(patl);
|
|
|
|
|
memprintf(err, "out of memory while indexing pattern");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
|
|
|
|
|
patl->pat.ptr.str[patl->pat.len] = '\0';
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* chain pattern in the expression */
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&expr->patterns, &patl->list);
|
2023-08-23 09:58:26 -04:00
|
|
|
patl->expr = expr;
|
2020-11-02 06:10:48 -05:00
|
|
|
/* and from the reference */
|
2020-11-03 08:50:29 -05:00
|
|
|
patl->from_ref = pat->ref->list_head;
|
|
|
|
|
pat->ref->list_head = &patl->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-10 16:55:20 -05:00
|
|
|
/* Indexes regex pattern <pat> into expression <expr>. The regex source
 * string is compiled (honoring PAT_MF_IGNORE_CASE), with sub-expression
 * capture enabled when <cap> is non-zero. PAT_SF_REGFREE is set so the
 * compiled regex is later released with regex_free().
 * Returns 1 on success, 0 on allocation or compilation error (with <err>
 * filled).
 */
int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err)
{
	struct pattern_list *patl;

	/* allocate pattern */
	patl = calloc(1, sizeof(*patl));
	if (!patl) {
		memprintf(err, "out of memory while indexing pattern");
		return 0;
	}

	/* duplicate pattern */
	memcpy(&patl->pat, pat, sizeof(*pat));

	/* compile regex; regex_comp() fills <err> itself on failure */
	patl->pat.sflags |= PAT_SF_REGFREE;
	if (!(patl->pat.ptr.reg = regex_comp(pat->ptr.str, !(expr->mflags & PAT_MF_IGNORE_CASE),
	                                     cap, err))) {
		free(patl);
		return 0;
	}

	/* chain pattern in the expression */
	LIST_APPEND(&expr->patterns, &patl->list);
	patl->expr = expr;
	/* and from the reference */
	patl->from_ref = pat->ref->list_head;
	pat->ref->list_head = &patl->from_ref;
	expr->ref->revision = rdtsc();
	expr->ref->entry_cnt++;

	/* that's ok */
	return 1;
}
|
|
|
|
|
|
2016-02-10 16:55:20 -05:00
|
|
|
/* Indexes regex pattern <pat> into <expr> without sub-expression capture. */
int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	return pat_idx_list_reg_cap(expr, pat, 0, err);
}
|
|
|
|
|
|
|
|
|
|
/* Indexes regex pattern <pat> into <expr> with sub-expression capture
 * enabled (regex-based mapping).
 */
int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	return pat_idx_list_reg_cap(expr, pat, 1, err);
}
|
|
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* Indexes IP pattern <pat> into expression <expr>. IPv4 networks with a
 * contiguous mask go into the IPv4 prefix tree; non-contiguous masks fall
 * back to the pattern list. IPv6 networks always go into the IPv6 prefix
 * tree. Returns 1 on success, 0 on error (with <err> filled) or when the
 * pattern type is neither IPv4 nor IPv6.
 */
int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	unsigned int mask;
	struct pattern_tree *node;

	/* IPv4 patterns may be tree-indexed when the mask is contiguous */
	if (pat->type == SMP_T_IPV4) {
		/* in IPv4 case, check if the mask is contiguous so that we can
		 * insert the network into the tree. A continuous mask has only
		 * ones on the left. This means that this mask + its lower bit
		 * added once again is null.
		 */
		mask = ntohl(pat->val.ipv4.mask.s_addr);
		if (mask + (mask & -mask) == 0) {
			/* (mask & -mask) isolates the lowest set bit, so
			 * flsnz() of it gives the position of the first '1';
			 * 33 minus that position is the CIDR prefix length.
			 */
			mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */

			/* node memory allocation: 4 key bytes appended inline */
			node = calloc(1, sizeof(*node) + 4);
			if (!node) {
				memprintf(err, "out of memory while loading pattern");
				return 0;
			}

			/* copy the pointer to sample associated to this node */
			node->data = pat->data;
			node->ref = pat->ref;

			/* store <addr>/<mask> into the node */
			memcpy(node->node.key, &pat->val.ipv4.addr, 4); /* network byte order */
			node->node.node.pfx = mask;

			/* Insert the entry. */
			ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4);

			/* link the node from the expression and the reference */
			node->expr = expr;
			node->from_ref = pat->ref->tree_head;
			pat->ref->tree_head = &node->from_ref;
			expr->ref->revision = rdtsc();
			expr->ref->entry_cnt++;

			/* that's ok */
			return 1;
		}
		else {
			/* If the mask is not contiguous, just add the pattern to the list */
			return pat_idx_list_val(expr, pat, err);
		}
	}
	else if (pat->type == SMP_T_IPV6) {
		/* IPv6 also can be indexed: 16 key bytes appended inline */
		node = calloc(1, sizeof(*node) + 16);
		if (!node) {
			memprintf(err, "out of memory while loading pattern");
			return 0;
		}

		/* copy the pointer to sample associated to this node */
		node->data = pat->data;
		node->ref = pat->ref;

		/* store <addr>/<mask> into the node */
		memcpy(node->node.key, &pat->val.ipv6.addr, 16); /* network byte order */
		node->node.node.pfx = pat->val.ipv6.mask;

		/* Insert the entry. */
		ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16);

		/* link the node from the expression and the reference */
		node->expr = expr;
		node->from_ref = pat->ref->tree_head;
		pat->ref->tree_head = &node->from_ref;
		expr->ref->revision = rdtsc();
		expr->ref->entry_cnt++;

		/* that's ok */
		return 1;
	}

	return 0;
}
|
|
|
|
|
|
|
|
|
|
int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err)
|
|
|
|
|
{
|
|
|
|
|
int len;
|
2013-12-13 10:09:50 -05:00
|
|
|
struct pattern_tree *node;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* Only string can be indexed */
|
2013-12-16 18:20:33 -05:00
|
|
|
if (pat->type != SMP_T_STR) {
|
2013-12-13 09:12:32 -05:00
|
|
|
memprintf(err, "internal error: string expected, but the type is '%s'",
|
|
|
|
|
smp_to_type[pat->type]);
|
|
|
|
|
return 0;
|
2014-01-23 11:53:31 -05:00
|
|
|
}
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */
|
2014-04-28 05:18:57 -04:00
|
|
|
if (expr->mflags & PAT_MF_IGNORE_CASE)
|
2013-12-13 09:12:32 -05:00
|
|
|
return pat_idx_list_str(expr, pat, err);
|
|
|
|
|
|
|
|
|
|
/* Process the key len */
|
|
|
|
|
len = strlen(pat->ptr.str) + 1;
|
2013-12-06 13:06:43 -05:00
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* node memory allocation */
|
|
|
|
|
node = calloc(1, sizeof(*node) + len);
|
|
|
|
|
if (!node) {
|
|
|
|
|
memprintf(err, "out of memory while loading pattern");
|
|
|
|
|
return 0;
|
2013-11-28 05:05:19 -05:00
|
|
|
}
|
|
|
|
|
|
2013-12-13 09:12:32 -05:00
|
|
|
/* copy the pointer to sample associated to this node */
|
2015-08-19 02:35:43 -04:00
|
|
|
node->data = pat->data;
|
2014-01-28 09:54:36 -05:00
|
|
|
node->ref = pat->ref;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* copy the string */
|
|
|
|
|
memcpy(node->node.key, pat->ptr.str, len);
|
|
|
|
|
|
|
|
|
|
/* index the new node */
|
2014-01-29 18:27:15 -05:00
|
|
|
ebst_insert(&expr->pattern_tree, &node->node);
|
2023-08-23 09:58:26 -04:00
|
|
|
|
|
|
|
|
node->expr = expr;
|
2020-11-03 08:50:29 -05:00
|
|
|
node->from_ref = pat->ref->tree_head;
|
|
|
|
|
pat->ref->tree_head = &node->from_ref;
|
2020-11-02 09:26:51 -05:00
|
|
|
expr->ref->revision = rdtsc();
|
2021-05-21 10:59:15 -04:00
|
|
|
expr->ref->entry_cnt++;
|
2013-12-13 09:12:32 -05:00
|
|
|
|
|
|
|
|
/* that's ok */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-10 02:53:48 -04:00
|
|
|
/* Indexes string pattern <pat> as a prefix into the prefix tree of
 * expression <expr>. Falls back to the list when case-insensitive matching
 * is requested. Returns 1 on success, 0 on error (with <err> filled).
 */
int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err)
{
	int len;
	struct pattern_tree *node;

	/* Only string can be indexed */
	if (pat->type != SMP_T_STR) {
		memprintf(err, "internal error: string expected, but the type is '%s'",
		          smp_to_type[pat->type]);
		return 0;
	}

	/* If the flag PAT_F_IGNORE_CASE is set, we cannot use trees */
	if (expr->mflags & PAT_MF_IGNORE_CASE)
		return pat_idx_list_str(expr, pat, err);

	/* Process the key len (not counting the trailing zero here) */
	len = strlen(pat->ptr.str);

	/* node memory allocation: key plus trailing zero appended inline */
	node = calloc(1, sizeof(*node) + len + 1);
	if (!node) {
		memprintf(err, "out of memory while loading pattern");
		return 0;
	}

	/* copy the pointer to sample associated to this node */
	node->data = pat->data;
	node->ref = pat->ref;

	/* copy the string and the trailing zero */
	memcpy(node->node.key, pat->ptr.str, len + 1);
	node->node.node.pfx = len * 8; /* prefix length expressed in bits */

	/* index the new node. NOTE(review): the third argument is the key
	 * length in bytes while .pfx above is in bits — presumably intended
	 * by the ebtree API; confirm against ebmb_insert_prefix() docs.
	 */
	ebmb_insert_prefix(&expr->pattern_tree, &node->node, len);

	/* link the node from the expression and the reference */
	node->expr = expr;
	node->from_ref = pat->ref->tree_head;
	pat->ref->tree_head = &node->from_ref;
	expr->ref->revision = rdtsc();
	expr->ref->entry_cnt++;

	/* that's ok */
	return 1;
}
|
|
|
|
|
|
2020-11-02 13:53:16 -05:00
|
|
|
/* Deletes all patterns from reference <elt>. Note that all of their
 * expressions must be locked, and the pattern lock must be held as well.
 * The element's tree_head and list_head chains (singly-linked lists of
 * void** cells embedded in each pattern) are walked, every attached
 * pattern is released, and both heads are reset to NULL.
 */
void pat_delete_gen(struct pat_ref *ref, struct pat_ref_elt *elt)
{
	struct pattern_tree *tree;
	struct pattern_list *pat;
	void **node;

	/* delete all known tree nodes. They are all allocated inline */
	for (node = elt->tree_head; node;) {
		tree = container_of(node, struct pattern_tree, from_ref);
		node = *node; /* advance before freeing the current cell */
		BUG_ON(tree->ref != elt);

		ebmb_delete(&tree->node);
		free(tree->data);
		free(tree);
	}

	/* delete all list nodes and free their pattern entries (str/reg) */
	for (node = elt->list_head; node;) {
		pat = container_of(node, struct pattern_list, from_ref);
		node = *node; /* advance before freeing the current cell */
		BUG_ON(pat->pat.ref != elt);

		/* Delete and free entry. */
		LIST_DELETE(&pat->list);
		if (pat->pat.sflags & PAT_SF_REGFREE)
			regex_free(pat->pat.ptr.reg);
		else
			free(pat->pat.ptr.ptr);
		free(pat->pat.data);
		free(pat);
	}

	/* update revision number to refresh the cache */
	ref->revision = rdtsc();
	ref->entry_cnt--;
	elt->tree_head = NULL;
	elt->list_head = NULL;
}
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
/* Initializes expression <expr>: empty pattern list and empty primary and
 * secondary indexing trees.
 */
void pattern_init_expr(struct pattern_expr *expr)
{
	LIST_INIT(&expr->patterns);
	expr->pattern_tree = EB_ROOT;
	expr->pattern_tree_2 = EB_ROOT;
}
|
|
|
|
|
|
|
|
|
|
/* Initializes pattern head <head> as an empty list. */
void pattern_init_head(struct pattern_head *head)
{
	LIST_INIT(&head->head);
}
|
|
|
|
|
|
|
|
|
|
/* The following functions are related to the management of the reference
 * lists. These lists are used to store the original pattern and its
 * associated value in string form.
 *
 * This is used with modifiable ACLs and MAPs.
 *
 * Pattern references are stored with two identifiers: the unique_id and
 * the reference.
 *
 * The reference identifies a file. Every file registered under the same name
 * points to the same reference. A file may be registered several times. If
 * the file is modified, all its dependencies are modified as well. The
 * reference can be used with a map or an acl.
 *
 * The unique_id identifies an inline acl. The unique id is unique for each
 * acl. You cannot force the same id in the configuration file, because this
 * reports an error.
 *
 * A particular case appears when the filename is a number. In this case, the
 * unique_id is set to the number represented by the filename and the
 * reference is set as well. This method prevents duplicate unique_ids.
 *
 */
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function looks up a reference by name. If the reference is found, a
|
|
|
|
|
* pointer to the struct pat_ref is returned, otherwise NULL is returned.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
|
|
|
|
struct pat_ref *pat_ref_lookup(const char *reference)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
2025-09-25 09:21:04 -04:00
|
|
|
/* Skip file@ or opt@ prefix, it is the default case. Can be mixed with ref omitting the prefix */
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0)
|
|
|
|
|
reference += 5;
|
2025-09-25 09:21:04 -04:00
|
|
|
else if (strlen(reference) > 4 && strncmp(reference, "opt@", 4) == 0)
|
|
|
|
|
reference += 4;
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
list_for_each_entry(ref, &pattern_reference, list)
|
2014-03-11 09:29:22 -04:00
|
|
|
if (ref->reference && strcmp(reference, ref->reference) == 0)
|
|
|
|
|
return ref;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function looks up a reference's unique id. If the reference is found, a
|
|
|
|
|
* pointer to the struct pat_ref is returned, otherwise NULL is returned.
|
2014-03-11 09:29:22 -04:00
|
|
|
*/
|
|
|
|
|
struct pat_ref *pat_ref_lookupid(int unique_id)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
|
|
|
|
list_for_each_entry(ref, &pattern_reference, list)
|
|
|
|
|
if (ref->unique_id == unique_id)
|
2014-02-11 05:31:40 -05:00
|
|
|
return ref;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-02 11:30:17 -05:00
|
|
|
/* This function removes from the pattern reference <ref> all the patterns
 * attached to the reference element <elt>, and the element itself. The
 * reference must be locked. Watchers (brefs) pointing at <elt> are moved
 * to the next element of its generation's list, or left unlinked when
 * <elt> was the last one.
 */
void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt)
{
	struct pat_ref_gen *gen;
	struct pattern_expr *expr;
	struct bref *bref, *back;

	/* the element necessarily belongs to a known generation */
	gen = pat_ref_gen_get(ref, elt->gen_id);
	BUG_ON(!gen);

	/*
	 * we have to unlink all watchers from this reference pattern. We must
	 * not relink them if this elt was the last one in the list.
	 */
	list_for_each_entry_safe(bref, back, &elt->back_refs, users) {
		LIST_DELETE(&bref->users);
		LIST_INIT(&bref->users);
		/* relink onto the next element unless <elt> is the last one
		 * of the generation's list (next is the list head itself)
		 */
		if (elt->list.n != &gen->head)
			LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
		bref->ref = elt->list.n;
	}

	/* delete all entries from all expressions for this pattern, holding
	 * every expression's write lock for the whole deletion
	 */
	list_for_each_entry(expr, &ref->pat, list)
		HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);

	pat_delete_gen(ref, elt);

	list_for_each_entry(expr, &ref->pat, list)
		HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);

	/* finally unlink the element from its generation (list and compact
	 * tree) and release it
	 */
	LIST_DELETE(&elt->list);
	cebs_item_delete(&gen->elt_root, node, pattern, elt);
	free(elt->sample);
	free(elt);
	HA_ATOMIC_INC(&patterns_freed);
}
|
|
|
|
|
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
/* This function removes the pattern matching the pointer <refelt> from
|
2020-10-30 11:03:50 -04:00
|
|
|
* the reference and from each expr member of this reference. This function
|
|
|
|
|
* returns 1 if the entry was found and deleted, otherwise zero.
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
*
|
|
|
|
|
* <refelt> is user input: it is provided as an ID and should never be
|
|
|
|
|
* dereferenced without making sure that it is valid.
|
2014-01-28 10:43:36 -05:00
|
|
|
*/
|
|
|
|
|
int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt)
|
|
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
struct pat_ref_elt *elt, *safe;
|
2023-08-22 12:32:13 -04:00
|
|
|
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
/* delete pattern from reference */
|
2025-12-17 23:37:44 -05:00
|
|
|
pat_ref_gen_foreach(gen, ref) {
|
|
|
|
|
list_for_each_entry_safe(elt, safe, &gen->head, list) {
|
|
|
|
|
if (elt == refelt) {
|
|
|
|
|
event_hdl_publish(&ref->e_subs, EVENT_HDL_SUB_PAT_REF_DEL, NULL);
|
|
|
|
|
pat_ref_delete_by_ptr(ref, elt);
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
2014-01-28 10:43:36 -05:00
|
|
|
}
|
|
|
|
|
|
2025-12-17 23:31:29 -05:00
|
|
|
/* Create a new generation object.
|
|
|
|
|
*
|
|
|
|
|
* Returns NULL in case of memory allocation failure.
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_gen *pat_ref_gen_new(struct pat_ref *ref, unsigned int gen_id)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref_gen *gen, *old;
|
|
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
gen = malloc(sizeof(struct pat_ref_gen));
|
2025-12-17 23:31:29 -05:00
|
|
|
if (!gen)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
LIST_INIT(&gen->head);
|
|
|
|
|
ceb_init_root(&gen->elt_root);
|
|
|
|
|
gen->gen_id = gen_id;
|
|
|
|
|
|
|
|
|
|
old = cebu32_item_insert(&ref->gen_root, gen_node, gen_id, gen);
|
|
|
|
|
BUG_ON(old != gen, "Generation ID already exists");
|
|
|
|
|
|
|
|
|
|
return gen;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Find the generation <gen_id> in the pattern reference <ref>.
|
|
|
|
|
*
|
|
|
|
|
* Returns NULL if the generation cannot be found.
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_gen *pat_ref_gen_get(struct pat_ref *ref, unsigned int gen_id)
|
|
|
|
|
{
|
OPTIM: patterns: cache the current generation
This makes a significant difference when loading large files and during
commit and clear operations, thanks to improved cache locality. In the
measurements below, master refers to the code before any of the changes
to the patterns code, not the code before this one commit.
Timing the replacement of 10M entries from the CLI with this command
which also reports timestamps at start, end of upload and end of clear:
$ (echo "prompt i"; echo "show activity"; echo "prepare acl #0";
awk '{print "add acl @1 #0",$0}' < bad-ip.map; echo "show activity";
echo "commit acl @1 #0"; echo "clear acl @0 #0";echo "show activity") |
socat -t 10 - /tmp/sock1 | grep ^uptim
master, on a 3.7 GHz EPYC, 3 samples:
uptime_now: 6.087030
uptime_now: 25.981777 => 21.9 sec insertion time
uptime_now: 29.286368 => 3.3 sec commit+clear
uptime_now: 5.748087
uptime_now: 25.740675 => 20.0s insertion time
uptime_now: 29.039023 => 3.3 s commit+clear
uptime_now: 7.065362
uptime_now: 26.769596 => 19.7s insertion time
uptime_now: 30.065044 => 3.3s commit+clear
And after this commit:
uptime_now: 6.119215
uptime_now: 25.023019 => 18.9 sec insertion time
uptime_now: 27.155503 => 2.1 sec commit+clear
uptime_now: 5.675931
uptime_now: 24.551035 => 18.9s insertion
uptime_now: 26.652352 => 2.1s commit+clear
uptime_now: 6.722256
uptime_now: 25.593952 => 18.9s insertion
uptime_now: 27.724153 => 2.1s commit+clear
Now timing the startup time with a 10M entries file (on another machine)
on master, 20 samples:
Standard Deviation, s: 0.061652677408033
Mean: 4.217
And after this commit:
Standard Deviation, s: 0.081821371548669
Mean: 3.78
2025-12-22 09:12:40 -05:00
|
|
|
struct pat_ref_gen *gen;
|
|
|
|
|
|
|
|
|
|
/* We optimistically try to use the cached generation if it's the current one. */
|
|
|
|
|
if (likely(gen_id == ref->curr_gen && gen_id == ref->cached_gen.id && ref->cached_gen.data))
|
|
|
|
|
return ref->cached_gen.data;
|
|
|
|
|
|
|
|
|
|
gen = cebu32_item_lookup(&ref->gen_root, gen_node, gen_id, gen_id, struct pat_ref_gen);
|
|
|
|
|
if (unlikely(!gen))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
if (gen_id == ref->curr_gen) {
|
|
|
|
|
ref->cached_gen.id = gen_id;
|
|
|
|
|
ref->cached_gen.data = gen;
|
|
|
|
|
}
|
|
|
|
|
return gen;
|
2025-12-17 23:31:29 -05:00
|
|
|
}
|
|
|
|
|
|
2024-11-20 12:18:54 -05:00
|
|
|
/* This function removes all elements belonging to <gen_id> and matching <key>
|
|
|
|
|
* from the reference <ref>.
|
|
|
|
|
* This function returns 1 if the deletion is done and returns 0 if
|
|
|
|
|
* the entry is not found.
|
|
|
|
|
*/
|
|
|
|
|
int pat_ref_gen_delete(struct pat_ref *ref, unsigned int gen_id, const char *key)
|
|
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
|
|
|
|
struct pat_ref_elt *elt;
|
2024-11-20 12:18:54 -05:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
gen = pat_ref_gen_get(ref, gen_id);
|
|
|
|
|
if (!gen)
|
|
|
|
|
return 0;
|
2024-11-20 12:18:54 -05:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
/* delete pattern from reference */
|
|
|
|
|
elt = cebs_item_lookup(&gen->elt_root, node, pattern, key, struct pat_ref_elt);
|
|
|
|
|
if (!elt)
|
|
|
|
|
return 0;
|
2024-10-18 12:40:41 -04:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
pat_ref_delete_by_ptr(ref, elt);
|
|
|
|
|
event_hdl_publish(&ref->e_subs, EVENT_HDL_SUB_PAT_REF_DEL, NULL);
|
|
|
|
|
return 1;
|
2024-11-20 12:18:54 -05:00
|
|
|
}
|
|
|
|
|
|
2020-11-02 11:30:17 -05:00
|
|
|
/* This function removes all patterns matching <key> from the reference
|
2018-11-15 13:22:31 -05:00
|
|
|
* and from each expr member of the reference. This function returns 1
|
2020-11-02 11:30:17 -05:00
|
|
|
* if the deletion is done and returns 0 is the entry is not found.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
|
|
|
|
int pat_ref_delete(struct pat_ref *ref, const char *key)
|
|
|
|
|
{
|
MEDIUM: pattern: always consider gen_id for pat_ref lookup operations
Historically, pat_ref lookup operations were performed on the whole
pat_ref elements list. As such, set, find and delete operations on a given
key would cause any matching element in pat_ref to be considered.
When prepare/commit operations were added, gen_id was impelemnted in
order to be able to work on a subset from pat_ref without impacting
the current (live) version from pat_ref, until a new subset is committed
to replace the current one.
While the logic was good, there remained a design flaw from the historical
implementation: indeed, legacy functions such as pat_ref_set(),
pat_ref_delete() and pat_ref_find_elt() kept performing the lookups on the
whole set of elements instead of considering only elements from the current
subset. Because of this, mixing new prepare/commit operations with legacy
operations could yield unexpected results.
For instance, before this commit:
echo "add map #0 key oldvalue" | socat /tmp/ha.sock -
echo "prepare map #0" | socat /tmp/ha.sock -
New version created: 1
echo "add map @1 #0 key newvalue" | socat /tmp/ha.sock -
echo "del map #0 key" | socat /tmp/ha.sock -
echo "commit map @1 #0" | socat /tmp/ha.sock -
-> the result would be that "key" entry doesn't exist anymore after the
commit, while we would expect the new value to be there instead.
Thanks to the previous commits, we may finally fix this issue: for set,
find_elt and delete operations, the current generation id is considered.
With the above example, it means that the "del map #0 key" would only
target elements from the current subset, thus elements in "version 1" of
the map would be immune to the delete (as we would expect it to work).
2024-11-20 13:03:00 -05:00
|
|
|
return pat_ref_gen_delete(ref, ref->curr_gen, key);
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
2024-11-20 12:07:52 -05:00
|
|
|
/*
|
|
|
|
|
* find and return an element <elt> belonging to <gen_id> and matching <key> in a
|
|
|
|
|
* reference <ref> return NULL if not found
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_elt *pat_ref_gen_find_elt(struct pat_ref *ref, unsigned int gen_id, const char *key)
|
|
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
2024-11-20 12:07:52 -05:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
gen = pat_ref_gen_get(ref, gen_id);
|
|
|
|
|
if (!gen)
|
|
|
|
|
return NULL;
|
|
|
|
|
return cebs_item_lookup(&gen->elt_root, node, pattern, key, struct pat_ref_elt);
|
2024-11-20 12:07:52 -05:00
|
|
|
}
|
|
|
|
|
|
2014-04-25 10:57:03 -04:00
|
|
|
/*
|
|
|
|
|
* find and return an element <elt> matching <key> in a reference <ref>
|
|
|
|
|
* return NULL if not found
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key)
|
|
|
|
|
{
|
MEDIUM: pattern: always consider gen_id for pat_ref lookup operations
Historically, pat_ref lookup operations were performed on the whole
pat_ref elements list. As such, set, find and delete operations on a given
key would cause any matching element in pat_ref to be considered.
When prepare/commit operations were added, gen_id was impelemnted in
order to be able to work on a subset from pat_ref without impacting
the current (live) version from pat_ref, until a new subset is committed
to replace the current one.
While the logic was good, there remained a design flaw from the historical
implementation: indeed, legacy functions such as pat_ref_set(),
pat_ref_delete() and pat_ref_find_elt() kept performing the lookups on the
whole set of elements instead of considering only elements from the current
subset. Because of this, mixing new prepare/commit operations with legacy
operations could yield unexpected results.
For instance, before this commit:
echo "add map #0 key oldvalue" | socat /tmp/ha.sock -
echo "prepare map #0" | socat /tmp/ha.sock -
New version created: 1
echo "add map @1 #0 key newvalue" | socat /tmp/ha.sock -
echo "del map #0 key" | socat /tmp/ha.sock -
echo "commit map @1 #0" | socat /tmp/ha.sock -
-> the result would be that "key" entry doesn't exist anymore after the
commit, while we would expect the new value to be there instead.
Thanks to the previous commits, we may finally fix this issue: for set,
find_elt and delete operations, the current generation id is considered.
With the above example, it means that the "del map #0 key" would only
target elements from the current subset, thus elements in "version 1" of
the map would be immune to the delete (as we would expect it to work).
2024-11-20 13:03:00 -05:00
|
|
|
return pat_ref_gen_find_elt(ref, ref->curr_gen, key);
|
2014-04-25 10:57:03 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function modifies the sample of pat_ref_elt <elt> in all expressions
 * found under <ref> to become <value>. It is assumed that the caller has
 * already verified that <elt> belongs to <ref>.
 *
 * The update is done in two phases: first <value> is parsed once per
 * expression type to make sure it is acceptable everywhere, then the new
 * sample string is installed and re-parsed in place for every indexed
 * pattern (tree-based and list-based) attached to <elt>.
 *
 * Returns 1 on success, 0 on parse failure or allocation failure, with an
 * error message allocated into <err> via memprintf().
 */
static inline int pat_ref_set_elt(struct pat_ref *ref, struct pat_ref_elt *elt,
                                  const char *value, char **err)
{
	struct pattern_expr *expr;
	struct sample_data **data;
	char *sample;
	struct sample_data test;   /* scratch target for the dry-run parses */
	struct pattern_tree *tree;
	struct pattern_list *pat;
	void **node;               /* cursor walking the from_ref chains below */

	/* Try all needed converters. */
	list_for_each_entry(expr, &ref->pat, list) {
		if (!expr->pat_head->parse_smp)
			continue;

		if (!expr->pat_head->parse_smp(value, &test)) {
			memprintf(err, "unable to parse '%s'", value);
			return 0;
		}
	}

	/* Modify pattern from reference. */
	sample = strdup(value);
	if (!sample) {
		memprintf(err, "out of memory error");
		return 0;
	}
	/* Load sample in each reference. All the conversions are tested
	 * below, normally these calls don't fail.
	 */
	/* Walk the chain of tree-indexed patterns hanging off this element.
	 * Each link is a pointer to the next pattern's from_ref field, so the
	 * cursor is advanced by dereferencing it before touching the payload.
	 */
	for (node = elt->tree_head; node;) {
		tree = container_of(node, struct pattern_tree, from_ref);
		node = *node; /* advance before the continue below can skip it */
		BUG_ON(tree->ref != elt);
		expr = tree->expr;
		if (!expr->pat_head->parse_smp)
			continue;

		data = &tree->data;
		if (data && *data) {
			/* re-parse in place under the expression's write lock;
			 * on failure the sample is dropped for this pattern
			 */
			HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
			if (!expr->pat_head->parse_smp(sample, *data))
				*data = NULL;
			HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
		}
	}

	/* Same walk for the list-indexed patterns of this element. */
	for (node = elt->list_head; node;) {
		pat = container_of(node, struct pattern_list, from_ref);
		node = *node; /* advance before the continue below can skip it */
		BUG_ON(pat->pat.ref != elt);
		expr = pat->expr;
		if (!expr->pat_head->parse_smp)
			continue;

		data = &pat->pat.data;
		if (data && *data) {
			HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
			if (!expr->pat_head->parse_smp(sample, *data))
				*data = NULL;
			HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
		}
	}

	/* free old sample only when all exprs are updated */
	free(elt->sample);
	elt->sample = sample;

	return 1;
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function modifies the sample of pat_ref_elt <refelt> in all expressions
|
|
|
|
|
* found under <ref> to become <value>, after checking that <refelt> really
|
|
|
|
|
* belongs to <ref>.
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
*
|
|
|
|
|
* <refelt> is user input: it is provided as an ID and should never be
|
|
|
|
|
* dereferenced without making sure that it is valid.
|
2020-10-30 11:03:50 -04:00
|
|
|
*/
|
2014-01-29 13:08:49 -05:00
|
|
|
int pat_ref_set_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt, const char *value, char **err)
|
2014-01-29 10:24:55 -05:00
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
struct pat_ref_elt *elt;
|
|
|
|
|
|
|
|
|
|
/* Look for pattern in the reference. */
|
2025-12-17 23:37:44 -05:00
|
|
|
pat_ref_gen_foreach(gen, ref) {
|
|
|
|
|
list_for_each_entry(elt, &gen->head, list) {
|
|
|
|
|
if (elt == refelt) {
|
|
|
|
|
if (!pat_ref_set_elt(ref, elt, value, err))
|
|
|
|
|
return 0;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions
Some regressions were introduced by 5fea59754b ("MEDIUM: map/acl:
Accelerate several functions using pat_ref_elt struct ->head list")
pat_ref_delete_by_id() fails to properly unlink and free the removed
reference because it bypasses the pat_ref_delete_by_ptr() made for
that purpose. This function is normally used everywhere the target
reference is set for removal, such as the pat_ref_delete() function
that matches pattern against a string. The call was probably skipped
by accident during the rewrite of the function.
With the above commit also comes another undesirable change:
both pat_ref_delete_by_id() and pat_ref_set_by_id() directly use the
<refelt> argument as a valid pointer (they do dereference it).
This is wrong, because <refelt> is unsafe and should be handled as an
ID, not a pointer (hence the function name). Indeed, the calling function
may directly pass user input from the CLI as <refelt> argument, so we must
first ensure that it points to a valid element before using it, else it is
probably invalid and we shouldn't touch it.
What this patch essentially does, is that it reverts pat_ref_set_by_id()
and pat_ref_delete_by_id() to pre 5fea59754b behavior. This seems like
it was the only optimization from the patch that doesn't apply.
Hopefully, after reviewing the changes with Fred, it seems that the 2
functions are only being involved in commands for manipulating maps or
acls on the cli, so the "missed" opportunity to improve their performance
shouldn't matter much. Nonetheless, if we wanted to speed up the reference
lookup by ID, we could consider adding an eb64 tree for that specific
purpose that contains all pattern references IDs (ie: pointers) so that
eb lookup functions may be used instead of linear list search.
The issue was raised by Marko Juraga as he failed to perform an an acl
removal by reference on the CLI on 2.9 which was known to work properly
on other versions.
It should be backported on 2.9.
Co-Authored-by: Frédéric Lécaille <flecaille@haproxy.com>
2023-12-08 05:46:15 -05:00
|
|
|
}
|
2014-01-29 10:24:55 -05:00
|
|
|
}
|
2014-01-29 13:08:49 -05:00
|
|
|
|
|
|
|
|
memprintf(err, "key or pattern not found");
|
2014-01-29 10:24:55 -05:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
MEDIUM: migrate the patterns reference to cebs_tree
cebs_tree are 24 bytes smaller than ebst_tree (16B vs 40B), and pattern
references are only used during map/acl updates, so their storage is
pure loss between updates (which most of the time never happen). By
switching their indexing to compact trees, we can save 16 to 24 bytes
per entry depending on alightment (here it's 24 per struct but 16
practical as malloc's alignment keeps 8 unused).
Tested on core i7-8650U running at 3.0 GHz, with a file containing
17.7M IP addresses (16.7M different):
$ time ./haproxy -c -f acl-ip.cfg
Save 280 MB RAM for 17.7M IP addresses, and slightly speeds up the
startup (5.8%, from 19.2s to 18.2s), a part of which possible being
attributed to having to write less memory. Note that this is on small
strings. On larger ones such as user-agents, ebtree doesn't reread
the whole key and might be more efficient.
Before:
RAM (VSZ/RSS): 4443912 3912444
real 0m19.211s
user 0m18.138s
sys 0m1.068s
Overhead Command Shared Object Symbol
44.79% haproxy haproxy [.] ebst_insert
25.07% haproxy haproxy [.] ebmb_insert_prefix
3.44% haproxy libc-2.33.so [.] __libc_calloc
2.71% haproxy libc-2.33.so [.] _int_malloc
2.33% haproxy haproxy [.] free_pattern_tree
1.78% haproxy libc-2.33.so [.] inet_pton4
1.62% haproxy libc-2.33.so [.] _IO_fgets
1.58% haproxy libc-2.33.so [.] _int_free
1.56% haproxy haproxy [.] pat_ref_push
1.35% haproxy libc-2.33.so [.] malloc_consolidate
1.16% haproxy libc-2.33.so [.] __strlen_avx2
0.79% haproxy haproxy [.] pat_idx_tree_ip
0.76% haproxy haproxy [.] pat_ref_read_from_file
0.60% haproxy libc-2.33.so [.] __strrchr_avx2
0.55% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.54% haproxy libc-2.33.so [.] __memchr_avx2
0.46% haproxy haproxy [.] pat_ref_append
After:
RAM (VSZ/RSS): 4166108 3634768
real 0m18.114s
user 0m17.113s
sys 0m0.996s
Overhead Command Shared Object Symbol
38.99% haproxy haproxy [.] cebs_insert
27.09% haproxy haproxy [.] ebmb_insert_prefix
3.63% haproxy libc-2.33.so [.] __libc_calloc
3.18% haproxy libc-2.33.so [.] _int_malloc
2.69% haproxy haproxy [.] free_pattern_tree
1.99% haproxy libc-2.33.so [.] inet_pton4
1.74% haproxy libc-2.33.so [.] _IO_fgets
1.73% haproxy libc-2.33.so [.] _int_free
1.57% haproxy haproxy [.] pat_ref_push
1.48% haproxy libc-2.33.so [.] malloc_consolidate
1.22% haproxy libc-2.33.so [.] __strlen_avx2
1.05% haproxy libc-2.33.so [.] __strcmp_avx2
0.80% haproxy haproxy [.] pat_idx_tree_ip
0.74% haproxy libc-2.33.so [.] __memchr_avx2
0.69% haproxy libc-2.33.so [.] __strrchr_avx2
0.69% haproxy libc-2.33.so [.] _IO_getline_info
0.62% haproxy haproxy [.] pat_ref_read_from_file
0.56% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.56% haproxy libc-2.33.so [.] cfree@GLIBC_2.2.5
0.46% haproxy haproxy [.] pat_ref_append
If the addresses are totally disordered (via "shuf" on the input file),
we see both implementations reach exactly 68.0s (slower due to much
higher cache miss ratio).
On large strings such as user agents (1 million here), it's now slightly
slower (+9%):
Before:
real 0m2.475s
user 0m2.316s
sys 0m0.155s
After:
real 0m2.696s
user 0m2.544s
sys 0m0.147s
But such patterns are much less common than short ones, and the memory
savings do still count.
Note that while it could be tempting to get rid of the list that chains
all these pat_ref_elt together and only enumerate them by walking along
the tree to save 16 extra bytes per entry, that's not possible due to
the problem that insertion ordering is critical (think overlapping regex
such as /index.* and /index.html). Currently it's not possible to proceed
differently because patterns are first pre-loaded into the pat_ref via
pat_ref_read_from_file_smp() and later indexed by pattern_read_from_file(),
which has to only redo the second part anyway for maps/acls declared
multiple times.
2025-01-12 13:38:28 -05:00
|
|
|
static int pat_ref_set_from_elt(struct pat_ref *ref, struct pat_ref_elt *elt, const char *value, char **err)
|
2014-01-29 10:24:55 -05:00
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
2025-09-16 05:49:01 -04:00
|
|
|
struct pat_ref_elt *elt2;
|
2025-12-17 23:37:44 -05:00
|
|
|
int found = 0, publish = 0;
|
2023-08-22 10:52:47 -04:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
if (elt) {
|
|
|
|
|
if (elt->gen_id == ref->curr_gen)
|
|
|
|
|
publish = 1;
|
|
|
|
|
gen = pat_ref_gen_get(ref, elt->gen_id);
|
|
|
|
|
BUG_ON(!gen);
|
2024-08-12 09:32:00 -04:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
for (; elt; elt = elt2) {
|
|
|
|
|
char *tmp_err = NULL;
|
MEDIUM: migrate the patterns reference to cebs_tree
cebs_tree are 24 bytes smaller than ebst_tree (16B vs 40B), and pattern
references are only used during map/acl updates, so their storage is
pure loss between updates (which most of the time never happen). By
switching their indexing to compact trees, we can save 16 to 24 bytes
per entry depending on alightment (here it's 24 per struct but 16
practical as malloc's alignment keeps 8 unused).
Tested on core i7-8650U running at 3.0 GHz, with a file containing
17.7M IP addresses (16.7M different):
$ time ./haproxy -c -f acl-ip.cfg
Save 280 MB RAM for 17.7M IP addresses, and slightly speeds up the
startup (5.8%, from 19.2s to 18.2s), a part of which possible being
attributed to having to write less memory. Note that this is on small
strings. On larger ones such as user-agents, ebtree doesn't reread
the whole key and might be more efficient.
Before:
RAM (VSZ/RSS): 4443912 3912444
real 0m19.211s
user 0m18.138s
sys 0m1.068s
Overhead Command Shared Object Symbol
44.79% haproxy haproxy [.] ebst_insert
25.07% haproxy haproxy [.] ebmb_insert_prefix
3.44% haproxy libc-2.33.so [.] __libc_calloc
2.71% haproxy libc-2.33.so [.] _int_malloc
2.33% haproxy haproxy [.] free_pattern_tree
1.78% haproxy libc-2.33.so [.] inet_pton4
1.62% haproxy libc-2.33.so [.] _IO_fgets
1.58% haproxy libc-2.33.so [.] _int_free
1.56% haproxy haproxy [.] pat_ref_push
1.35% haproxy libc-2.33.so [.] malloc_consolidate
1.16% haproxy libc-2.33.so [.] __strlen_avx2
0.79% haproxy haproxy [.] pat_idx_tree_ip
0.76% haproxy haproxy [.] pat_ref_read_from_file
0.60% haproxy libc-2.33.so [.] __strrchr_avx2
0.55% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.54% haproxy libc-2.33.so [.] __memchr_avx2
0.46% haproxy haproxy [.] pat_ref_append
After:
RAM (VSZ/RSS): 4166108 3634768
real 0m18.114s
user 0m17.113s
sys 0m0.996s
Overhead Command Shared Object Symbol
38.99% haproxy haproxy [.] cebs_insert
27.09% haproxy haproxy [.] ebmb_insert_prefix
3.63% haproxy libc-2.33.so [.] __libc_calloc
3.18% haproxy libc-2.33.so [.] _int_malloc
2.69% haproxy haproxy [.] free_pattern_tree
1.99% haproxy libc-2.33.so [.] inet_pton4
1.74% haproxy libc-2.33.so [.] _IO_fgets
1.73% haproxy libc-2.33.so [.] _int_free
1.57% haproxy haproxy [.] pat_ref_push
1.48% haproxy libc-2.33.so [.] malloc_consolidate
1.22% haproxy libc-2.33.so [.] __strlen_avx2
1.05% haproxy libc-2.33.so [.] __strcmp_avx2
0.80% haproxy haproxy [.] pat_idx_tree_ip
0.74% haproxy libc-2.33.so [.] __memchr_avx2
0.69% haproxy libc-2.33.so [.] __strrchr_avx2
0.69% haproxy libc-2.33.so [.] _IO_getline_info
0.62% haproxy haproxy [.] pat_ref_read_from_file
0.56% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.56% haproxy libc-2.33.so [.] cfree@GLIBC_2.2.5
0.46% haproxy haproxy [.] pat_ref_append
If the addresses are totally disordered (via "shuf" on the input file),
we see both implementations reach exactly 68.0s (slower due to much
higher cache miss ratio).
On large strings such as user agents (1 million here), it's now slightly
slower (+9%):
Before:
real 0m2.475s
user 0m2.316s
sys 0m0.155s
After:
real 0m2.696s
user 0m2.544s
sys 0m0.147s
But such patterns are much less common than short ones, and the memory
savings do still count.
Note that while it could be tempting to get rid of the list that chains
all these pat_ref_elt together and only enumerate them by walking along
the tree to save 16 extra bytes per entry, that's not possible due to
the problem that insertion ordering is critical (think overlapping regex
such as /index.* and /index.html). Currently it's not possible to proceed
differently because patterns are first pre-loaded into the pat_ref via
pat_ref_read_from_file_smp() and later indexed by pattern_read_from_file(),
which has to only redo the second part anyway for maps/acls declared
multiple times.
2025-01-12 13:38:28 -05:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
elt2 = cebs_item_next_dup(&gen->elt_root, node, pattern, elt);
|
|
|
|
|
|
|
|
|
|
if (!pat_ref_set_elt(ref, elt, value, &tmp_err)) {
|
|
|
|
|
if (err)
|
|
|
|
|
*err = tmp_err;
|
|
|
|
|
else
|
|
|
|
|
ha_free(&tmp_err);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
found = 1;
|
2014-01-29 10:24:55 -05:00
|
|
|
}
|
|
|
|
|
}
|
2014-01-29 13:08:49 -05:00
|
|
|
|
|
|
|
|
if (!found) {
|
|
|
|
|
memprintf(err, "entry not found");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2024-10-18 12:40:41 -04:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
if (publish)
|
2024-10-18 12:40:41 -04:00
|
|
|
event_hdl_publish(&ref->e_subs, EVENT_HDL_SUB_PAT_REF_SET, NULL);
|
|
|
|
|
|
2014-01-29 13:08:49 -05:00
|
|
|
return 1;
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
2024-11-20 10:22:22 -05:00
|
|
|
/* Updates to <value> the sample of <elt> and of every duplicate of it under
 * <ref>. Returns 1 on success, 0 on failure with <err> filled when non-NULL.
 */
int pat_ref_set_elt_duplicate(struct pat_ref *ref, struct pat_ref_elt *elt, const char *value,
                              char **err)
{
	/* the generic helper already walks <elt> and all its duplicates */
	return pat_ref_set_from_elt(ref, elt, value, err);
}
|
|
|
|
|
|
2024-11-20 11:30:39 -05:00
|
|
|
/* This function modifies to <value> the sample of all patterns matching <key>
|
|
|
|
|
* and belonging to <gen_id> under <ref>.
|
|
|
|
|
*/
|
|
|
|
|
int pat_ref_gen_set(struct pat_ref *ref, unsigned int gen_id,
|
|
|
|
|
const char *key, const char *value, char **err)
|
|
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
2024-11-20 11:30:39 -05:00
|
|
|
struct pat_ref_elt *elt;
|
|
|
|
|
|
|
|
|
|
/* Look for pattern in the reference. */
|
2025-12-17 23:37:44 -05:00
|
|
|
gen = pat_ref_gen_get(ref, gen_id);
|
|
|
|
|
if (gen)
|
|
|
|
|
elt = cebs_item_lookup(&gen->elt_root, node, pattern, key, struct pat_ref_elt);
|
|
|
|
|
else
|
|
|
|
|
elt = NULL;
|
MEDIUM: migrate the patterns reference to cebs_tree
cebs_tree are 24 bytes smaller than ebst_tree (16B vs 40B), and pattern
references are only used during map/acl updates, so their storage is
pure loss between updates (which most of the time never happen). By
switching their indexing to compact trees, we can save 16 to 24 bytes
per entry depending on alightment (here it's 24 per struct but 16
practical as malloc's alignment keeps 8 unused).
Tested on core i7-8650U running at 3.0 GHz, with a file containing
17.7M IP addresses (16.7M different):
$ time ./haproxy -c -f acl-ip.cfg
Save 280 MB RAM for 17.7M IP addresses, and slightly speeds up the
startup (5.8%, from 19.2s to 18.2s), a part of which possible being
attributed to having to write less memory. Note that this is on small
strings. On larger ones such as user-agents, ebtree doesn't reread
the whole key and might be more efficient.
Before:
RAM (VSZ/RSS): 4443912 3912444
real 0m19.211s
user 0m18.138s
sys 0m1.068s
Overhead Command Shared Object Symbol
44.79% haproxy haproxy [.] ebst_insert
25.07% haproxy haproxy [.] ebmb_insert_prefix
3.44% haproxy libc-2.33.so [.] __libc_calloc
2.71% haproxy libc-2.33.so [.] _int_malloc
2.33% haproxy haproxy [.] free_pattern_tree
1.78% haproxy libc-2.33.so [.] inet_pton4
1.62% haproxy libc-2.33.so [.] _IO_fgets
1.58% haproxy libc-2.33.so [.] _int_free
1.56% haproxy haproxy [.] pat_ref_push
1.35% haproxy libc-2.33.so [.] malloc_consolidate
1.16% haproxy libc-2.33.so [.] __strlen_avx2
0.79% haproxy haproxy [.] pat_idx_tree_ip
0.76% haproxy haproxy [.] pat_ref_read_from_file
0.60% haproxy libc-2.33.so [.] __strrchr_avx2
0.55% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.54% haproxy libc-2.33.so [.] __memchr_avx2
0.46% haproxy haproxy [.] pat_ref_append
After:
RAM (VSZ/RSS): 4166108 3634768
real 0m18.114s
user 0m17.113s
sys 0m0.996s
Overhead Command Shared Object Symbol
38.99% haproxy haproxy [.] cebs_insert
27.09% haproxy haproxy [.] ebmb_insert_prefix
3.63% haproxy libc-2.33.so [.] __libc_calloc
3.18% haproxy libc-2.33.so [.] _int_malloc
2.69% haproxy haproxy [.] free_pattern_tree
1.99% haproxy libc-2.33.so [.] inet_pton4
1.74% haproxy libc-2.33.so [.] _IO_fgets
1.73% haproxy libc-2.33.so [.] _int_free
1.57% haproxy haproxy [.] pat_ref_push
1.48% haproxy libc-2.33.so [.] malloc_consolidate
1.22% haproxy libc-2.33.so [.] __strlen_avx2
1.05% haproxy libc-2.33.so [.] __strcmp_avx2
0.80% haproxy haproxy [.] pat_idx_tree_ip
0.74% haproxy libc-2.33.so [.] __memchr_avx2
0.69% haproxy libc-2.33.so [.] __strrchr_avx2
0.69% haproxy libc-2.33.so [.] _IO_getline_info
0.62% haproxy haproxy [.] pat_ref_read_from_file
0.56% haproxy libc-2.33.so [.] unlink_chunk.constprop.0
0.56% haproxy libc-2.33.so [.] cfree@GLIBC_2.2.5
0.46% haproxy haproxy [.] pat_ref_append
If the addresses are totally disordered (via "shuf" on the input file),
we see both implementations reach exactly 68.0s (slower due to much
higher cache miss ratio).
On large strings such as user agents (1 million here), it's now slightly
slower (+9%):
Before:
real 0m2.475s
user 0m2.316s
sys 0m0.155s
After:
real 0m2.696s
user 0m2.544s
sys 0m0.147s
But such patterns are much less common than short ones, and the memory
savings do still count.
Note that while it could be tempting to get rid of the list that chains
all these pat_ref_elt together and only enumerate them by walking along
the tree to save 16 extra bytes per entry, that's not possible due to
the problem that insertion ordering is critical (think overlapping regex
such as /index.* and /index.html). Currently it's not possible to proceed
differently because patterns are first pre-loaded into the pat_ref via
pat_ref_read_from_file_smp() and later indexed by pattern_read_from_file(),
which has to only redo the second part anyway for maps/acls declared
multiple times.
2025-01-12 13:38:28 -05:00
|
|
|
return pat_ref_set_from_elt(ref, elt, value, err);
|
2024-11-20 11:30:39 -05:00
|
|
|
}
|
|
|
|
|
|
2024-11-20 10:22:22 -05:00
|
|
|
/* This function modifies to <value> the sample of all patterns matching <key>
|
|
|
|
|
* under <ref>.
|
|
|
|
|
*/
|
|
|
|
|
int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char **err)
|
|
|
|
|
{
|
MEDIUM: pattern: always consider gen_id for pat_ref lookup operations
Historically, pat_ref lookup operations were performed on the whole
pat_ref elements list. As such, set, find and delete operations on a given
key would cause any matching element in pat_ref to be considered.
When prepare/commit operations were added, gen_id was impelemnted in
order to be able to work on a subset from pat_ref without impacting
the current (live) version from pat_ref, until a new subset is committed
to replace the current one.
While the logic was good, there remained a design flaw from the historical
implementation: indeed, legacy functions such as pat_ref_set(),
pat_ref_delete() and pat_ref_find_elt() kept performing the lookups on the
whole set of elements instead of considering only elements from the current
subset. Because of this, mixing new prepare/commit operations with legacy
operations could yield unexpected results.
For instance, before this commit:
echo "add map #0 key oldvalue" | socat /tmp/ha.sock -
echo "prepare map #0" | socat /tmp/ha.sock -
New version created: 1
echo "add map @1 #0 key newvalue" | socat /tmp/ha.sock -
echo "del map #0 key" | socat /tmp/ha.sock -
echo "commit map @1 #0" | socat /tmp/ha.sock -
-> the result would be that "key" entry doesn't exist anymore after the
commit, while we would expect the new value to be there instead.
Thanks to the previous commits, we may finally fix this issue: for set,
find_elt and delete operations, the current generation id is considered.
With the above example, it means that the "del map #0 key" would only
target elements from the current subset, thus elements in "version 1" of
the map would be immune to the delete (as we would expect it to work).
2024-11-20 13:03:00 -05:00
|
|
|
return pat_ref_gen_set(ref, ref->curr_gen, key, value, err);
|
2024-11-20 10:22:22 -05:00
|
|
|
}
|
|
|
|
|
|
2024-11-07 04:01:40 -05:00
|
|
|
/* helper function to create and initialize a generic pat_ref struct
|
|
|
|
|
*
|
|
|
|
|
* Returns the new struct on success and NULL on failure (memory allocation
|
|
|
|
|
* error)
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
2024-11-07 04:01:40 -05:00
|
|
|
static struct pat_ref *_pat_ref_new(const char *display, unsigned int flags)
|
2014-02-11 05:31:40 -05:00
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
2024-11-07 04:05:30 -05:00
|
|
|
ref = malloc(sizeof(*ref));
|
2014-02-11 05:31:40 -05:00
|
|
|
if (!ref)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2024-11-07 04:05:30 -05:00
|
|
|
/* don't forget to explicitly initialize all pat_ref struct members */
|
2024-11-07 04:01:40 -05:00
|
|
|
|
2014-02-10 21:31:34 -05:00
|
|
|
if (display) {
|
|
|
|
|
ref->display = strdup(display);
|
|
|
|
|
if (!ref->display) {
|
|
|
|
|
free(ref);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 04:01:40 -05:00
|
|
|
ref->reference = NULL;
|
|
|
|
|
ref->flags = flags;
|
|
|
|
|
ref->curr_gen = 0;
|
|
|
|
|
ref->next_gen = 0;
|
|
|
|
|
ref->unique_id = -1;
|
|
|
|
|
ref->revision = 0;
|
|
|
|
|
ref->entry_cnt = 0;
|
2025-12-17 23:37:44 -05:00
|
|
|
ceb_init_root(&ref->gen_root);
|
OPTIM: patterns: cache the current generation
This makes a significant difference when loading large files and during
commit and clear operations, thanks to improved cache locality. In the
measurements below, master refers to the code before any of the changes
to the patterns code, not the code before this one commit.
Timing the replacement of 10M entries from the CLI with this command
which also reports timestamps at start, end of upload and end of clear:
$ (echo "prompt i"; echo "show activity"; echo "prepare acl #0";
awk '{print "add acl @1 #0",$0}' < bad-ip.map; echo "show activity";
echo "commit acl @1 #0"; echo "clear acl @0 #0";echo "show activity") |
socat -t 10 - /tmp/sock1 | grep ^uptim
master, on a 3.7 GHz EPYC, 3 samples:
uptime_now: 6.087030
uptime_now: 25.981777 => 21.9 sec insertion time
uptime_now: 29.286368 => 3.3 sec commit+clear
uptime_now: 5.748087
uptime_now: 25.740675 => 20.0s insertion time
uptime_now: 29.039023 => 3.3 s commit+clear
uptime_now: 7.065362
uptime_now: 26.769596 => 19.7s insertion time
uptime_now: 30.065044 => 3.3s commit+clear
And after this commit:
uptime_now: 6.119215
uptime_now: 25.023019 => 18.9 sec insertion time
uptime_now: 27.155503 => 2.1 sec commit+clear
uptime_now: 5.675931
uptime_now: 24.551035 => 18.9s insertion
uptime_now: 26.652352 => 2.1s commit+clear
uptime_now: 6.722256
uptime_now: 25.593952 => 18.9s insertion
uptime_now: 27.724153 => 2.1s commit+clear
Now timing the startup time with a 10M entries file (on another machine)
on master, 20 samples:
Standard Deviation, s: 0.061652677408033
Mean: 4.217
And after this commit:
Standard Deviation, s: 0.081821371548669
Mean: 3.78
2025-12-22 09:12:40 -05:00
|
|
|
ref->cached_gen.id = ref->curr_gen;
|
|
|
|
|
ref->cached_gen.data = NULL;
|
2024-11-07 04:01:40 -05:00
|
|
|
LIST_INIT(&ref->pat);
|
|
|
|
|
HA_RWLOCK_INIT(&ref->lock);
|
2024-10-18 12:40:41 -04:00
|
|
|
event_hdl_sub_list_init(&ref->e_subs);
|
2024-11-07 04:01:40 -05:00
|
|
|
|
|
|
|
|
return ref;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 04:11:17 -05:00
|
|
|
/* helper func to properly de-initialize and free pat_ref struct */
|
|
|
|
|
static void pat_ref_free(struct pat_ref *ref)
|
|
|
|
|
{
|
|
|
|
|
ha_free(&ref->reference);
|
|
|
|
|
ha_free(&ref->display);
|
2024-10-18 12:40:41 -04:00
|
|
|
event_hdl_sub_list_destroy(&ref->e_subs);
|
2024-11-07 04:11:17 -05:00
|
|
|
free(ref);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 04:01:40 -05:00
|
|
|
/* This function creates a new reference. <ref> is the reference name.
|
|
|
|
|
* <flags> are PAT_REF_*. /!\ The reference is not checked, and must
|
|
|
|
|
* be unique. The user must check the reference with "pat_ref_lookup()"
|
|
|
|
|
* before calling this function. If the function fails, it returns NULL,
|
|
|
|
|
* otherwise it returns the new struct pat_ref.
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned int flags)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
|
|
|
|
ref = _pat_ref_new(display, flags);
|
|
|
|
|
if (!ref)
|
|
|
|
|
return NULL;
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
|
|
|
|
|
if (strlen(reference) > 5 && strncmp(reference, "virt@", 5) == 0)
|
2024-11-07 04:01:40 -05:00
|
|
|
ref->flags |= PAT_REF_ID;
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
else if (strlen(reference) > 4 && strncmp(reference, "opt@", 4) == 0) {
|
2024-11-07 04:01:40 -05:00
|
|
|
ref->flags |= (PAT_REF_ID|PAT_REF_FILE); // Will be decided later
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
reference += 4;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* A file by default */
|
2024-11-07 04:01:40 -05:00
|
|
|
ref->flags |= PAT_REF_FILE;
|
MEDIUM: pattern: Add support for virtual and optional files for patterns
Before this patch, it was not possible to use a list of patterns, map or a
list of acls, without an existing file. However, it could be handy to just
use an ID, with no file on the disk. It is pretty useful for everyone
managing dynamically these lists. It could also be handy to try to load a
list from a file if it exists without failing if not. This way, it could be
possible to make a cold start without any file (instead of empty file),
dynamically add and del patterns, dump the list to the file periodically to
reuse it on reload (via an external process).
In this patch, we uses some prefixes to be able to use virtual or optional
files.
The default case remains unchanged. regular files are used. A filename, with
no prefix, is used as reference, and it must exist on the disk. With the
prefix "file@", the same is performed. Internally this prefix is
skipped. Thus the same file, with ou without "file@" prefix, references the
same list of patterns.
To use a virtual map, "virt@" prefix must be used. No file is read, even if
the following name looks like a file. It is just an ID. The prefix is part
of ID and must always be used.
To use a optional file, ie a file that may or may not exist on a disk at
startup, "opt@" prefix must be used. If the file exists, its content is
loaded. But HAProxy doesn't complain if not. The prefix is not part of
ID. For a given file, optional files and regular files reference the same
list of patterns.
This patch should fix the issue #2202.
2023-12-01 06:04:35 -05:00
|
|
|
/* Skip file@ prefix to be mixed with ref omitting the prefix */
|
|
|
|
|
if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0)
|
|
|
|
|
reference += 5;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
ref->reference = strdup(reference);
|
|
|
|
|
if (!ref->reference) {
|
2024-11-07 04:11:17 -05:00
|
|
|
pat_ref_free(ref);
|
2014-02-11 05:31:40 -05:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&pattern_reference, &ref->list);
|
2014-03-11 09:29:22 -04:00
|
|
|
return ref;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function creates a new reference. <unique_id> is the unique id. If
|
2014-03-11 09:29:22 -04:00
|
|
|
* the value of <unique_id> is -1, the unique id is calculated later.
|
|
|
|
|
* <flags> are PAT_REF_*. /!\ The reference is not checked, and must
|
|
|
|
|
* be unique. The user must check the reference with "pat_ref_lookup()"
|
|
|
|
|
* or pat_ref_lookupid before calling this function. If the function
|
2020-10-30 11:03:50 -04:00
|
|
|
* fails, it returns NULL, otherwise it returns the new struct pat_ref.
|
2014-03-11 09:29:22 -04:00
|
|
|
*/
|
2014-02-10 21:31:34 -05:00
|
|
|
struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int flags)
|
2014-03-11 09:29:22 -04:00
|
|
|
{
|
|
|
|
|
struct pat_ref *ref;
|
|
|
|
|
|
2024-11-07 04:01:40 -05:00
|
|
|
ref = _pat_ref_new(display, flags);
|
2014-03-11 09:29:22 -04:00
|
|
|
if (!ref)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
ref->unique_id = unique_id;
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2024-11-07 04:01:40 -05:00
|
|
|
LIST_APPEND(&pattern_reference, &ref->list);
|
2014-02-11 05:31:40 -05:00
|
|
|
return ref;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-28 05:52:46 -04:00
|
|
|
/* This function adds entry to <ref>. It can fail on memory error. It returns
|
|
|
|
|
* the newly added element on success, or NULL on failure. The PATREF_LOCK on
|
2020-10-28 06:43:49 -04:00
|
|
|
* <ref> must be held. It sets the newly created pattern's generation number
|
|
|
|
|
* to the same value as the reference's.
|
2014-02-11 05:31:40 -05:00
|
|
|
*/
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, unsigned int gen_id,
|
|
|
|
|
const char *pattern, const char *sample, int line)
|
2014-02-11 05:31:40 -05:00
|
|
|
{
|
2025-12-17 23:37:44 -05:00
|
|
|
struct pat_ref_gen *gen;
|
2014-02-11 05:31:40 -05:00
|
|
|
struct pat_ref_elt *elt;
|
2023-11-26 05:56:08 -05:00
|
|
|
int len = strlen(pattern);
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2023-11-26 05:56:08 -05:00
|
|
|
elt = calloc(1, sizeof(*elt) + len + 1);
|
2014-02-11 05:31:40 -05:00
|
|
|
if (!elt)
|
2020-10-28 05:52:46 -04:00
|
|
|
goto fail;
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
gen = pat_ref_gen_get(ref, gen_id);
|
|
|
|
|
if (!gen) {
|
|
|
|
|
gen = pat_ref_gen_new(ref, gen_id);
|
|
|
|
|
if (!gen)
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-30 13:58:56 -05:00
|
|
|
elt->gen_id = gen_id;
|
2014-02-11 05:31:40 -05:00
|
|
|
elt->line = line;
|
|
|
|
|
|
2023-11-26 05:56:08 -05:00
|
|
|
memcpy((char*)elt->pattern, pattern, len + 1);
|
2014-02-11 05:31:40 -05:00
|
|
|
|
|
|
|
|
if (sample) {
|
|
|
|
|
elt->sample = strdup(sample);
|
2020-10-28 05:52:46 -04:00
|
|
|
if (!elt->sample)
|
|
|
|
|
goto fail;
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
2017-06-29 09:40:33 -04:00
|
|
|
LIST_INIT(&elt->back_refs);
|
2020-11-03 08:50:29 -05:00
|
|
|
elt->list_head = NULL;
|
|
|
|
|
elt->tree_head = NULL;
|
2025-12-17 23:37:44 -05:00
|
|
|
LIST_APPEND(&gen->head, &elt->list);
|
|
|
|
|
cebs_item_insert(&gen->elt_root, node, pattern, elt);
|
2025-07-04 18:07:25 -04:00
|
|
|
HA_ATOMIC_INC(&patterns_added);
|
2020-10-28 05:52:46 -04:00
|
|
|
return elt;
|
|
|
|
|
fail:
|
|
|
|
|
free(elt);
|
|
|
|
|
return NULL;
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function creates the sample found in <elt>, parses the pattern also
 * found in <elt> and inserts it in <expr>. If the function fails, it returns
 * 0 and <err> is filled. On success the function returns 1.
 *
 * NOTE(review): <patflags> is not used in this function; the matching flags
 * applied at parse time come from expr->mflags -- confirm this is intentional.
 */
int pat_ref_push(struct pat_ref_elt *elt, struct pattern_expr *expr,
                 int patflags, char **err)
{
	struct sample_data *data;
	struct pattern pattern;

	/* Create sample: only needed when the element carries a sample and the
	 * pattern head knows how to parse one (e.g. maps, not plain ACLs).
	 */
	if (elt->sample && expr->pat_head->parse_smp) {
		/* New sample. */
		data = malloc(sizeof(*data));
		if (!data)
			return 0;

		/* Parse value. On failure the unparsed container is released here;
		 * ownership has not yet been transferred to the pattern.
		 */
		if (!expr->pat_head->parse_smp(elt->sample, data)) {
			memprintf(err, "unable to parse '%s'", elt->sample);
			free(data);
			return 0;
		}

	}
	else
		data = NULL;

	/* initialise pattern: zeroed, then linked to the parsed sample (if any)
	 * and back to its reference element.
	 */
	memset(&pattern, 0, sizeof(pattern));
	pattern.data = data;
	pattern.ref = elt;

	/* parse pattern; matching flags come from the expression itself */
	if (!expr->pat_head->parse(elt->pattern, &pattern, expr->mflags, err)) {
		free(data);
		return 0;
	}

	/* index pattern under the expression's write lock; on indexing failure
	 * the lock is released and the sample data is freed since nothing now
	 * owns it.
	 */
	HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
	/* index pattern */
	if (!expr->pat_head->index(expr, &pattern, err)) {
		HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
		free(data);
		return 0;
	}
	HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);

	return 1;
}
|
|
|
|
|
|
2020-10-28 13:45:45 -04:00
|
|
|
/* This function tries to commit entry <elt> into <ref>. The new entry must
 * have already been inserted using pat_ref_append(), and its generation number
 * may have been adjusted as it will not be changed. <err> must point to a NULL
 * pointer. The PATREF lock on <ref> must be held. All the pattern_expr for
 * this reference will be updated (parsing, indexing). On success, non-zero is
 * returned. On failure, all the operation is rolled back (the element is
 * deleted from all expressions and is freed), zero is returned and the error
 * pointer <err> may have been updated (and the caller must free it). Failure
 * causes include memory allocation, parsing error or indexing error.
 */
int pat_ref_commit_elt(struct pat_ref *ref, struct pat_ref_elt *elt, char **err)
{
	struct pattern_expr *expr;

	/* Push the element into every expression attached to this reference.
	 * The first failure triggers a full rollback: pat_ref_delete_by_ptr()
	 * removes the element from all expressions it was already pushed to
	 * and frees it.
	 */
	list_for_each_entry(expr, &ref->pat, list) {
		if (!pat_ref_push(elt, expr, 0, err)) {
			pat_ref_delete_by_ptr(ref, elt);
			return 0;
		}
	}
	return 1;
}
|
|
|
|
|
|
2020-10-29 04:21:43 -04:00
|
|
|
/* Loads <pattern>:<sample> into <ref> for generation <gen>. <sample> may be
|
|
|
|
|
* NULL if none exists (e.g. ACL). If not needed, the generation number should
|
|
|
|
|
* be set to ref->curr_gen. The error pointer must initially point to NULL. The
|
|
|
|
|
* new entry will be propagated to all use places, involving allocation, parsing
|
|
|
|
|
* and indexing. On error (parsing, allocation), the operation will be rolled
|
|
|
|
|
* back, an error may be reported, and NULL will be reported. On success, the
|
|
|
|
|
* freshly allocated element will be returned. The PATREF lock on <ref> must be
|
|
|
|
|
* held during the operation.
|
|
|
|
|
*/
|
|
|
|
|
struct pat_ref_elt *pat_ref_load(struct pat_ref *ref, unsigned int gen,
|
|
|
|
|
const char *pattern, const char *sample,
|
|
|
|
|
int line, char **err)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref_elt *elt;
|
|
|
|
|
|
2025-12-17 23:37:44 -05:00
|
|
|
elt = pat_ref_append(ref, gen, pattern, sample, line);
|
2020-10-29 04:21:43 -04:00
|
|
|
if (elt) {
|
2021-01-15 08:11:59 -05:00
|
|
|
if (!pat_ref_commit_elt(ref, elt, err))
|
2020-10-29 04:21:43 -04:00
|
|
|
elt = NULL;
|
|
|
|
|
} else
|
|
|
|
|
memprintf(err, "out of memory error");
|
|
|
|
|
|
2024-10-18 12:40:41 -04:00
|
|
|
/* ignore if update requires committing to be seen */
|
|
|
|
|
if (elt && gen == ref->curr_gen)
|
|
|
|
|
event_hdl_publish(&ref->e_subs, EVENT_HDL_SUB_PAT_REF_ADD, NULL);
|
|
|
|
|
|
2020-10-29 04:21:43 -04:00
|
|
|
return elt;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-28 05:58:05 -04:00
|
|
|
/* This function adds entry to <ref>. It can fail on memory error. The new
 * entry is added at all the pattern_expr registered in this reference. The
 * function stops on the first error encountered. It returns 0 and <err> is
 * filled. If an error is encountered, the complete add operation is cancelled.
 * If the insertion is a success the function returns 1.
 */
int pat_ref_add(struct pat_ref *ref,
                const char *pattern, const char *sample,
                char **err)
{
	/* thin wrapper: load into the current generation, with no known
	 * file/line (-1), and collapse the element pointer to a boolean.
	 */
	return !!pat_ref_load(ref, ref->curr_gen, pattern, sample, -1, err);
}
|
|
|
|
|
|
2021-04-30 07:19:37 -04:00
|
|
|
/* This function purges all elements from <ref> whose generation is included in
 * the range of <from> to <to> (inclusive), taking wrapping into consideration.
 * It will not purge more than <budget> entries at once, in order to remain
 * responsive. If budget is negative, no limit is applied.
 * The caller must already hold the PATREF_LOCK on <ref>. The function will
 * take the PATEXP_LOCK on all expressions of the pattern as needed. It returns
 * non-zero on completion, or zero if it had to stop before the end after
 * <budget> was depleted.
 */
int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget)
{
	struct pat_ref_gen *gen, *gen2;
	struct pat_ref_elt *elt, *elt_bck;
	struct bref *bref, *bref_bck;
	struct pattern_expr *expr;
	int done;

	list_for_each_entry(expr, &ref->pat, list)
		HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);

	/* all expr are locked, we can safely remove all pat_ref */

	/* assume completion for e.g. empty lists */
	done = 1;
	pat_ref_gen_foreach_safe(gen, gen2, ref) {
		/* unsigned-wrap range test: gen_id is in [from..to] (with
		 * wrapping) iff (gen_id - from) <= (to - from) in unsigned
		 * arithmetic. Out-of-range generations are skipped; when the
		 * range does not wrap (from <= to) the iteration can stop at
		 * the first generation past the range since generations are
		 * visited in ascending id order.
		 */
		if (gen->gen_id - from > to - from) {
			if (from <= to) {
				break;
			}
			continue;
		}

		list_for_each_entry_safe(elt, elt_bck, &gen->head, list) {
			/* stop once the budget is depleted; a negative budget
			 * means unlimited.
			 */
			if (budget >= 0 && !budget--) {
				done = 0;
				break;
			}

			BUG_ON(elt->gen_id != gen->gen_id);

			/*
			 * we have to unlink all watchers from this reference pattern. We must
			 * not relink them if this elt was the last one in the list.
			 */
			list_for_each_entry_safe(bref, bref_bck, &elt->back_refs, users) {
				LIST_DELETE(&bref->users);
				LIST_INIT(&bref->users);
				if (elt->list.n != &gen->head)
					LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
				bref->ref = elt->list.n;
			}

			/* delete the storage for all representations of this pattern. */
			pat_delete_gen(ref, elt);

			/* unlink from both the generation's list and its compact tree */
			LIST_DELETE(&elt->list);
			cebs_item_delete(&gen->elt_root, node, pattern, elt);
			free(elt->sample);
			free(elt);
			HA_ATOMIC_INC(&patterns_freed);
		}

		/* budget exhausted mid-generation: leave the (non-empty)
		 * generation in place and report incompletion.
		 */
		if (!done)
			break;

		/* the generation was fully emptied: drop it, invalidating the
		 * cached generation lookup if it pointed here.
		 */
		BUG_ON(!LIST_ISEMPTY(&gen->head));
		BUG_ON(!ceb_isempty(&gen->elt_root));
		cebu32_item_delete(&ref->gen_root, gen_node, gen_id, gen);
		if (gen->gen_id == ref->cached_gen.id)
			ref->cached_gen.data = NULL;
		free(gen);
	}

	list_for_each_entry(expr, &ref->pat, list)
		HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);

	/* only publish when we're done and if curr_gen was impacted by the
	 * purge (same wrapping range test as above)
	 */
	if (done && ref->curr_gen - from <= to - from)
		event_hdl_publish(&ref->e_subs, EVENT_HDL_SUB_PAT_REF_CLEAR, NULL);

	return done;
}
|
|
|
|
|
|
2020-11-03 04:37:31 -05:00
|
|
|
/* This function prunes all entries of <ref> and all their associated
 * pattern_expr. It may return before the end of the list is reached,
 * returning 0, to yield, indicating to the caller that it must call it again
 * until it returns non-zero. All patterns are purged, both current ones and
 * future or incomplete ones. This is used by "clear map" or "clear acl".
 */
int pat_ref_prune(struct pat_ref *ref)
{
	/* purge the full generation range [0..~0], 100 entries at a time so
	 * that the caller can yield between batches.
	 */
	return pat_ref_purge_range(ref, 0, ~0, 100);
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function looks up any existing reference <ref> in pattern_head <head>, and
|
|
|
|
|
* returns the associated pattern_expr pointer if found, otherwise NULL.
|
|
|
|
|
*/
|
2014-02-11 05:31:40 -05:00
|
|
|
struct pattern_expr *pattern_lookup_expr(struct pattern_head *head, struct pat_ref *ref)
|
|
|
|
|
{
|
2014-01-20 08:29:33 -05:00
|
|
|
struct pattern_expr_list *expr;
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2014-01-20 08:29:33 -05:00
|
|
|
list_for_each_entry(expr, &head->head, list)
|
|
|
|
|
if (expr->expr->ref == ref)
|
|
|
|
|
return expr->expr;
|
2014-02-11 05:31:40 -05:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-15 13:22:31 -05:00
|
|
|
/* This function creates new pattern_expr associated to the reference <ref>.
 * <ref> can be NULL. If an error occurs, the function returns NULL and
 * <err> is filled. Otherwise, the function returns new pattern_expr linked
 * with <head> and <ref>.
 *
 * The returned value can be an already filled pattern list, in this case the
 * flag <reuse> is set.
 */
struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref *ref,
                                      int patflags, char **err, int *reuse)
{
	struct pattern_expr *expr;
	struct pattern_expr_list *list;

	if (reuse)
		*reuse = 0;

	/* Memory and initialization of the chain element. */
	list = calloc(1, sizeof(*list));
	if (!list) {
		memprintf(err, "out of memory");
		return NULL;
	}

	/* Look for existing similar expr. Note that only the index, parse and
	 * parse_smp functions (plus the matching flags) must be identical for
	 * two patterns to be considered similar. The other functions depend on
	 * these first ones.
	 */
	if (ref) {
		list_for_each_entry(expr, &ref->pat, list)
			if (expr->pat_head->index == head->index &&
			    expr->pat_head->parse == head->parse &&
			    expr->pat_head->parse_smp == head->parse_smp &&
			    expr->mflags == patflags)
				break;
		/* list_for_each_entry ran to the end: no match was found */
		if (&expr->list == &ref->pat)
			expr = NULL;
	}
	else
		expr = NULL;

	/* If no similar expr was found, we create new expr. */
	if (!expr) {
		/* Get a lot of memory for the expr struct. */
		expr = calloc(1, sizeof(*expr));
		if (!expr) {
			free(list);
			memprintf(err, "out of memory");
			return NULL;
		}

		/* Initialize this new expr. */
		pattern_init_expr(expr);

		/* Copy the pattern matching and indexing flags. */
		expr->mflags = patflags;

		/* This new pattern expression references one of its heads. */
		expr->pat_head = head;

		/* Link with ref, or to self to facilitate LIST_DELETE() */
		if (ref)
			LIST_APPEND(&ref->pat, &expr->list);
		else
			LIST_INIT(&expr->list);

		expr->ref = ref;

		HA_RWLOCK_INIT(&expr->lock);
	}
	else {
		if (reuse)
			*reuse = 1;
	}

	/* each pattern_expr_list holding a pointer to the expr takes one
	 * reference; the last owner to drop it is responsible for freeing it.
	 */
	HA_ATOMIC_INC(&expr->refcount);

	/* The new list element references the pattern_expr. */
	list->expr = expr;

	/* Link the list element with the pattern_head. */
	LIST_APPEND(&head->head, &list->list);
	return expr;
}
|
|
|
|
|
|
2014-01-29 07:29:45 -05:00
|
|
|
/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
 * be returned there on errors and the caller will have to free it.
 *
 * The file contains one key + value per line. Lines which start with '#' are
 * ignored, just like empty lines. Leading tabs/spaces are stripped. The key is
 * then the first "word" (series of non-space/tabs characters), and the value is
 * what follows this series of space/tab till the end of the line excluding
 * trailing spaces/tabs.
 *
 * Example :
 *
 *     # this is a comment and is ignored
 *        62.212.114.60     1wt.eu      \n
 *     <-><-----------><---><----><---->
 *      |       |        |     |    `--- trailing spaces ignored
 *      |       |        |     `-------- value
 *      |       |        `--------------- middle spaces ignored
 *      |       `------------------------ key
 *      `-------------------------------- leading spaces ignored
 *
 * Return non-zero in case of success, otherwise 0.
 */
int pat_ref_read_from_file_smp(struct pat_ref *ref, char **err)
{
	FILE *file;
	char *c;
	int ret = 0;
	int line = 0;
	char *key_beg;
	char *key_end;
	char *value_beg;
	char *value_end;

	file = fopen(ref->reference, "r");
	if (!file) {
		if (ref->flags & PAT_REF_ID) {
			/* file not found for an optional file, switch it to a virtual list of patterns */
			ref->flags &= ~PAT_REF_FILE;
			return 1;
		}
		memprintf(err, "failed to open pattern file <%s>", ref->reference);
		return 0;
	}
	ref->flags |= PAT_REF_FILE;

	/* now parse all patterns. The file may contain only one pattern
	 * followed by one value per line. Leading, separator and trailing
	 * spaces are stripped. Any line may contain a comment started by '#'.
	 * NOTE(review): lines longer than trash.size are split across fgets()
	 * calls and parsed as separate entries -- confirm this is acceptable.
	 */
	while (fgets(trash.area, trash.size, file) != NULL) {
		line++;
		c = trash.area;

		/* ignore lines beginning with a sharp ('#') */
		if (*c == '#')
			continue;

		/* strip leading spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* empty lines are ignored too */
		if (*c == '\0' || *c == '\r' || *c == '\n')
			continue;

		/* look for the end of the key */
		key_beg = c;
		while (*c && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
			c++;

		key_end = c;

		/* strip middle spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* look for the end of the value, it is the end of the line */
		value_beg = c;
		while (*c && *c != '\n' && *c != '\r')
			c++;
		value_end = c;

		/* trim possibly trailing spaces and tabs */
		while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
			value_end--;

		/* set final \0 and check entries */
		*key_end = '\0';
		*value_end = '\0';

		/* insert values */
		if (!pat_ref_append(ref, ref->curr_gen, key_beg, value_beg, line)) {
			memprintf(err, "out of memory");
			goto out_close;
		}
	}

	if (ferror(file)) {
		memprintf(err, "error encountered while reading <%s> : %s",
		          ref->reference, strerror(errno));
		goto out_close;
	}
	/* success */
	ret = 1;

 out_close:
	fclose(file);
	return ret;
}
|
|
|
|
|
|
2013-11-28 05:05:19 -05:00
|
|
|
/* Reads patterns from a file. If <err_msg> is non-NULL, an error message will
 * be returned there on errors and the caller will have to free it.
 */
int pat_ref_read_from_file(struct pat_ref *ref, char **err)
{
	FILE *file;
	char *c;
	char *arg;
	int ret = 0;
	int line = 0;

	file = fopen(ref->reference, "r");
	if (!file) {
		if (ref->flags & PAT_REF_ID) {
			/* file not found for an optional file, switch it to a virtual list of patterns */
			ref->flags &= ~PAT_REF_FILE;
			return 1;
		}
		memprintf(err, "failed to open pattern file <%s>", ref->reference);
		return 0;
	}

	/* NOTE(review): unlike pat_ref_read_from_file_smp(), PAT_REF_FILE is
	 * not set here after a successful open -- confirm whether this
	 * asymmetry is intentional.
	 */

	/* now parse all patterns. The file may contain only one pattern per
	 * line. If the line contains spaces, they will be part of the pattern.
	 * The pattern stops at the first CR, LF or EOF encountered.
	 */
	while (fgets(trash.area, trash.size, file) != NULL) {
		line++;
		c = trash.area;

		/* ignore lines beginning with a sharp ('#') */
		if (*c == '#')
			continue;

		/* strip leading spaces and tabs */
		while (*c == ' ' || *c == '\t')
			c++;

		/* the whole remaining line (up to CR/LF) is the pattern */
		arg = c;
		while (*c && *c != '\n' && *c != '\r')
			c++;
		*c = 0;

		/* empty lines are ignored too */
		if (c == arg)
			continue;

		if (!pat_ref_append(ref, ref->curr_gen, arg, NULL, line)) {
			memprintf(err, "out of memory when loading patterns from file <%s>", ref->reference);
			goto out_close;
		}
	}

	if (ferror(file)) {
		memprintf(err, "error encountered while reading <%s> : %s",
		          ref->reference, strerror(errno));
		goto out_close;
	}
	ret = 1; /* success */

 out_close:
	fclose(file);
	return ret;
}
|
|
|
|
|
|
2014-02-11 05:31:40 -05:00
|
|
|
/* Loads the patterns from file <filename> (or creates/reuses the in-memory
 * reference of the same name) and attaches them to pattern head <head>.
 *
 * <refflags>  PAT_REF_* flags describing the user of the reference (map/acl).
 * <patflags>  matching flags for the pattern expression.
 * <load_smp>  non-zero when the file carries two columns (key + sample).
 * <err>       filled with an allocated error message on failure.
 * <file>/<line> are the config location, used only for the display string.
 *
 * Returns 1 on success, 0 on failure (with <err> set).
 */
int pattern_read_from_file(struct pattern_head *head, unsigned int refflags,
                           const char *filename, int patflags, int load_smp,
                           char **err, const char *file, int line)
{
	struct pat_ref *ref;
	struct pattern_expr *expr;
	struct pat_ref_gen *gen;
	struct pat_ref_elt *elt;
	int reuse = 0;

	/* Lookup for the existing reference. */
	ref = pat_ref_lookup(filename);

	/* If the reference doesn't exist, create it and load the associated
	 * file (unless the reference is virtual, i.e. not file-backed).
	 */
	if (!ref) {
		chunk_printf(&trash,
		             "pattern loaded from file '%s' used by %s at file '%s' line %d",
		             filename, refflags & PAT_REF_MAP ? "map" : "acl", file, line);

		ref = pat_ref_new(filename, trash.area, refflags);
		if (!ref) {
			memprintf(err, "out of memory");
			return 0;
		}

		if (ref->flags & PAT_REF_FILE) {
			/* file-backed reference: parse its contents now */
			if (load_smp) {
				ref->flags |= PAT_REF_SMP;
				if (!pat_ref_read_from_file_smp(ref, err))
					return 0;
			}
			else {
				if (!pat_ref_read_from_file(ref, err))
					return 0;
			}
		}
		else if ((ref->flags & PAT_REF_ID) && load_smp)
			/* virtual reference (pure ID, no file): just record that
			 * it will carry samples.
			 */
			ref->flags |= PAT_REF_SMP;
	}
	else {
		/* The reference already exists, check the map compatibility. */

		/* If the load requires samples and the flag PAT_REF_SMP is not set,
		 * the reference doesn't contain samples, and cannot be used.
		 */
		if (load_smp) {
			if (!(ref->flags & PAT_REF_SMP)) {
				memprintf(err, "The file \"%s\" is already used as one column file "
				          "and cannot be used by as two column file.",
				          filename);
				return 0;
			}
		}
		else {
			/* The load doesn't require samples. If the flag PAT_REF_SMP is
			 * set, the reference contains a sample, and cannot be used.
			 */
			if (ref->flags & PAT_REF_SMP) {
				memprintf(err, "The file \"%s\" is already used as two column file "
				          "and cannot be used by as one column file.",
				          filename);
				return 0;
			}
		}

		/* Extend the display string with this new user's location. */
		chunk_printf(&trash, "%s", ref->display);
		chunk_appendf(&trash, ", by %s at file '%s' line %d",
		              refflags & PAT_REF_MAP ? "map" : "acl", file, line);
		free(ref->display);
		ref->display = strdup(trash.area);
		if (!ref->display) {
			memprintf(err, "out of memory");
			return 0;
		}

		/* Merge flags. */
		ref->flags |= refflags;
	}

	/* Now we can load patterns from the reference. */

	/* Lookup for an existing expression in the head. If the expression
	 * doesn't exist (or uses different match flags), create it.
	 */
	expr = pattern_lookup_expr(head, ref);
	if (!expr || (expr->mflags != patflags)) {
		expr = pattern_new_expr(head, ref, patflags, err, &reuse);
		if (!expr)
			return 0;
	}

	/* The returned expression may be not empty, because the function
	 * "pattern_new_expr" lookup for similar pattern list and can
	 * reuse a already filled pattern list. In this case, we can not
	 * reload the patterns.
	 */
	if (reuse)
		return 1;

	/* Load reference content in the pattern expression.
	 * We need to load elements in the same order they were seen in the
	 * file. Indeed, some list-based matching types may rely on it as the
	 * list is positional, and for tree-based matching, even if the tree is
	 * content-based in case of duplicated keys we only want the first key
	 * in the file to be considered.
	 */
	pat_ref_gen_foreach(gen, ref) {
		list_for_each_entry(elt, &gen->head, list) {
			if (!pat_ref_push(elt, expr, patflags, err)) {
				if (elt->line > 0)
					memprintf(err, "%s at line %d of file '%s'",
					          *err, elt->line, filename);
				return 0;
			}
		}
	}

	return 1;
}
|
|
|
|
|
|
2014-01-17 09:25:13 -05:00
|
|
|
/* This function executes a pattern match on a sample. It applies pattern <expr>
 * to sample <smp>. The function returns NULL if the sample doesn't match. It
 * returns non-null if the sample matches. If <fill> is true and the sample
 * matches, the function returns the matched pattern. In many cases, this
 * pattern can be a static buffer: the result is only valid until the next call
 * on the same thread.
 */
struct pattern *pattern_exec_match(struct pattern_head *head, struct sample *smp, int fill)
{
	struct pattern_expr_list *list;
	struct pattern *pat;

	if (!head->match) {
		/* No match function: everything matches. Fill the static
		 * pattern with a boolean "true" (SINT 1) when requested.
		 */
		if (fill) {
			static_pattern.data = NULL;
			static_pattern.ref = NULL;
			static_pattern.sflags = 0;
			static_pattern.type = SMP_T_SINT;
			static_pattern.val.i = 1;
		}
		return &static_pattern;
	}

	/* convert input to the type expected by the match functions */
	if (!sample_convert(smp, head->expect_type))
		return NULL;

	/* Try each expression attached to this head, under its read lock,
	 * and stop at the first match.
	 */
	list_for_each_entry(list, &head->head, list) {
		HA_RWLOCK_RDLOCK(PATEXP_LOCK, &list->expr->lock);
		pat = head->match(smp, list->expr, fill);
		if (pat) {
			/* We duplicate the pattern cause it could be modified
			   by another thread */
			if (pat != &static_pattern) {
				memcpy(&static_pattern, pat, sizeof(struct pattern));
				pat = &static_pattern;
			}

			/* We also duplicate the sample data for
			   same reason */
			if (pat->data && (pat->data != &static_sample_data)) {
				switch(pat->data->type) {
				case SMP_T_STR:
					static_sample_data.type = SMP_T_STR;
					static_sample_data.u.str = *get_trash_chunk();
					static_sample_data.u.str.data = pat->data->u.str.data;
					/* truncate to the trash chunk size, keeping room
					 * for the trailing zero below.
					 */
					if (static_sample_data.u.str.data >= static_sample_data.u.str.size)
						static_sample_data.u.str.data = static_sample_data.u.str.size - 1;
					memcpy(static_sample_data.u.str.area,
					       pat->data->u.str.area, static_sample_data.u.str.data);
					static_sample_data.u.str.area[static_sample_data.u.str.data] = 0;
					pat->data = &static_sample_data;
					break;

				case SMP_T_IPV4:
				case SMP_T_IPV6:
				case SMP_T_SINT:
					memcpy(&static_sample_data, pat->data, sizeof(struct sample_data));
					pat->data = &static_sample_data;
					break;
				default:
					/* unimplemented pattern type */
					pat->data = NULL;
					break;
				}
			}
			HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
			return pat;
		}
		HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
	}
	return NULL;
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function prunes the pattern expressions starting at pattern_head <head>. */
|
2014-02-11 05:31:40 -05:00
|
|
|
void pattern_prune(struct pattern_head *head)
|
2014-01-14 10:24:51 -05:00
|
|
|
{
|
2014-01-20 08:29:33 -05:00
|
|
|
struct pattern_expr_list *list, *safe;
|
2014-02-11 05:31:40 -05:00
|
|
|
|
2014-01-20 08:29:33 -05:00
|
|
|
list_for_each_entry_safe(list, safe, &head->head, list) {
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&list->list);
|
BUG/MEDIUM: pattern: prevent UAF on reused pattern expr
Since c5959fd ("MEDIUM: pattern: merge same pattern"), UAF (leading to
crash) can be experienced if the same pattern file (and match method) is
used in two default sections and the first one is not referenced later in
the config. In this case, the first default section will be cleaned up.
However, due to an unhandled case in the above optimization, the original
expr which the second default section relies on is mistakenly freed.
This issue was discovered while trying to reproduce GH #2708. The issue
was particularly tricky to reproduce given the config and sequence
required to make the UAF happen. Hopefully, Github user @asmnek not only
provided useful informations, but since he was able to consistently
trigger the crash in his environment he was able to nail down the crash to
the use of pattern file involved with 2 named default sections. Big thanks
to him.
To fix the issue, let's push the logic from c5959fd a bit further. Instead
of relying on "do_free" variable to know if the expression should be freed
or not (which proved to be insufficient in our case), let's switch to a
simple refcounting logic. This way, no matter who owns the expression, the
last one attempting to free it will be responsible for freeing it.
Refcount is implemented using a 32bit value which fills a previous 4 bytes
structure gap:
int mflags; /* 80 4 */
/* XXX 4 bytes hole, try to pack */
long unsigned int lock; /* 88 8 */
(output from pahole)
Even though it was not reproduced in 2.6 or below by @asmnek (the bug was
revealed thanks to another bugfix), this issue theorically affects all
stable versions (up to c5959fd), thus it should be backported to all
stable versions.
2024-09-09 08:59:19 -04:00
|
|
|
if (HA_ATOMIC_SUB_FETCH(&list->expr->refcount, 1) == 0) {
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_DELETE(&list->expr->list);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_RWLOCK_WRLOCK(PATEXP_LOCK, &list->expr->lock);
|
2014-01-20 08:29:33 -05:00
|
|
|
head->prune(list->expr);
|
2017-11-07 04:42:54 -05:00
|
|
|
HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &list->expr->lock);
|
2014-01-20 08:29:33 -05:00
|
|
|
free(list->expr);
|
|
|
|
|
}
|
|
|
|
|
free(list);
|
2014-02-11 05:31:40 -05:00
|
|
|
}
|
2014-01-14 10:24:51 -05:00
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function compares two pat_ref** on their unique_id, and returns -1/0/1
|
|
|
|
|
* depending on their order (suitable for sorting).
|
|
|
|
|
*/
|
2020-02-27 10:45:50 -05:00
|
|
|
static int cmp_pat_ref(const void *_a, const void *_b)
|
|
|
|
|
{
|
|
|
|
|
struct pat_ref * const *a = _a;
|
|
|
|
|
struct pat_ref * const *b = _b;
|
|
|
|
|
|
|
|
|
|
if ((*a)->unique_id < (*b)->unique_id)
|
|
|
|
|
return -1;
|
|
|
|
|
else if ((*a)->unique_id > (*b)->unique_id)
|
|
|
|
|
return 1;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 11:03:50 -04:00
|
|
|
/* This function finalizes the configuration parsing. It assigns an automatic
 * unique_id to every pat_ref that didn't get one from the configuration, then
 * re-sorts the global pattern_reference list by unique_id. Returns 0 on
 * success, ERR_ALERT | ERR_FATAL on allocation failure.
 */
int pattern_finalize_config(void)
{
	size_t len = 0;
	/* number of refs that already carry a user-defined unique_id; it is
	 * also the index where the unassigned refs start in <arr> below.
	 */
	size_t unassigned_pos = 0;
	int next_unique_id = 0;
	size_t i, j;
	struct pat_ref *ref, **arr;
	struct list pr = LIST_HEAD_INIT(pr);

	pat_lru_seed = ha_random();

	/* Count pat_refs with user-defined unique_id and the total count */
	list_for_each_entry(ref, &pattern_reference, list) {
		len++;
		if (ref->unique_id != -1)
			unassigned_pos++;
	}

	if (len == 0) {
		return 0;
	}

	arr = calloc(len, sizeof(*arr));
	if (arr == NULL) {
		ha_alert("Out of memory error.\n");
		return ERR_ALERT | ERR_FATAL;
	}

	/* Split the refs into <arr>: user-assigned ids first ([0,
	 * unassigned_pos)), then the ones needing an automatic id.
	 */
	i = 0;
	j = unassigned_pos;
	list_for_each_entry(ref, &pattern_reference, list) {
		if (ref->unique_id != -1)
			arr[i++] = ref;
		else
			arr[j++] = ref;
	}

	/* Sort first segment of array with user-defined unique ids for
	 * fast lookup when generating unique ids
	 */
	qsort(arr, unassigned_pos, sizeof(*arr), cmp_pat_ref);

	/* Assign unique ids to the rest of the elements: keep incrementing
	 * until the candidate id is not already taken by a user-defined one
	 * (checked by bsearch in the sorted first segment).
	 */
	for (i = unassigned_pos; i < len; i++) {
		do {
			arr[i]->unique_id = next_unique_id++;
		} while (bsearch(&arr[i], arr, unassigned_pos, sizeof(*arr), cmp_pat_ref));
	}

	/* Sort complete array */
	qsort(arr, len, sizeof(*arr), cmp_pat_ref);

	/* Convert back to linked list */
	for (i = 0; i < len; i++)
		LIST_APPEND(&pr, &arr[i]->list);

	/* swap root: make pattern_reference point at the sorted list <pr>
	 * then drop the temporary head.
	 */
	LIST_INSERT(&pr, &pattern_reference);
	LIST_DELETE(&pr);

	free(arr);
	return 0;
}
|
2019-10-23 00:59:31 -04:00
|
|
|
|
|
|
|
|
static int pattern_per_thread_lru_alloc()
|
|
|
|
|
{
|
|
|
|
|
if (!global.tune.pattern_cache)
|
|
|
|
|
return 1;
|
|
|
|
|
pat_lru_tree = lru64_new(global.tune.pattern_cache);
|
|
|
|
|
return !!pat_lru_tree;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Per-thread release hook: destroys this thread's pattern LRU cache tree.
 * NOTE(review): assumes lru64_destroy() tolerates a NULL tree (the case where
 * the cache was disabled and pattern_per_thread_lru_alloc() allocated
 * nothing) — confirm against the lru64 implementation.
 */
static void pattern_per_thread_lru_free()
{
	lru64_destroy(pat_lru_tree);
}

/* register the per-thread LRU alloc/free hooks */
REGISTER_PER_THREAD_ALLOC(pattern_per_thread_lru_alloc);
REGISTER_PER_THREAD_FREE(pattern_per_thread_lru_free);
|