2012-04-19 09:24:50 -04:00
|
|
|
/*
|
|
|
|
|
* Functions used to parse typed argument lists
|
|
|
|
|
*
|
|
|
|
|
* Copyright 2012 Willy Tarreau <w@1wt.eu>
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <sys/socket.h>
|
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
|
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <haproxy/arg.h>
|
2020-06-02 04:22:45 -04:00
|
|
|
#include <haproxy/chunk.h>
|
2020-06-04 11:05:57 -04:00
|
|
|
#include <haproxy/global.h>
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <haproxy/tools.h>
|
2012-04-19 09:24:50 -04:00
|
|
|
|
2014-12-08 13:50:43 -05:00
|
|
|
const char *arg_type_names[ARGT_NBTYPES] = {
|
2012-04-19 09:24:50 -04:00
|
|
|
[ARGT_STOP] = "end of arguments",
|
2015-07-20 11:45:02 -04:00
|
|
|
[ARGT_SINT] = "integer",
|
2012-04-19 09:24:50 -04:00
|
|
|
[ARGT_STR] = "string",
|
|
|
|
|
[ARGT_IPV4] = "IPv4 address",
|
|
|
|
|
[ARGT_MSK4] = "IPv4 mask",
|
|
|
|
|
[ARGT_IPV6] = "IPv6 address",
|
|
|
|
|
[ARGT_MSK6] = "IPv6 mask",
|
|
|
|
|
[ARGT_TIME] = "delay",
|
|
|
|
|
[ARGT_SIZE] = "size",
|
|
|
|
|
[ARGT_FE] = "frontend",
|
|
|
|
|
[ARGT_BE] = "backend",
|
|
|
|
|
[ARGT_TAB] = "table",
|
|
|
|
|
[ARGT_SRV] = "server",
|
|
|
|
|
[ARGT_USR] = "user list",
|
2015-01-19 12:58:20 -05:00
|
|
|
[ARGT_MAP] = "map",
|
2015-01-19 13:00:58 -05:00
|
|
|
[ARGT_REG] = "regex",
|
2015-09-21 14:57:12 -04:00
|
|
|
[ARGT_VAR] = "variable",
|
2019-02-25 09:20:35 -05:00
|
|
|
[ARGT_PBUF_FNUM] = "Protocol buffers field number",
|
2012-04-19 09:24:50 -04:00
|
|
|
/* Unassigned types must never happen. Better crash during parsing if they do. */
|
|
|
|
|
};
|
|
|
|
|
|
2012-10-19 13:49:09 -04:00
|
|
|
/* This dummy arg list may be used by default when no arg is found, it helps
|
|
|
|
|
* parsers by removing pointer checks.
|
|
|
|
|
*/
|
2015-01-19 12:44:07 -05:00
|
|
|
struct arg empty_arg_list[ARGM_NBARGS] = { };
|
2012-10-19 13:49:09 -04:00
|
|
|
|
MAJOR: sample: maintain a per-proxy list of the fetch args to resolve
While ACL args were resolved after all the config was parsed, it was not the
case with sample fetch args because they're almost everywhere now.
The issue is that ACLs now solely rely on sample fetches, so their args
resolving doesn't work anymore. And many fetches involving a server, a
proxy or a userlist don't work at all.
The real issue is that at the bottom layers we have no information about
proxies, line numbers, even ACLs in order to report understandable errors,
and that at the top layers we have no visibility over the locations where
fetches are referenced (think log node).
After failing multiple unsatisfying solutions attempts, we now have a new
concept of args list. The principle is that every proxy has a list head
which contains a number of indications such as the config keyword, the
context where it's used, the file and line number, etc... and a list of
arguments. This list head is of the same type as the elements, so it
serves as a template for adding new elements. This way, it is filled from
top to bottom by the callers with the information they have (eg: line
numbers, ACL name, ...) and the lower layers just have to duplicate it and
add an element when they face an argument they cannot resolve yet.
Then at the end of the configuration parsing, a loop passes over each
proxy's list and resolves all the args in sequence. And this way there is
all necessary information to report verbose errors.
The first immediate benefit is that for the first time we got very precise
location of issues (arg number in a keyword in its context, ...). Second,
in order to do this we had to parse log-format and unique-id-format a bit
earlier, so that was a great opportunity for doing so when the directives
are encountered (unless it's a default section). This way, the recorded
line numbers for these args are the ones of the place where the log format
is declared, not the end of the file.
Userlists report slightly more information now. They're the only remaining
ones in the ACL resolving function.
2013-04-02 10:34:32 -04:00
|
|
|
/* This function clones a struct arg_list template into a new one which is
|
|
|
|
|
* returned.
|
|
|
|
|
*/
|
|
|
|
|
struct arg_list *arg_list_clone(const struct arg_list *orig)
|
|
|
|
|
{
|
|
|
|
|
struct arg_list *new;
|
|
|
|
|
|
|
|
|
|
if ((new = calloc(1, sizeof(*new))) != NULL) {
|
|
|
|
|
/* ->list will be set by the caller when inserting the element.
|
|
|
|
|
* ->arg and ->arg_pos will be set by the caller.
|
|
|
|
|
*/
|
|
|
|
|
new->ctx = orig->ctx;
|
|
|
|
|
new->kw = orig->kw;
|
|
|
|
|
new->conv = orig->conv;
|
|
|
|
|
new->file = orig->file;
|
|
|
|
|
new->line = orig->line;
|
|
|
|
|
}
|
|
|
|
|
return new;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* This function clones a struct <arg_list> template into a new one which is
|
|
|
|
|
* set to point to arg <arg> at pos <pos>, and which is returned if the caller
|
|
|
|
|
* wants to apply further changes.
|
|
|
|
|
*/
|
|
|
|
|
struct arg_list *arg_list_add(struct arg_list *orig, struct arg *arg, int pos)
|
|
|
|
|
{
|
|
|
|
|
struct arg_list *new;
|
|
|
|
|
|
|
|
|
|
new = arg_list_clone(orig);
|
2017-04-12 16:28:52 -04:00
|
|
|
if (new) {
|
|
|
|
|
new->arg = arg;
|
|
|
|
|
new->arg_pos = pos;
|
2021-04-21 01:32:39 -04:00
|
|
|
LIST_APPEND(&orig->list, &new->list);
|
2017-04-12 16:28:52 -04:00
|
|
|
}
|
MAJOR: sample: maintain a per-proxy list of the fetch args to resolve
While ACL args were resolved after all the config was parsed, it was not the
case with sample fetch args because they're almost everywhere now.
The issue is that ACLs now solely rely on sample fetches, so their args
resolving doesn't work anymore. And many fetches involving a server, a
proxy or a userlist don't work at all.
The real issue is that at the bottom layers we have no information about
proxies, line numbers, even ACLs in order to report understandable errors,
and that at the top layers we have no visibility over the locations where
fetches are referenced (think log node).
After failing multiple unsatisfying solutions attempts, we now have a new
concept of args list. The principle is that every proxy has a list head
which contains a number of indications such as the config keyword, the
context where it's used, the file and line number, etc... and a list of
arguments. This list head is of the same type as the elements, so it
serves as a template for adding new elements. This way, it is filled from
top to bottom by the callers with the information they have (eg: line
numbers, ACL name, ...) and the lower layers just have to duplicate it and
add an element when they face an argument they cannot resolve yet.
Then at the end of the configuration parsing, a loop passes over each
proxy's list and resolves all the args in sequence. And this way there is
all necessary information to report verbose errors.
The first immediate benefit is that for the first time we got very precise
location of issues (arg number in a keyword in its context, ...). Second,
in order to do this we had to parse log-format and unique-id-format a bit
earlier, so that was a great opportunity for doing so when the directives
are encountered (unless it's a default section). This way, the recorded
line numbers for these args are the ones of the place where the log format
is declared, not the end of the file.
Userlists report slightly more information now. They're the only remaining
ones in the ACL resolving function.
2013-04-02 10:34:32 -04:00
|
|
|
return new;
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-14 02:40:37 -05:00
|
|
|
/* This function builds an argument list from a config line, and stops at the
|
|
|
|
|
* first non-matching character, which is pointed to in <end_ptr>. A valid arg
|
|
|
|
|
* list starts with an opening parenthesis '(', contains a number of comma-
|
|
|
|
|
* delimited words, and ends with the closing parenthesis ')'. An empty list
|
|
|
|
|
* (with or without the parenthesis) will lead to a valid empty argument if the
|
|
|
|
|
* keyword has a mandatory one. The function returns the number of arguments
|
|
|
|
|
* emitted, or <0 in case of any error. Everything needed is automatically
|
|
|
|
|
* allocated. A pointer to an error message might be returned in err_msg if not
|
|
|
|
|
* NULL, in which case it would be allocated and the caller will have to check
|
|
|
|
|
* it and free it. The output arg list is returned in argp which must be valid.
|
|
|
|
|
* The returned array is always terminated by an arg of type ARGT_STOP (0),
|
|
|
|
|
* unless the mask indicates that no argument is supported. Unresolved arguments
|
|
|
|
|
* are appended to arg list <al>, which also serves as a template to create new
|
|
|
|
|
* entries. The mask is composed of a number of mandatory arguments in its lower
|
|
|
|
|
* ARGM_BITS bits, and a concatenation of each argument type in each subsequent
|
|
|
|
|
* ARGT_BITS-bit block. If <err_msg> is not NULL, it must point to a freeable
|
|
|
|
|
* or NULL pointer. The caller is expected to restart the parsing from the new
|
|
|
|
|
* pointer set in <end_ptr>, which is the first character considered as not
|
|
|
|
|
* being part of the arg list. The input string ends on the first between <len>
|
|
|
|
|
* characters (when len is positive) or the first NUL character. Placing -1 in
|
|
|
|
|
* <len> will make it virtually unbounded (~2GB long strings).
|
2012-04-19 09:24:50 -04:00
|
|
|
*/
|
2016-03-15 15:00:35 -04:00
|
|
|
int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp,
|
2020-02-14 02:40:37 -05:00
|
|
|
char **err_msg, const char **end_ptr, int *err_arg,
|
MAJOR: sample: maintain a per-proxy list of the fetch args to resolve
While ACL args were resolved after all the config was parsed, it was not the
case with sample fetch args because they're almost everywhere now.
The issue is that ACLs now solely rely on sample fetches, so their args
resolving doesn't work anymore. And many fetches involving a server, a
proxy or a userlist don't work at all.
The real issue is that at the bottom layers we have no information about
proxies, line numbers, even ACLs in order to report understandable errors,
and that at the top layers we have no visibility over the locations where
fetches are referenced (think log node).
After failing multiple unsatisfying solutions attempts, we now have a new
concept of args list. The principle is that every proxy has a list head
which contains a number of indications such as the config keyword, the
context where it's used, the file and line number, etc... and a list of
arguments. This list head is of the same type as the elements, so it
serves as a template for adding new elements. This way, it is filled from
top to bottom by the callers with the information they have (eg: line
numbers, ACL name, ...) and the lower layers just have to duplicate it and
add an element when they face an argument they cannot resolve yet.
Then at the end of the configuration parsing, a loop passes over each
proxy's list and resolves all the args in sequence. And this way there is
all necessary information to report verbose errors.
The first immediate benefit is that for the first time we got very precise
location of issues (arg number in a keyword in its context, ...). Second,
in order to do this we had to parse log-format and unique-id-format a bit
earlier, so that was a great opportunity for doing so when the directives
are encountered (unless it's a default section). This way, the recorded
line numbers for these args are the ones of the place where the log format
is declared, not the end of the file.
Userlists report slightly more information now. They're the only remaining
ones in the ACL resolving function.
2013-04-02 10:34:32 -04:00
|
|
|
struct arg_list *al)
|
2012-04-19 09:24:50 -04:00
|
|
|
{
|
|
|
|
|
int nbarg;
|
|
|
|
|
int pos;
|
MAJOR: sample: maintain a per-proxy list of the fetch args to resolve
While ACL args were resolved after all the config was parsed, it was not the
case with sample fetch args because they're almost everywhere now.
The issue is that ACLs now solely rely on sample fetches, so their args
resolving doesn't work anymore. And many fetches involving a server, a
proxy or a userlist don't work at all.
The real issue is that at the bottom layers we have no information about
proxies, line numbers, even ACLs in order to report understandable errors,
and that at the top layers we have no visibility over the locations where
fetches are referenced (think log node).
After failing multiple unsatisfying solutions attempts, we now have a new
concept of args list. The principle is that every proxy has a list head
which contains a number of indications such as the config keyword, the
context where it's used, the file and line number, etc... and a list of
arguments. This list head is of the same type as the elements, so it
serves as a template for adding new elements. This way, it is filled from
top to bottom by the callers with the information they have (eg: line
numbers, ACL name, ...) and the lower layers just have to duplicate it and
add an element when they face an argument they cannot resolve yet.
Then at the end of the configuration parsing, a loop passes over each
proxy's list and resolves all the args in sequence. And this way there is
all necessary information to report verbose errors.
The first immediate benefit is that for the first time we got very precise
location of issues (arg number in a keyword in its context, ...). Second,
in order to do this we had to parse log-format and unique-id-format a bit
earlier, so that was a great opportunity for doing so when the directives
are encountered (unless it's a default section). This way, the recorded
line numbers for these args are the ones of the place where the log format
is declared, not the end of the file.
Userlists report slightly more information now. They're the only remaining
ones in the ACL resolving function.
2013-04-02 10:34:32 -04:00
|
|
|
struct arg *arg;
|
2012-04-19 09:24:50 -04:00
|
|
|
const char *beg;
|
|
|
|
|
const char *ptr_err = NULL;
|
|
|
|
|
int min_arg;
|
2020-02-14 02:40:37 -05:00
|
|
|
int empty;
|
2017-04-12 16:32:04 -04:00
|
|
|
struct arg_list *new_al = al;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
MAJOR: sample: maintain a per-proxy list of the fetch args to resolve
While ACL args were resolved after all the config was parsed, it was not the
case with sample fetch args because they're almost everywhere now.
The issue is that ACLs now solely rely on sample fetches, so their args
resolving doesn't work anymore. And many fetches involving a server, a
proxy or a userlist don't work at all.
The real issue is that at the bottom layers we have no information about
proxies, line numbers, even ACLs in order to report understandable errors,
and that at the top layers we have no visibility over the locations where
fetches are referenced (think log node).
After failing multiple unsatisfying solutions attempts, we now have a new
concept of args list. The principle is that every proxy has a list head
which contains a number of indications such as the config keyword, the
context where it's used, the file and line number, etc... and a list of
arguments. This list head is of the same type as the elements, so it
serves as a template for adding new elements. This way, it is filled from
top to bottom by the callers with the information they have (eg: line
numbers, ACL name, ...) and the lower layers just have to duplicate it and
add an element when they face an argument they cannot resolve yet.
Then at the end of the configuration parsing, a loop passes over each
proxy's list and resolves all the args in sequence. And this way there is
all necessary information to report verbose errors.
The first immediate benefit is that for the first time we got very precise
location of issues (arg number in a keyword in its context, ...). Second,
in order to do this we had to parse log-format and unique-id-format a bit
earlier, so that was a great opportunity for doing so when the directives
are encountered (unless it's a default section). This way, the recorded
line numbers for these args are the ones of the place where the log format
is declared, not the end of the file.
Userlists report slightly more information now. They're the only remaining
ones in the ACL resolving function.
2013-04-02 10:34:32 -04:00
|
|
|
*argp = NULL;
|
|
|
|
|
|
2020-02-14 02:40:37 -05:00
|
|
|
empty = 0;
|
|
|
|
|
if (!len || *in != '(') {
|
|
|
|
|
/* it's already not for us, stop here */
|
|
|
|
|
empty = 1;
|
|
|
|
|
len = 0;
|
|
|
|
|
} else {
|
|
|
|
|
/* skip opening parenthesis */
|
|
|
|
|
len--;
|
|
|
|
|
in++;
|
|
|
|
|
}
|
|
|
|
|
|
2015-01-19 12:44:07 -05:00
|
|
|
min_arg = mask & ARGM_MASK;
|
|
|
|
|
mask >>= ARGM_BITS;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
|
|
|
|
pos = 0;
|
2015-01-19 12:44:07 -05:00
|
|
|
/* find between 0 and NBARGS the max number of args supported by the mask */
|
|
|
|
|
for (nbarg = 0; nbarg < ARGM_NBARGS && ((mask >> (nbarg * ARGT_BITS)) & ARGT_MASK); nbarg++);
|
2012-04-19 09:24:50 -04:00
|
|
|
|
|
|
|
|
if (!nbarg)
|
|
|
|
|
goto end_parse;
|
|
|
|
|
|
|
|
|
|
/* Note: an empty input string contains an empty argument if this argument
|
|
|
|
|
* is marked mandatory. Otherwise we can ignore it.
|
|
|
|
|
*/
|
2020-02-14 02:40:37 -05:00
|
|
|
if (empty && !min_arg)
|
2012-04-19 09:24:50 -04:00
|
|
|
goto end_parse;
|
|
|
|
|
|
2020-09-12 14:26:43 -04:00
|
|
|
arg = *argp = calloc(nbarg + 1, sizeof(**argp));
|
2012-04-19 09:24:50 -04:00
|
|
|
|
2021-05-19 06:00:54 -04:00
|
|
|
if (!arg)
|
|
|
|
|
goto alloc_err;
|
|
|
|
|
|
2012-04-19 09:24:50 -04:00
|
|
|
/* Note: empty arguments after a comma always exist. */
|
|
|
|
|
while (pos < nbarg) {
|
2015-07-20 11:45:02 -04:00
|
|
|
unsigned int uint;
|
MEDIUM: arg: make make_arg_list() support quotes in arguments
Now it becomes possible to reuse the quotes within arguments, allowing
the parser to distinguish a ',' or ')' that is part of the value from
one which delimits the argument. In addition, ',' and ')' may be escaped
using a backslash. However, it is also important to keep in mind that
just like in shell, quotes are first resolved by the word tokenizer, so
in order to pass quotes that are visible to the argument parser, a second
level is needed, either using backslash escaping, or by using an alternate
type.
For example, it's possible to write this to append a comma:
http-request add-header paren-comma-paren "%[str('(--,--)')]"
or this:
http-request add-header paren-comma-paren '%[str("(--,--)")]'
or this:
http-request add-header paren-comma-paren %[str(\'(--,--)\')]
or this:
http-request add-header paren-comma-paren %[str(\"(--,--)\")]
or this:
http-request add-header paren-comma-paren %[str(\"(\"--\',\'--\")\")]
Note that due to the wide use of '\' in front of parenthesis in regex,
the backslash character will purposely *not* escape parenthesis, so that
'\)' placed in quotes is passed verbatim to a regex engine.
2020-02-14 07:37:20 -05:00
|
|
|
int squote = 0, dquote = 0;
|
|
|
|
|
char *out;
|
|
|
|
|
|
|
|
|
|
chunk_reset(&trash);
|
|
|
|
|
out = trash.area;
|
|
|
|
|
|
|
|
|
|
while (len && *in && trash.data < trash.size - 1) {
|
|
|
|
|
if (*in == '"' && !squote) { /* double quote outside single quotes */
|
|
|
|
|
if (dquote)
|
|
|
|
|
dquote = 0;
|
|
|
|
|
else
|
|
|
|
|
dquote = 1;
|
|
|
|
|
in++; len--;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
else if (*in == '\'' && !dquote) { /* single quote outside double quotes */
|
|
|
|
|
if (squote)
|
|
|
|
|
squote = 0;
|
|
|
|
|
else
|
|
|
|
|
squote = 1;
|
|
|
|
|
in++; len--;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
else if (*in == '\\' && !squote && len != 1) {
|
|
|
|
|
/* '\', ', ' ', '"' support being escaped by '\' */
|
|
|
|
|
if (len == 1 || in[1] == 0)
|
|
|
|
|
goto unquote_err;
|
|
|
|
|
|
|
|
|
|
if (in[1] == '\\' || in[1] == ' ' || in[1] == '"' || in[1] == '\'') {
|
|
|
|
|
in++; len--;
|
|
|
|
|
*out++ = *in;
|
|
|
|
|
}
|
|
|
|
|
else if (in[1] == 'r') {
|
|
|
|
|
in++; len--;
|
|
|
|
|
*out++ = '\r';
|
|
|
|
|
}
|
|
|
|
|
else if (in[1] == 'n') {
|
|
|
|
|
in++; len--;
|
|
|
|
|
*out++ = '\n';
|
|
|
|
|
}
|
|
|
|
|
else if (in[1] == 't') {
|
|
|
|
|
in++; len--;
|
|
|
|
|
*out++ = '\t';
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* just a lone '\' */
|
|
|
|
|
*out++ = *in;
|
|
|
|
|
}
|
|
|
|
|
in++; len--;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (!squote && !dquote && (*in == ',' || *in == ')')) {
|
|
|
|
|
/* end of argument */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* verbatim copy */
|
|
|
|
|
*out++ = *in++;
|
|
|
|
|
len--;
|
|
|
|
|
}
|
|
|
|
|
trash.data = out - trash.area;
|
2012-04-19 09:24:50 -04:00
|
|
|
}
|
2020-02-15 08:54:28 -05:00
|
|
|
|
2020-02-16 04:46:37 -05:00
|
|
|
if (len && *in && *in != ',' && *in != ')')
|
2020-02-15 08:54:28 -05:00
|
|
|
goto buffer_err;
|
|
|
|
|
|
MEDIUM: arg: make make_arg_list() support quotes in arguments
Now it becomes possible to reuse the quotes within arguments, allowing
the parser to distinguish a ',' or ')' that is part of the value from
one which delimits the argument. In addition, ',' and ')' may be escaped
using a backslash. However, it is also important to keep in mind that
just like in shell, quotes are first resolved by the word tokenizer, so
in order to pass quotes that are visible to the argument parser, a second
level is needed, either using backslash escaping, or by using an alternate
type.
For example, it's possible to write this to append a comma:
http-request add-header paren-comma-paren "%[str('(--,--)')]"
or this:
http-request add-header paren-comma-paren '%[str("(--,--)")]'
or this:
http-request add-header paren-comma-paren %[str(\'(--,--)\')]
or this:
http-request add-header paren-comma-paren %[str(\"(--,--)\")]
or this:
http-request add-header paren-comma-paren %[str(\"(\"--\',\'--\")\")]
Note that due to the wide use of '\' in front of parenthesis in regex,
the backslash character will purposely *not* escape parenthesis, so that
'\)' placed in quotes is passed verbatim to a regex engine.
2020-02-14 07:37:20 -05:00
|
|
|
trash.area[trash.data] = 0;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
2015-01-19 12:44:07 -05:00
|
|
|
arg->type = (mask >> (pos * ARGT_BITS)) & ARGT_MASK;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
|
|
|
|
switch (arg->type) {
|
|
|
|
|
case ARGT_SINT:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces to decide upfront where the argument
ends, which is what prevents commas and right parenthesis from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for most all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty number
|
2012-04-27 10:32:26 -04:00
|
|
|
goto empty_err;
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces to decide upfront where the argument
ends, which is what prevents commas and right parenthesis from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for most all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
beg = trash.area;
|
|
|
|
|
arg->data.sint = read_int64(&beg, trash.area + trash.data);
|
|
|
|
|
if (beg < trash.area + trash.data)
|
2012-04-27 10:32:26 -04:00
|
|
|
goto parse_err;
|
2015-07-20 11:45:02 -04:00
|
|
|
arg->type = ARGT_SINT;
|
2012-04-19 09:24:50 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ARGT_FE:
|
|
|
|
|
case ARGT_BE:
|
|
|
|
|
case ARGT_TAB:
|
|
|
|
|
case ARGT_SRV:
|
|
|
|
|
case ARGT_USR:
|
2015-01-19 13:00:58 -05:00
|
|
|
case ARGT_REG:
|
2012-06-01 04:38:29 -04:00
|
|
|
/* These argument types need to be stored as strings during
|
|
|
|
|
* parsing then resolved later.
|
|
|
|
|
*/
|
|
|
|
|
arg->unresolved = 1;
|
2017-04-12 16:32:04 -04:00
|
|
|
new_al = arg_list_add(al, arg, pos);
|
MAJOR: sample: maintain a per-proxy list of the fetch args to resolve
While ACL args were resolved after all the config was parsed, it was not the
case with sample fetch args because they're almost everywhere now.
The issue is that ACLs now solely rely on sample fetches, so their args
resolving doesn't work anymore. And many fetches involving a server, a
proxy or a userlist don't work at all.
The real issue is that at the bottom layers we have no information about
proxies, line numbers, even ACLs in order to report understandable errors,
and that at the top layers we have no visibility over the locations where
fetches are referenced (think log node).
After failing multiple unsatisfying solutions attempts, we now have a new
concept of args list. The principle is that every proxy has a list head
which contains a number of indications such as the config keyword, the
context where it's used, the file and line number, etc... and a list of
arguments. This list head is of the same type as the elements, so it
serves as a template for adding new elements. This way, it is filled from
top to bottom by the callers with the information they have (eg: line
numbers, ACL name, ...) and the lower layers just have to duplicate it and
add an element when they face an argument they cannot resolve yet.
Then at the end of the configuration parsing, a loop passes over each
proxy's list and resolves all the args in sequence. And this way there is
all necessary information to report verbose errors.
The first immediate benefit is that for the first time we got very precise
location of issues (arg number in a keyword in its context, ...). Second,
in order to do this we had to parse log-format and unique-id-format a bit
earlier, so that was a great opportunity for doing so when the directives
are encountered (unless it's a default section). This way, the recorded
line numbers for these args are the ones of the place where the log format
is declared, not the end of the file.
Userlists report slightly more information now. They're the only remaining
ones in the ACL resolving function.
2013-04-02 10:34:32 -04:00
|
|
|
|
2012-06-01 04:38:29 -04:00
|
|
|
/* fall through */
|
2012-04-19 09:24:50 -04:00
|
|
|
case ARGT_STR:
|
|
|
|
|
/* all types that must be resolved are stored as strings
|
|
|
|
|
* during the parsing. The caller must at one point resolve
|
|
|
|
|
* them and free the string.
|
|
|
|
|
*/
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces to decide upfront where the argument
ends, which is what prevents commas and right parenthesis from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for most all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
arg->data.str.area = my_strndup(trash.area, trash.data);
|
|
|
|
|
arg->data.str.data = trash.data;
|
|
|
|
|
arg->data.str.size = trash.data + 1;
|
2012-04-19 09:24:50 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ARGT_IPV4:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces to decide upfront where the argument
ends, which is what prevents commas and right parenthesis from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for most all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty address
|
2012-04-27 10:32:26 -04:00
|
|
|
goto empty_err;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces to decide upfront where the argument
ends, which is what prevents commas and right parenthesis from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for most all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (inet_pton(AF_INET, trash.area, &arg->data.ipv4) <= 0)
|
2012-04-19 09:24:50 -04:00
|
|
|
goto parse_err;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ARGT_MSK4:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty mask
|
2012-04-27 10:32:26 -04:00
|
|
|
goto empty_err;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!str2mask(trash.area, &arg->data.ipv4))
|
2012-04-19 09:24:50 -04:00
|
|
|
goto parse_err;
|
|
|
|
|
|
|
|
|
|
arg->type = ARGT_IPV4;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ARGT_IPV6:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty address
|
2012-04-27 10:32:26 -04:00
|
|
|
goto empty_err;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (inet_pton(AF_INET6, trash.area, &arg->data.ipv6) <= 0)
|
2012-04-19 09:24:50 -04:00
|
|
|
goto parse_err;
|
|
|
|
|
break;
|
|
|
|
|
|
2018-01-25 10:24:50 -05:00
|
|
|
case ARGT_MSK6:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty mask
|
2018-01-25 10:24:50 -05:00
|
|
|
goto empty_err;
|
|
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!str2mask6(trash.area, &arg->data.ipv6))
|
2018-01-25 10:24:50 -05:00
|
|
|
goto parse_err;
|
|
|
|
|
|
|
|
|
|
arg->type = ARGT_IPV6;
|
|
|
|
|
break;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
|
|
|
|
case ARGT_TIME:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty time
|
2012-04-27 10:32:26 -04:00
|
|
|
goto empty_err;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
ptr_err = parse_time_err(trash.area, &uint, TIME_UNIT_MS);
|
2019-06-07 13:00:37 -04:00
|
|
|
if (ptr_err) {
|
|
|
|
|
if (ptr_err == PARSE_TIME_OVER || ptr_err == PARSE_TIME_UNDER)
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
ptr_err = trash.area;
|
2012-04-19 09:24:50 -04:00
|
|
|
goto parse_err;
|
2019-06-07 13:00:37 -04:00
|
|
|
}
|
2015-07-20 11:45:02 -04:00
|
|
|
arg->data.sint = uint;
|
|
|
|
|
arg->type = ARGT_SINT;
|
2012-04-19 09:24:50 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ARGT_SIZE:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data) // empty size
|
2012-04-27 10:32:26 -04:00
|
|
|
goto empty_err;
|
2012-04-19 09:24:50 -04:00
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
ptr_err = parse_size_err(trash.area, &uint);
|
2012-04-19 09:24:50 -04:00
|
|
|
if (ptr_err)
|
|
|
|
|
goto parse_err;
|
|
|
|
|
|
2015-07-20 11:45:02 -04:00
|
|
|
arg->data.sint = uint;
|
|
|
|
|
arg->type = ARGT_SINT;
|
2012-04-19 09:24:50 -04:00
|
|
|
break;
|
|
|
|
|
|
2019-02-25 09:20:35 -05:00
|
|
|
case ARGT_PBUF_FNUM:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!trash.data)
|
2019-03-04 13:03:48 -05:00
|
|
|
goto empty_err;
|
|
|
|
|
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
if (!parse_dotted_uints(trash.area, &arg->data.fid.ids, &arg->data.fid.sz))
|
2019-02-25 09:20:35 -05:00
|
|
|
goto parse_err;
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
2012-04-19 09:24:50 -04:00
|
|
|
/* FIXME: other types need to be implemented here */
|
|
|
|
|
default:
|
2012-04-27 10:32:26 -04:00
|
|
|
goto not_impl;
|
2012-04-19 09:24:50 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pos++;
|
|
|
|
|
arg++;
|
|
|
|
|
|
|
|
|
|
/* don't go back to parsing if we reached end */
|
2020-02-14 02:40:37 -05:00
|
|
|
if (!len || !*in || *in == ')' || pos >= nbarg)
|
2012-04-19 09:24:50 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* skip comma */
|
|
|
|
|
in++; len--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
end_parse:
|
|
|
|
|
if (pos < min_arg) {
|
|
|
|
|
/* not enough arguments */
|
2012-09-20 13:43:14 -04:00
|
|
|
memprintf(err_msg,
|
2013-12-12 18:38:47 -05:00
|
|
|
"missing arguments (got %d/%d), type '%s' expected",
|
2015-01-19 12:44:07 -05:00
|
|
|
pos, min_arg, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK]);
|
2012-04-19 09:24:50 -04:00
|
|
|
goto err;
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-14 02:40:37 -05:00
|
|
|
if (empty) {
|
|
|
|
|
/* nothing to do */
|
|
|
|
|
} else if (*in == ')') {
|
|
|
|
|
/* skip the expected closing parenthesis */
|
|
|
|
|
in++;
|
|
|
|
|
} else {
|
2012-09-20 13:43:14 -04:00
|
|
|
/* the caller is responsible for freeing this message */
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
char *word = (len > 0) ? my_strndup(in, len) : (char *)in;
|
2021-05-06 08:50:30 -04:00
|
|
|
|
|
|
|
|
if (*word)
|
|
|
|
|
memprintf(err_msg, "expected ')' before '%s'", word);
|
|
|
|
|
else
|
|
|
|
|
memprintf(err_msg, "expected ')'");
|
|
|
|
|
|
2020-02-14 02:40:37 -05:00
|
|
|
if (len > 0)
|
|
|
|
|
free(word);
|
2020-07-05 07:36:08 -04:00
|
|
|
/* when we're missing a right paren, the empty part preceding
|
2020-02-14 02:40:37 -05:00
|
|
|
* already created an empty arg, adding one to the position, so
|
|
|
|
|
* let's fix the reporting to avoid being confusing.
|
|
|
|
|
*/
|
|
|
|
|
if (pos > 1)
|
|
|
|
|
pos--;
|
2012-04-19 09:24:50 -04:00
|
|
|
goto err;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* note that pos might be < nbarg and this is not an error, it's up to the
|
|
|
|
|
* caller to decide what to do with optional args.
|
|
|
|
|
*/
|
|
|
|
|
if (err_arg)
|
|
|
|
|
*err_arg = pos;
|
2020-02-14 02:40:37 -05:00
|
|
|
if (end_ptr)
|
|
|
|
|
*end_ptr = in;
|
2012-04-19 09:24:50 -04:00
|
|
|
return pos;
|
|
|
|
|
|
|
|
|
|
err:
|
2017-04-12 16:32:04 -04:00
|
|
|
if (new_al == al) {
|
|
|
|
|
/* only free the arg area if we have not queued unresolved args
|
|
|
|
|
* still pointing to it.
|
|
|
|
|
*/
|
|
|
|
|
free(*argp);
|
|
|
|
|
}
|
2013-12-06 09:30:05 -05:00
|
|
|
*argp = NULL;
|
2012-04-19 09:24:50 -04:00
|
|
|
if (err_arg)
|
|
|
|
|
*err_arg = pos;
|
2020-02-14 02:40:37 -05:00
|
|
|
if (end_ptr)
|
|
|
|
|
*end_ptr = in;
|
2012-04-19 09:24:50 -04:00
|
|
|
return -1;
|
2012-04-27 10:32:26 -04:00
|
|
|
|
|
|
|
|
empty_err:
|
BUG/MEDIUM: arg: empty args list must be dropped
Before commit 80b53ffb1 ("MEDIUM: arg: make make_arg_list() stop after
its own arguments"), consumers of arguments would measure the length of
the string between the first opening and closing parenthesis before
calling make_arg_list(), and this latter one would detect an empty string
early by len==0 and would not allocate an argument list.
Since that commit, this has changed a bit because the argument parser
is now the one in charge of delimiting the argument string, so the early
test cannot be used anymore. But the argument list is still allocated,
and despite the number of arguments being returned, consumers do not
necessarily rely on it but instead they rely on the non-null arg_p
pointer that used to be allocated only if at least one argument was
present. But as it's now always allocated, the first argument always
carries the first argument's type with an empty value, which confuses
all functions that take a unique optional argument (such as uuid()).
The proper long term solution would be to always use the returned argument
count, but at least we can make sure the function always returns an empty
argument list when fed with an empty set of parentheses, as it always used
to do. This is what this patch does.
This fix must be backported to 2.2 and fixes github issue #763. Thanks to
Luke Seelenbinder for reporting the problem.
2020-07-21 09:44:38 -04:00
|
|
|
/* If we've only got an empty set of parenthesis with nothing
|
|
|
|
|
* in between, there is no arg at all.
|
|
|
|
|
*/
|
|
|
|
|
if (!pos) {
|
2021-02-20 04:46:51 -05:00
|
|
|
ha_free(argp);
|
BUG/MEDIUM: arg: empty args list must be dropped
Before commit 80b53ffb1 ("MEDIUM: arg: make make_arg_list() stop after
its own arguments"), consumers of arguments would measure the length of
the string between the first opening and closing parenthesis before
calling make_arg_list(), and this latter one would detect an empty string
early by len==0 and would not allocate an argument list.
Since that commit, this has changed a bit because the argument parser
is now the one in charge of delimiting the argument string, so the early
test cannot be used anymore. But the argument list is still allocated,
and despite the number of arguments being returned, consumers do not
necessarily rely on it but instead they rely on the non-null arg_p
pointer that used to be allocated only if at least one argument was
present. But as it's now always allocated, the first argument always
carries the first argument's type with an empty value, which confuses
all functions that take a unique optional argument (such as uuid()).
The proper long term solution would be to always use the returned argument
count, but at least we can make sure the function always returns an empty
argument list when fed with an empty set of parentheses, as it always used
to do. This is what this patch does.
This fix must be backported to 2.2 and fixes github issue #763. Thanks to
Luke Seelenbinder for reporting the problem.
2020-07-21 09:44:38 -04:00
|
|
|
}
|
|
|
|
|
|
2020-02-28 10:41:29 -05:00
|
|
|
if (pos >= min_arg)
|
|
|
|
|
goto end_parse;
|
|
|
|
|
|
2012-09-20 13:43:14 -04:00
|
|
|
memprintf(err_msg, "expected type '%s' at position %d, but got nothing",
|
2015-01-19 12:44:07 -05:00
|
|
|
arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
|
2012-04-27 10:32:26 -04:00
|
|
|
goto err;
|
|
|
|
|
|
|
|
|
|
parse_err:
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
/* come here with the word attempted to parse in trash */
|
2012-09-20 13:43:14 -04:00
|
|
|
memprintf(err_msg, "failed to parse '%s' as type '%s' at position %d",
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
|
2012-04-27 10:32:26 -04:00
|
|
|
goto err;
|
|
|
|
|
|
|
|
|
|
not_impl:
|
2012-09-20 13:43:14 -04:00
|
|
|
memprintf(err_msg, "parsing for type '%s' was not implemented, please report this bug",
|
2015-01-19 12:44:07 -05:00
|
|
|
arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK]);
|
2012-04-27 10:32:26 -04:00
|
|
|
goto err;
|
MEDIUM: arg: copy parsed arguments into the trash instead of allocating them
For each and every argument parsed by make_arg_list(), there was an
strndup() call, just so that we have a trailing zero for most functions,
and this temporary buffer is released afterwards except for strings where
it is kept.
Proceeding like this is not convenient because 1) it performs a huge
malloc/free dance, and 2) it forces us to decide upfront where the argument
ends, which is what prevents commas and right parentheses from being used.
This patch makes the function copy the temporary argument into the trash
instead, so that we avoid the malloc/free dance for almost all non-string
args (e.g. integers, addresses, time, size etc), and that we can later
produce the contents on the fly while parsing the input. It adds a length
check to make sure that the argument is not longer than the buffer size,
which should obviously never be the case but who knows what people put in
their configuration.
2020-02-14 05:34:35 -05:00
|
|
|
|
|
|
|
|
buffer_err:
|
|
|
|
|
memprintf(err_msg, "too small buffer size to store decoded argument %d, increase bufsize ?",
|
|
|
|
|
pos + 1);
|
|
|
|
|
goto err;
|
MEDIUM: arg: make make_arg_list() support quotes in arguments
Now it becomes possible to reuse the quotes within arguments, allowing
the parser to distinguish a ',' or ')' that is part of the value from
one which delimits the argument. In addition, ',' and ')' may be escaped
using a backslash. However, it is also important to keep in mind that
just like in shell, quotes are first resolved by the word tokenizer, so
in order to pass quotes that are visible to the argument parser, a second
level is needed, either using backslash escaping, or by using an alternate
type.
For example, it's possible to write this to append a comma:
http-request add-header paren-comma-paren "%[str('(--,--)')]"
or this:
http-request add-header paren-comma-paren '%[str("(--,--)")]'
or this:
http-request add-header paren-comma-paren %[str(\'(--,--)\')]
or this:
http-request add-header paren-comma-paren %[str(\"(--,--)\")]
or this:
http-request add-header paren-comma-paren %[str(\"(\"--\',\'--\")\")]
Note that due to the wide use of '\' in front of parentheses in regex,
the backslash character will purposely *not* escape parenthesis, so that
'\)' placed in quotes is passed verbatim to a regex engine.
2020-02-14 07:37:20 -05:00
|
|
|
|
|
|
|
|
unquote_err:
|
|
|
|
|
/* come here with the parsed part in <trash.area>:<trash.data> and the
|
|
|
|
|
* unparsable part in <in>.
|
|
|
|
|
*/
|
|
|
|
|
trash.area[trash.data] = 0;
|
|
|
|
|
memprintf(err_msg, "failed to parse '%s' after '%s' as type '%s' at position %d",
|
|
|
|
|
in, trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
|
|
|
|
|
goto err;
|
|
|
|
|
|
2021-05-19 06:00:54 -04:00
|
|
|
alloc_err:
|
|
|
|
|
memprintf(err_msg, "out of memory");
|
|
|
|
|
goto err;
|
2012-04-19 09:24:50 -04:00
|
|
|
}
|