haproxy/src/h2.c

815 lines
27 KiB
C
Raw Normal View History

/*
* HTTP/2 protocol processing
*
* Copyright 2017 Willy Tarreau <w@1wt.eu>
* Copyright (C) 2017 HAProxy Technologies
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <inttypes.h>
#include <haproxy/api.h>
#include <haproxy/global.h>
#include <haproxy/h2.h>
#include <haproxy/http-hdr-t.h>
#include <haproxy/http.h>
#include <haproxy/http_htx.h>
#include <haproxy/htx.h>
#include <import/ist.h>
/* Per-frame-type constraints, indexed by the H2_FT_* frame type. For each
 * type the entry carries a <dir> value, the bounds <min_id>/<max_id> applied
 * to the stream ID and the bounds <min_len>/<max_len> applied to the frame
 * length. Frame types whose both ID bounds are 0 (SETTINGS, PING, GOAWAY) are
 * thus tied to stream 0, and those with identical length bounds (PRIORITY,
 * RST_STREAM, PING, WINDOW_UPDATE) have a fixed size.
 * NOTE(review): <dir> presumably is a bit mask of the directions in which the
 * frame is accepted (3 everywhere except PUSH_PROMISE which uses 0) -- confirm
 * against the declaration of struct h2_frame_definition.
 */
struct h2_frame_definition h2_frame_definition[H2_FT_ENTRIES] = {
	[H2_FT_DATA] = {
		.dir     = 3,
		.min_id  = 1,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 0,
		.max_len = H2_MAX_FRAME_LEN,
	},
	[H2_FT_HEADERS] = {
		.dir     = 3,
		.min_id  = 1,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 1,
		.max_len = H2_MAX_FRAME_LEN,
	},
	[H2_FT_PRIORITY] = {
		.dir     = 3,
		.min_id  = 1,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 5,
		.max_len = 5,
	},
	[H2_FT_RST_STREAM] = {
		.dir     = 3,
		.min_id  = 1,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 4,
		.max_len = 4,
	},
	[H2_FT_SETTINGS] = {
		.dir     = 3,
		.min_id  = 0,
		.max_id  = 0,
		.min_len = 0,
		.max_len = H2_MAX_FRAME_LEN,
	},
	[H2_FT_PUSH_PROMISE] = {
		.dir     = 0,
		.min_id  = 1,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 4,
		.max_len = H2_MAX_FRAME_LEN,
	},
	[H2_FT_PING] = {
		.dir     = 3,
		.min_id  = 0,
		.max_id  = 0,
		.min_len = 8,
		.max_len = 8,
	},
	[H2_FT_GOAWAY] = {
		.dir     = 3,
		.min_id  = 0,
		.max_id  = 0,
		.min_len = 8,
		.max_len = H2_MAX_FRAME_LEN,
	},
	[H2_FT_WINDOW_UPDATE] = {
		.dir     = 3,
		.min_id  = 0,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 4,
		.max_len = 4,
	},
	[H2_FT_CONTINUATION] = {
		.dir     = 3,
		.min_id  = 1,
		.max_id  = H2_MAX_STREAM_ID,
		.min_len = 0,
		.max_len = H2_MAX_FRAME_LEN,
	},
};
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
/* Scans the header value <ist> from position <start> up to its end, looking
 * for one of the bytes which are forbidden in header values: NUL (0x00),
 * LF (0x0A) or CR (0x0D). <start> must point inside <ist>: the byte it
 * designates is always examined before the end of <ist> is tested. Returns 1
 * as soon as such a byte is met, otherwise 0. Since such bytes are not
 * expected in regular values, callers are advised to first locate a control
 * char using ist_find_ctl() and to only call this function from there.
 */
static int has_forbidden_char(const struct ist ist, const char *start)
{
	const char *p = start;

	do {
		switch ((uint8_t)*p) {
		case 0x00: /* NUL */
		case 0x0a: /* LF */
		case 0x0d: /* CR */
			return 1;
		default:
			break;
		}
		p++;
	} while (p < istend(ist));

	return 0;
}
/* Prepare the request line into <htx> from pseudo headers stored in <phdr[]>.
* <fields> indicates what was found so far. This should be called once at the
* detection of the first general header field or at the end of the request if
* no general header field was found yet. Returns the created start line on
* success, or NULL on failure. Upon success, <msgf> is updated with a few
* H2_MSGF_* flags indicating what was found while parsing.
*
* The rules below deserve a bit of explanation. There tends to be some
* confusion regarding H2's authority vs the Host header. They are different
* though may sometimes be exchanged. In H2, the request line is broken into :
* - :method
* - :scheme
* - :authority
* - :path
*
* An equivalent HTTP/1.x absolute-form request would then look like :
* <:method> <:scheme>://<:authority><:path> HTTP/x.y
*
* Except for CONNECT which doesn't have scheme nor path and looks like :
* <:method> <:authority> HTTP/x.y
*
* It's worth noting that H2 still supports an encoding to map H1 origin-form
* and asterisk-form requests. These ones do not specify the authority. However
* in H2 they must still specify the scheme, which is not present in H1. Also,
* when encoding an absolute-form H1 request without a path, the path
* automatically becomes "/" except for the OPTIONS method where it
* becomes "*".
*
* As such it is explicitly permitted for an H2 client to send a request
* featuring a Host header and no :authority, though it's not the recommended
* way to use H2 for a client. It is however the only permitted way to encode
* an origin-form H1 request over H2. Thus we need to respect such differences
* as much as possible when re-encoding the H2 request into HTX.
*/
static struct htx_sl *h2_prepare_htx_reqline(uint32_t fields, struct ist *phdr, struct htx *htx, unsigned int *msgf)
{
struct ist uri, meth_sl;
unsigned int flags = HTX_SL_F_NONE;
struct htx_sl *sl;
size_t i;
if ((fields & H2_PHDR_FND_METH) && isteq(phdr[H2_PHDR_IDX_METH], ist("CONNECT"))) {
if (fields & H2_PHDR_FND_PROT) {
/* rfc 8441 Extended Connect Protocol
* #4 :scheme and :path must be present, as well as
* :authority like all h2 requests
*/
if (!(fields & H2_PHDR_FND_SCHM)) {
/* missing scheme */
goto fail;
}
else if (!(fields & H2_PHDR_FND_PATH)) {
/* missing path */
goto fail;
}
else if (!(fields & H2_PHDR_FND_AUTH)) {
/* missing authority */
goto fail;
}
flags |= HTX_SL_F_HAS_SCHM;
if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("http")))
flags |= HTX_SL_F_SCHM_HTTP;
else if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("https")))
flags |= HTX_SL_F_SCHM_HTTPS;
BUG/MAJOR: h2: verify early that non-http/https schemes match the valid syntax While we do explicitly check for strict character sets in the scheme, this is only done when extracting URL components from an assembled one, and we have special handling for "http" and "https" schemes directly in the H2-to-HTX conversion. Sadly, this lets all other ones pass through if they start exactly with "http://" or "https://", allowing the reconstructed URI to start with a different looking authority if it was part of the scheme. It's interesting to note that in this case the valid authority is in the Host header and that the request will only be wrong if emitted over H2 on the backend side, since H1 will not emit an absolute URI by default and will drop the scheme. So in essence, this is a variant of the scheme-based attack described below in that it only affects H2-H2 and not H2-H1 forwarding: https://portswigger.net/research/http2 As such, a simple workaround consists in just inserting the following rule before other ones in the frontend, which will have for effect to renormalize the authority in the request line according to the concatenated version (making haproxy see the same authority and host as what the target server will see): http-request set-uri %[url] This patch simply adds the missing syntax checks for non-http/https schemes before the concatenation in the H2 code. An improvement may consist in the future in splitting these ones apart in the start line so that only the "url" sample fetch function requires to access them together and that all other places continue to access them separately. This will then allow the core code to perform such checks itself. The patch needs to be backported as far as 2.2. Before 2.2 the full URI was not being reconstructed so the scheme and authority part were always dropped from H2 requests to leave only origin requests. 
Note for backporters: this depends on this previous patch: MINOR: http: add a new function http_validate_scheme() to validate a scheme Many thanks to Tim Düsterhus for figuring that one and providing a reproducer.
2021-08-10 09:37:34 -04:00
else if (!http_validate_scheme(phdr[H2_PHDR_IDX_SCHM]))
htx->flags |= HTX_FL_PARSING_ERROR;
meth_sl = ist("GET");
*msgf |= H2_MSGF_EXT_CONNECT;
/* no ES on the HEADERS frame but no body either for
* Extended CONNECT */
*msgf &= ~H2_MSGF_BODY;
}
else {
/* RFC 7540 #8.2.6 regarding CONNECT: ":scheme" and ":path"
* MUST be omitted ; ":authority" contains the host and port
* to connect to.
*/
if (fields & H2_PHDR_FND_SCHM) {
/* scheme not allowed */
goto fail;
}
else if (fields & H2_PHDR_FND_PATH) {
/* path not allowed */
goto fail;
}
else if (!(fields & H2_PHDR_FND_AUTH)) {
/* missing authority */
goto fail;
}
meth_sl = phdr[H2_PHDR_IDX_METH];
}
*msgf |= H2_MSGF_BODY_TUNNEL;
}
else if ((fields & (H2_PHDR_FND_METH|H2_PHDR_FND_SCHM|H2_PHDR_FND_PATH)) !=
(H2_PHDR_FND_METH|H2_PHDR_FND_SCHM|H2_PHDR_FND_PATH)) {
/* RFC 7540 #8.1.2.3 : all requests MUST include exactly one
* valid value for the ":method", ":scheme" and ":path" phdr
* unless it is a CONNECT request.
*/
if (!(fields & H2_PHDR_FND_METH)) {
/* missing method */
goto fail;
}
else if (!(fields & H2_PHDR_FND_SCHM)) {
/* missing scheme */
goto fail;
}
else {
/* missing path */
goto fail;
}
}
else { /* regular methods */
/* RFC3986#6.2.2.1: scheme is case-insensitive. We need to
* classify the scheme as "present/http", "present/https",
* "present/other", "absent" so as to decide whether or not
* we're facing a normalized URI that will have to be encoded
* in origin or absolute form. Indeed, 7540#8.1.2.3 says that
* clients should use the absolute form, thus we cannot infer
* whether or not the client wanted to use a proxy here.
*/
flags |= HTX_SL_F_HAS_SCHM;
if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("http")))
flags |= HTX_SL_F_SCHM_HTTP;
else if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("https")))
flags |= HTX_SL_F_SCHM_HTTPS;
BUG/MAJOR: h2: verify early that non-http/https schemes match the valid syntax While we do explicitly check for strict character sets in the scheme, this is only done when extracting URL components from an assembled one, and we have special handling for "http" and "https" schemes directly in the H2-to-HTX conversion. Sadly, this lets all other ones pass through if they start exactly with "http://" or "https://", allowing the reconstructed URI to start with a different looking authority if it was part of the scheme. It's interesting to note that in this case the valid authority is in the Host header and that the request will only be wrong if emitted over H2 on the backend side, since H1 will not emit an absolute URI by default and will drop the scheme. So in essence, this is a variant of the scheme-based attack described below in that it only affects H2-H2 and not H2-H1 forwarding: https://portswigger.net/research/http2 As such, a simple workaround consists in just inserting the following rule before other ones in the frontend, which will have for effect to renormalize the authority in the request line according to the concatenated version (making haproxy see the same authority and host as what the target server will see): http-request set-uri %[url] This patch simply adds the missing syntax checks for non-http/https schemes before the concatenation in the H2 code. An improvement may consist in the future in splitting these ones apart in the start line so that only the "url" sample fetch function requires to access them together and that all other places continue to access them separately. This will then allow the core code to perform such checks itself. The patch needs to be backported as far as 2.2. Before 2.2 the full URI was not being reconstructed so the scheme and authority part were always dropped from H2 requests to leave only origin requests. 
Note for backporters: this depends on this previous patch: MINOR: http: add a new function http_validate_scheme() to validate a scheme Many thanks to Tim Düsterhus for figuring that one and providing a reproducer.
2021-08-10 09:37:34 -04:00
else if (!http_validate_scheme(phdr[H2_PHDR_IDX_SCHM]))
htx->flags |= HTX_FL_PARSING_ERROR;
meth_sl = phdr[H2_PHDR_IDX_METH];
}
if (fields & H2_PHDR_FND_PATH) {
/* 7540#8.1.2.3: :path must not be empty, and must be either
* '*' or an RFC3986 "path-absolute" starting with a "/" but
* not with "//".
* However, this "path-absolute" was a mistake which was
* later fixed in http2bis as "absolute-path" to match
* HTTP/1, thus also allowing "//".
*/
if (unlikely(!phdr[H2_PHDR_IDX_PATH].len))
goto fail;
else if (unlikely(phdr[H2_PHDR_IDX_PATH].ptr[0] != '/')) {
if (!isteq(phdr[H2_PHDR_IDX_PATH], ist("*")))
goto fail;
}
}
if (!(flags & HTX_SL_F_HAS_SCHM)) {
/* no scheme, use authority only (CONNECT) */
uri = phdr[H2_PHDR_IDX_AUTH];
flags |= HTX_SL_F_HAS_AUTHORITY;
}
MEDIUM: h2: use the normalized URI encoding for absolute form requests H2 strongly recommends that clients exclusively use the absolute form for requests, which contains a scheme, an authority and a path, instead of the old format involving the Host header and a path. Thus there is no way to distinguish between a request intended for a proxy and an origin request, and as such proxied requests are lost. This patch makes sure to keep the encoding of all absolute form requests so that the URI is kept end-to-end. If the scheme is http or https, there is an uncertainty so the request is tagged as a normalized URI so that the other end (H1) can decide to emit it in origin form as this is by far the most commonly expected one, and it's certain that quite a number of H1 setups are not ready to cope with absolute URIs. There is a direct visible impact of this change, which is that the uri sample fetch will now return absolute URIs (as they really come on the wire) whenever these are used. It also means that default http logs will report absolute URIs. If a situation is once met where a client uses H2 to join an H1 proxy with haproxy in the middle, then it will be trivial to add an option to ask the H1 output to use absolute encoding for such requests. Later we may be able to consider that the normalized URI is the default output format and stop sending them in origin form unless an option is set. 
Now chaining multiple instances keeps the semantics as far as possible along the whole chain : 1) H1 to H1 H1:"GET /" --> H1:"GET /" # log: / H1:"GET http://" --> H1:"GET http://" # log: http:// H1:"GET ftp://" --> H1:"GET ftp://" # log: ftp:// 2) H2 to H1 H2:"GET /" --> H1:"GET /" # log: / H2:"GET http://" --> H1:"GET /" # log: http:// H2:"GET ftp://" --> H1:"GET ftp://" # log: ftp:// 3) H1 to H2 to H2 to H1 H1:"GET /" --> H2:"GET /" --> H2:"GET /" --> H1:"GET /" H1:"GET http://" --> H2:"GET http://" --> H2:"GET http://" --> H1:"GET /" H1:"GET ftp://" --> H2:"GET ftp://" --> H2:"GET ftp://" --> H1:"GET ftp://" Thus there is zero loss on H1->H1, H1->H2 nor H2->H2, and H2->H1 is normalized in origin format if ambiguous.
2019-10-08 12:33:19 -04:00
else if (fields & H2_PHDR_FND_AUTH) {
/* authority is present, let's use the absolute form. We simply
* use the trash to concatenate them since all of them MUST fit
* in a bufsize since it's where they come from.
*/
uri = ist2bin(trash.area, phdr[H2_PHDR_IDX_SCHM]);
istcat(&uri, ist("://"), trash.size);
istcat(&uri, phdr[H2_PHDR_IDX_AUTH], trash.size);
if (!isteq(phdr[H2_PHDR_IDX_PATH], ist("*")))
istcat(&uri, phdr[H2_PHDR_IDX_PATH], trash.size);
flags |= HTX_SL_F_HAS_AUTHORITY;
MEDIUM: h2: use the normalized URI encoding for absolute form requests H2 strongly recommends that clients exclusively use the absolute form for requests, which contains a scheme, an authority and a path, instead of the old format involving the Host header and a path. Thus there is no way to distinguish between a request intended for a proxy and an origin request, and as such proxied requests are lost. This patch makes sure to keep the encoding of all absolute form requests so that the URI is kept end-to-end. If the scheme is http or https, there is an uncertainty so the request is tagged as a normalized URI so that the other end (H1) can decide to emit it in origin form as this is by far the most commonly expected one, and it's certain that quite a number of H1 setups are not ready to cope with absolute URIs. There is a direct visible impact of this change, which is that the uri sample fetch will now return absolute URIs (as they really come on the wire) whenever these are used. It also means that default http logs will report absolute URIs. If a situation is once met where a client uses H2 to join an H1 proxy with haproxy in the middle, then it will be trivial to add an option to ask the H1 output to use absolute encoding for such requests. Later we may be able to consider that the normalized URI is the default output format and stop sending them in origin form unless an option is set. 
Now chaining multiple instances keeps the semantics as far as possible along the whole chain : 1) H1 to H1 H1:"GET /" --> H1:"GET /" # log: / H1:"GET http://" --> H1:"GET http://" # log: http:// H1:"GET ftp://" --> H1:"GET ftp://" # log: ftp:// 2) H2 to H1 H2:"GET /" --> H1:"GET /" # log: / H2:"GET http://" --> H1:"GET /" # log: http:// H2:"GET ftp://" --> H1:"GET ftp://" # log: ftp:// 3) H1 to H2 to H2 to H1 H1:"GET /" --> H2:"GET /" --> H2:"GET /" --> H1:"GET /" H1:"GET http://" --> H2:"GET http://" --> H2:"GET http://" --> H1:"GET /" H1:"GET ftp://" --> H2:"GET ftp://" --> H2:"GET ftp://" --> H1:"GET ftp://" Thus there is zero loss on H1->H1, H1->H2 nor H2->H2, and H2->H1 is normalized in origin format if ambiguous.
2019-10-08 12:33:19 -04:00
if (flags & (HTX_SL_F_SCHM_HTTP|HTX_SL_F_SCHM_HTTPS)) {
/* we don't know if it was originally an absolute or a
* relative request because newer versions of HTTP use
* the absolute URI format by default, which we call
* the normalized URI format internally. This is the
* strongly recommended way of sending a request for
* a regular client, so we cannot distinguish this
* from a request intended for a proxy. For other
* schemes however there is no doubt.
*/
flags |= HTX_SL_F_NORMALIZED_URI;
}
}
else {
/* usual schemes with or without authority, use origin form */
uri = phdr[H2_PHDR_IDX_PATH];
if (fields & H2_PHDR_FND_AUTH)
flags |= HTX_SL_F_HAS_AUTHORITY;
}
BUG/MAJOR: h2: enforce stricter syntax checks on the :method pseudo-header Before HTX was introduced, all the HTTP request elements passed in pseudo-headers fields were used to build an HTTP/1 request whose syntax was then scrutinized by the HTTP/1 parser, leaving no room to inject invalid characters. While NUL, CR and LF are properly blocked, it is possible to inject spaces in the method so that once translated to HTTP/1, fields are shifted by one spcae, and a lenient HTTP/1 server could possibly be fooled into using a part of the method as the URI. For example, the following request: H2 request :method: "GET /admin? HTTP/1.1" :path: "/static/images" would become: GET /admin? HTTP/1.1 /static/images HTTP/1.1 It's important to note that the resulting request is *not* valid, and that in order for this to be a problem, it requires that this request is delivered to an already vulnerable HTTP/1 server. A workaround here is to reject malformed methods by placing this rule in the frontend or backend, at least before leaving haproxy in H1: http-request reject if { method -m reg [^A-Z0-9] } Alternately H2 may be globally disabled by commenting out the "alpn" directive on "bind" lines, and by rejecting H2 streams creation by adding the following statement to the global section: tune.h2.max-concurrent-streams 0 This patch adds a check for each character of the method to make sure they belong to the ones permitted in a token, as mentioned in RFC7231#4.1. This should be backported to versions 2.0 and above. For older versions not having HTX_FL_PARSING_ERROR, a "goto fail" works as well as it results in a protocol error at the stream level. Non-HTX versions are safe because the resulting invalid request will be rejected by the internal HTTP/1 parser. Thanks to Tim Düsterhus for reporting that one.
2021-08-11 05:12:46 -04:00
/* The method is a non-empty token (RFC7231#4.1) */
if (!meth_sl.len)
goto fail;
for (i = 0; i < meth_sl.len; i++) {
if (!HTTP_IS_TOKEN(meth_sl.ptr[i]))
htx->flags |= HTX_FL_PARSING_ERROR;
}
/* make sure the final URI isn't empty. Note that 7540#8.1.2.3 states
* that :path must not be empty.
*/
if (!uri.len)
goto fail;
/* The final URI must not contain LWS nor CTL characters */
for (i = 0; i < uri.len; i++) {
unsigned char c = uri.ptr[i];
if (HTTP_IS_LWS(c) || HTTP_IS_CTL(c))
htx->flags |= HTX_FL_PARSING_ERROR;
}
/* Set HTX start-line flags */
flags |= HTX_SL_F_VER_11; // V2 in fact
flags |= HTX_SL_F_XFER_LEN; // xfer len always known with H2
sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth_sl, uri, ist("HTTP/2.0"));
if (!sl)
goto fail;
sl->info.req.meth = find_http_meth(meth_sl.ptr, meth_sl.len);
if (sl->info.req.meth == HTTP_METH_HEAD)
*msgf |= H2_MSGF_BODYLESS_RSP;
return sl;
fail:
return NULL;
}
/* Takes an H2 request present in the headers list <list> terminated by a name
* being <NULL,0> and emits the equivalent HTX request according to the rules
* documented in RFC7540 #8.1.2. The output contents are emitted in <htx>, and
* non-zero is returned if some bytes were emitted. In case of error, a
* negative error code is returned.
*
* Upon success, <msgf> is filled with a few H2_MSGF_* flags indicating what
* was found while parsing. The caller must set it to zero, or to H2_MSGF_BODY
* if a body is detected (!ES).
*
* The headers list <list> must be composed of :
* - n.name != NULL, n.len > 0 : literal header name
* - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
* among H2_PHDR_IDX_*
* - n.name ignored, n.len == 0 : end of list
* - in all cases except the end of list, v.name and v.len must designate a
* valid value.
*
* The Cookie header will be reassembled at the end, and for this, the <list>
* will be used to create a linked list, so its contents may be destroyed.
*/
int h2_make_htx_request(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len)
{
struct ist phdr_val[H2_PHDR_NUM_ENTRIES];
uint32_t fields; /* bit mask of H2_PHDR_FND_* */
uint32_t idx;
int ck, lck; /* cookie index and last cookie index */
int phdr;
int ret;
int i;
struct htx_sl *sl = NULL;
unsigned int sl_flags = 0;
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
const char *ctl;
lck = ck = -1; // no cookie for now
fields = 0;
for (idx = 0; list[idx].n.len != 0; idx++) {
if (!isttest(list[idx].n)) {
/* this is an indexed pseudo-header */
phdr = list[idx].n.len;
}
else {
/* this can be any type of header */
/* RFC7540#8.1.2: upper case not allowed in header field names.
* #10.3: header names must be valid (i.e. match a token).
* For pseudo-headers we check from 2nd char and for other ones
* from the first char, because HTTP_IS_TOKEN() also excludes
* the colon.
*/
phdr = h2_str_to_phdr(list[idx].n);
for (i = !!phdr; i < list[idx].n.len; i++)
if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
goto fail;
}
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
/* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
* rejecting NUL, CR and LF characters.
*/
ctl = ist_find_ctl(list[idx].v);
if (unlikely(ctl) && has_forbidden_char(list[idx].v, ctl))
goto fail;
if (phdr > 0 && phdr < H2_PHDR_NUM_ENTRIES) {
/* insert a pseudo header by its index (in phdr) and value (in value) */
if (fields & ((1 << phdr) | H2_PHDR_FND_NONE)) {
if (fields & H2_PHDR_FND_NONE) {
/* pseudo header field after regular headers */
goto fail;
}
else {
/* repeated pseudo header field */
goto fail;
}
}
fields |= 1 << phdr;
phdr_val[phdr] = list[idx].v;
continue;
}
else if (phdr != 0) {
/* invalid pseudo header -- should never happen here */
goto fail;
}
/* regular header field in (name,value) */
if (unlikely(!(fields & H2_PHDR_FND_NONE))) {
/* no more pseudo-headers, time to build the request line */
sl = h2_prepare_htx_reqline(fields, phdr_val, htx, msgf);
if (!sl)
goto fail;
fields |= H2_PHDR_FND_NONE;
BUG/MEDIUM: h2: give :authority precedence over Host The wording regarding Host vs :authority in RFC7540 is ambiguous as it says that an intermediary must produce a host header from :authority if Host is missing, but, contrary to HTTP/1.1, doesn't say anything regarding the possibility that Host and :authority differ, which leaves Host with higher precedence there. In addition it mentions that clients should use :authority *instead* of Host, and that H1->H2 should use :authority only if the original request was in authority form. This leaves some gray area in the middle of the chain for fully valid H2 requests harboring a Host header that are forwarded to the other side where it's possible to drop the Host header and use the authority only after forwarding to a second H2 layer, thus possibly seeing two different values of Host at a different stage. There's no such issue when forwarding from H2 to H1 as the authority is dropped and only the Host is kept. Note that the following rule is sufficient to re-normalize such a request: http-request set-header host %[req.hdr(host)] The new spec in progress (draft-ietf-httpbis-http2bis-03) addresses this trouble by being a bit stricter on these rules. It clarifies that :authority must always be used instead of Host and that Host ought to be ignored. This is much saner as it avoids conveying two distinct values along the chain. This becomes the protocol-level equivalent of: http-request set-uri %[url] So this patch does exactly this, which we were initially a bit reluctant to do for lack of visibility about other implementations' expectations. In addition it slightly simplifies the Host header field creation by always placing it first in the list of headers instead of last; this could also speed up the lookup a little bit. This needs to be backported to 2.0. Non-HTX versions are safe regarding this because they drop the URI during the conversion to HTTP/1.1 so only Host is used and transmitted. 
Thanks to Tim Düsterhus for reporting that one.
2021-08-11 09:39:13 -04:00
/* http2bis draft recommends to drop Host in favor of :authority when
* the latter is present. This is required to make sure there is no
* discrepancy between the authority and the host header, especially
* since routing rules usually involve Host. Here we already know if
* :authority was found so we can emit it right now and mark the host
* as filled so that it's skipped later.
*/
if (fields & H2_PHDR_FND_AUTH) {
if (!htx_add_header(htx, ist("host"), phdr_val[H2_PHDR_IDX_AUTH]))
goto fail;
fields |= H2_PHDR_FND_HOST;
}
}
BUG/MEDIUM: h2: give :authority precedence over Host The wording regarding Host vs :authority in RFC7540 is ambiguous as it says that an intermediary must produce a host header from :authority if Host is missing, but, contrary to HTTP/1.1, doesn't say anything regarding the possibility that Host and :authority differ, which leaves Host with higher precedence there. In addition it mentions that clients should use :authority *instead* of Host, and that H1->H2 should use :authority only if the original request was in authority form. This leaves some gray area in the middle of the chain for fully valid H2 requests harboring a Host header that are forwarded to the other side where it's possible to drop the Host header and use the authority only after forwarding to a second H2 layer, thus possibly seeing two different values of Host at a different stage. There's no such issue when forwarding from H2 to H1 as the authority is dropped and only the Host is kept. Note that the following rule is sufficient to re-normalize such a request: http-request set-header host %[req.hdr(host)] The new spec in progress (draft-ietf-httpbis-http2bis-03) addresses this trouble by being a bit stricter on these rules. It clarifies that :authority must always be used instead of Host and that Host ought to be ignored. This is much saner as it avoids conveying two distinct values along the chain. This becomes the protocol-level equivalent of: http-request set-uri %[url] So this patch does exactly this, which we were initially a bit reluctant to do for lack of visibility about other implementations' expectations. In addition it slightly simplifies the Host header field creation by always placing it first in the list of headers instead of last; this could also speed up the lookup a little bit. This needs to be backported to 2.0. Non-HTX versions are safe regarding this because they drop the URI during the conversion to HTTP/1.1 so only Host is used and transmitted. 
Thanks to Tim Düsterhus for reporting that one.
2021-08-11 09:39:13 -04:00
if (isteq(list[idx].n, ist("host"))) {
if (fields & H2_PHDR_FND_HOST)
continue;
fields |= H2_PHDR_FND_HOST;
BUG/MEDIUM: h2: give :authority precedence over Host The wording regarding Host vs :authority in RFC7540 is ambiguous as it says that an intermediary must produce a host header from :authority if Host is missing, but, contrary to HTTP/1.1, doesn't say anything regarding the possibility that Host and :authority differ, which leaves Host with higher precedence there. In addition it mentions that clients should use :authority *instead* of Host, and that H1->H2 should use :authority only if the original request was in authority form. This leaves some gray area in the middle of the chain for fully valid H2 requests harboring a Host header that are forwarded to the other side where it's possible to drop the Host header and use the authority only after forwarding to a second H2 layer, thus possibly seeing two different values of Host at a different stage. There's no such issue when forwarding from H2 to H1 as the authority is dropped and only the Host is kept. Note that the following rule is sufficient to re-normalize such a request: http-request set-header host %[req.hdr(host)] The new spec in progress (draft-ietf-httpbis-http2bis-03) addresses this trouble by being a bit stricter on these rules. It clarifies that :authority must always be used instead of Host and that Host ought to be ignored. This is much saner as it avoids conveying two distinct values along the chain. This becomes the protocol-level equivalent of: http-request set-uri %[url] So this patch does exactly this, which we were initially a bit reluctant to do for lack of visibility about other implementations' expectations. In addition it slightly simplifies the Host header field creation by always placing it first in the list of headers instead of last; this could also speed up the lookup a little bit. This needs to be backported to 2.0. Non-HTX versions are safe regarding this because they drop the URI during the conversion to HTTP/1.1 so only Host is used and transmitted. 
Thanks to Tim Düsterhus for reporting that one.
2021-08-11 09:39:13 -04:00
}
if (isteq(list[idx].n, ist("content-length"))) {
ret = http_parse_cont_len_header(&list[idx].v, body_len,
*msgf & H2_MSGF_BODY_CL);
if (ret < 0)
goto fail;
*msgf |= H2_MSGF_BODY_CL;
sl_flags |= HTX_SL_F_CLEN;
if (ret == 0)
continue; // skip this duplicate
}
/* these ones are forbidden in requests (RFC7540#8.1.2.2) */
if (isteq(list[idx].n, ist("connection")) ||
isteq(list[idx].n, ist("proxy-connection")) ||
isteq(list[idx].n, ist("keep-alive")) ||
isteq(list[idx].n, ist("upgrade")) ||
isteq(list[idx].n, ist("transfer-encoding")))
goto fail;
if (isteq(list[idx].n, ist("te")) && !isteq(list[idx].v, ist("trailers")))
goto fail;
/* cookie requires special processing at the end */
if (isteq(list[idx].n, ist("cookie"))) {
http_cookie_register(list, idx, &ck, &lck);
continue;
}
if (!htx_add_header(htx, list[idx].n, list[idx].v))
goto fail;
}
/* RFC7540#8.1.2.1 mandates to reject response pseudo-headers (:status) */
if (fields & H2_PHDR_FND_STAT)
goto fail;
/* Let's dump the request now if not yet emitted. */
if (!(fields & H2_PHDR_FND_NONE)) {
sl = h2_prepare_htx_reqline(fields, phdr_val, htx, msgf);
if (!sl)
goto fail;
}
if (*msgf & H2_MSGF_BODY_TUNNEL)
*msgf &= ~(H2_MSGF_BODY|H2_MSGF_BODY_CL);
if (!(*msgf & H2_MSGF_BODY) || ((*msgf & H2_MSGF_BODY_CL) && *body_len == 0) ||
(*msgf & H2_MSGF_BODY_TUNNEL)) {
/* Request without body or tunnel requested */
sl_flags |= HTX_SL_F_BODYLESS;
htx->flags |= HTX_FL_EOM;
}
if (*msgf & H2_MSGF_EXT_CONNECT) {
if (!htx_add_header(htx, ist("upgrade"), phdr_val[H2_PHDR_IDX_PROT]))
goto fail;
if (!htx_add_header(htx, ist("connection"), ist("upgrade")))
goto fail;
sl_flags |= HTX_SL_F_CONN_UPG;
}
/* update the start line with last detected header info */
sl->flags |= sl_flags;
BUG/MEDIUM: h2: give :authority precedence over Host The wording regarding Host vs :authority in RFC7540 is ambiguous as it says that an intermediary must produce a host header from :authority if Host is missing, but, contrary to HTTP/1.1, doesn't say anything regarding the possibility that Host and :authority differ, which leaves Host with higher precedence there. In addition it mentions that clients should use :authority *instead* of Host, and that H1->H2 should use :authority only if the original request was in authority form. This leaves some gray area in the middle of the chain for fully valid H2 requests harboring a Host header that are forwarded to the other side where it's possible to drop the Host header and use the authority only after forwarding to a second H2 layer, thus possibly seeing two different values of Host at a different stage. There's no such issue when forwarding from H2 to H1 as the authority is dropped and only the Host is kept. Note that the following rule is sufficient to re-normalize such a request: http-request set-header host %[req.hdr(host)] The new spec in progress (draft-ietf-httpbis-http2bis-03) addresses this trouble by being a bit stricter on these rules. It clarifies that :authority must always be used instead of Host and that Host ought to be ignored. This is much saner as it avoids conveying two distinct values along the chain. This becomes the protocol-level equivalent of: http-request set-uri %[url] So this patch does exactly this, which we were initially a bit reluctant to do for lack of visibility about other implementations' expectations. In addition it slightly simplifies the Host header field creation by always placing it first in the list of headers instead of last; this could also speed up the lookup a little bit. This needs to be backported to 2.0. Non-HTX versions are safe regarding this because they drop the URI during the conversion to HTTP/1.1 so only Host is used and transmitted. 
Thanks to Tim Düsterhus for reporting that one.
2021-08-11 09:39:13 -04:00
/* complete with missing Host if needed (we may validate this test if
* no regular header was found).
*/
if ((fields & (H2_PHDR_FND_HOST|H2_PHDR_FND_AUTH)) == H2_PHDR_FND_AUTH) {
/* missing Host field, use :authority instead */
if (!htx_add_header(htx, ist("host"), phdr_val[H2_PHDR_IDX_AUTH]))
goto fail;
}
/* now we may have to build a cookie list. We'll dump the values of all
* visited headers.
*/
if (ck >= 0) {
if (http_cookie_merge(htx, list, ck))
goto fail;
}
/* now send the end of headers marker */
BUG/MAJOR: mux-h1/mux-h2/htx: Fix HTTP tunnel management at the mux level Tunnel management between the H1 and H2 multiplexers is a bit blurred. And the HTX is not well enough defined on this point to make things clear. In fact, establishing a tunnel between an H2 client and an H1 server, or the opposite, is buggy because the two multiplexers don't handle the EOM block the same way when a tunnel is established. In fact, the H2 multiplexer is pretty strict and adds an END_STREAM flag when an EOM block is found, while the H1 multiplexer is more flexible. The purpose of this patch is to make the EOM block usage pretty clear and to fix the HTTP multiplexers to really handle HTTP tunnels in the right way. Now, an EOM block is used to mark the end of an HTTP message, semantically speaking. That means it may be followed by tunneled data. Thus, CONNECT requests are now finished by an EOM block, just after the EOH block. On the H1 multiplexer side, a tunnel is now only established on the response path. So a CONNECT request remains in a DONE state waiting for the 2xx response. On the H2 multiplexer side, a flag is used to know an HTTP tunnel is requested, to not immediately add the END_STREAM flag on the EOM block. All these changes are sensitive and not backportable because of recent changes. The same problem exists on earlier versions and should be addressed. But it will only be possible with a specific patchset. This patch relies on the following ones : * MEDIUM: mux-h1: Properly handle tunnel establishments and aborts * MEDIUM: mux-h2: Close streams when processing data for an aborted tunnel * MEDIUM: mux-h2: Block client data on server side waiting tunnel establishment * MINOR: mux-h2: Add 2 flags to help to properly handle tunnel mode * MINOR: mux-h1: Split H1C_F_WAIT_OPPOSITE flag to separate input/output sides * MINOR: mux-h1/mux-fcgi: Don't set TUNNEL mode if payload length is unknown
2021-01-22 09:28:03 -05:00
if (!htx_add_endof(htx, HTX_BLK_EOH))
goto fail;
/* proceed to scheme-based normalization on target-URI */
if (fields & H2_PHDR_FND_SCHM)
http_scheme_based_normalize(htx);
ret = 1;
return ret;
fail:
return -1;
}
/* Prepare the status line into <htx> from pseudo headers stored in <phdr[]>.
 * <fields> indicates what was found so far. This should be called once at the
 * detection of the first general header field or at the end of the message if
 * no general header field was found yet. Returns the created start line on
 * success, or NULL on failure. Upon success, <msgf> is updated with a few
 * H2_MSGF_* flags indicating what was found while parsing.
 *
 * NULL is returned when :
 *   - the ":status" pseudo header was not found in <fields> ;
 *   - its value is not made of exactly three decimal digits ;
 *   - the resulting code is below 100 (not a valid HTTP status code) ;
 *   - the code is 101 while H2_MSGF_EXT_CONNECT is not set in <msgf> ;
 *   - the start line could not be added to <htx>.
 */
static struct htx_sl *h2_prepare_htx_stsline(uint32_t fields, struct ist *phdr, struct htx *htx, unsigned int *msgf)
{
	unsigned int status, flags = HTX_SL_F_IS_RESP;
	struct htx_sl *sl;
	struct ist stat;

	/* the :status pseudo header is mandatory to build a status line */
	if (!(fields & H2_PHDR_FND_STAT))
		goto fail;

	if (phdr[H2_PHDR_IDX_STAT].len != 3)
		goto fail;

	/* if Extended CONNECT is used, convert status code from 200 to htx 101
	 * following rfc 8441 */
	if (unlikely(*msgf & H2_MSGF_EXT_CONNECT) &&
	    isteq(phdr[H2_PHDR_IDX_STAT], ist("200"))) {
		stat = ist("101");
		status = 101;
	}
	else {
		unsigned char h, t, u;

		stat = phdr[H2_PHDR_IDX_STAT];

		/* unsigned subtraction: any non-digit byte ends up above 9 */
		h = stat.ptr[0] - '0';
		t = stat.ptr[1] - '0';
		u = stat.ptr[2] - '0';
		if (h > 9 || t > 9 || u > 9)
			goto fail;
		status = h * 100 + t * 10 + u;

		/* "000".."099" are three digits but not valid status codes.
		 * Without this check they would match the "< 200" test below
		 * and be processed as interim (1xx) responses.
		 */
		if (status < 100)
			goto fail;
	}

	/* 101 responses are not supported in H2, so return a error.
	 * On 1xx responses there is no ES on the HEADERS frame but there is no
	 * body. So remove the flag H2_MSGF_BODY and add H2_MSGF_RSP_1XX to
	 * notify the decoder another HEADERS frame is expected.
	 * 204/304 response have no body by definition. So remove the flag
	 * H2_MSGF_BODY and set H2_MSGF_BODYLESS_RSP.
	 *
	 * Note however that there is a special condition for Extended CONNECT.
	 * In this case, we explicitly convert it to HTX 101 to mimic
	 * Get+Upgrade HTTP/1.1 mechanism
	 */
	if (status == 101) {
		if (!(*msgf & H2_MSGF_EXT_CONNECT))
			goto fail;
	}
	else if (status < 200) {
		*msgf |= H2_MSGF_RSP_1XX;
		*msgf &= ~H2_MSGF_BODY;
	}
	else if (status == 204 || status == 304) {
		*msgf &= ~H2_MSGF_BODY;
		*msgf |= H2_MSGF_BODYLESS_RSP;
	}

	/* Set HTX start-line flags */
	flags |= HTX_SL_F_VER_11;    // V2 in fact
	flags |= HTX_SL_F_XFER_LEN;  // xfer len always known with H2

	sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/2.0"), stat, ist(""));
	if (!sl)
		goto fail;
	sl->info.res.status = status;
	return sl;
 fail:
	return NULL;
}
/* Takes an H2 response present in the headers list <list> terminated by a name
* being <NULL,0> and emits the equivalent HTX response according to the rules
* documented in RFC7540 #8.1.2. The output contents are emitted in <htx>, and
* a positive value is returned if some bytes were emitted. In case of error, a
* negative error code is returned.
*
* Upon success, <msgf> is filled with a few H2_MSGF_* flags indicating what
* was found while parsing. The caller must set it to zero in or H2_MSGF_BODY
* if a body is detected (!ES).
*
* The headers list <list> must be composed of :
* - n.name != NULL, n.len > 0 : literal header name
* - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
* among H2_PHDR_IDX_*
* - n.name ignored, n.len == 0 : end of list
* - in all cases except the end of list, v.name and v.len must designate a
* valid value.
*
* <upgrade_protocol> is only used if the htx status code is 101 indicating a
* response to an upgrade or h2-equivalent request.
*/
int h2_make_htx_response(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len, char *upgrade_protocol)
{
struct ist phdr_val[H2_PHDR_NUM_ENTRIES];
uint32_t fields; /* bit mask of H2_PHDR_FND_* */
uint32_t idx;
int phdr;
int ret;
int i;
struct htx_sl *sl = NULL;
unsigned int sl_flags = 0;
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
const char *ctl;
fields = 0;
for (idx = 0; list[idx].n.len != 0; idx++) {
if (!isttest(list[idx].n)) {
/* this is an indexed pseudo-header */
phdr = list[idx].n.len;
}
else {
/* this can be any type of header */
/* RFC7540#8.1.2: upper case not allowed in header field names.
* #10.3: header names must be valid (i.e. match a token).
* For pseudo-headers we check from 2nd char and for other ones
* from the first char, because HTTP_IS_TOKEN() also excludes
* the colon.
*/
phdr = h2_str_to_phdr(list[idx].n);
for (i = !!phdr; i < list[idx].n.len; i++)
if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
goto fail;
}
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
/* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
* rejecting NUL, CR and LF characters.
*/
ctl = ist_find_ctl(list[idx].v);
if (unlikely(ctl) && has_forbidden_char(list[idx].v, ctl))
goto fail;
if (phdr > 0 && phdr < H2_PHDR_NUM_ENTRIES) {
/* insert a pseudo header by its index (in phdr) and value (in value) */
if (fields & ((1 << phdr) | H2_PHDR_FND_NONE)) {
if (fields & H2_PHDR_FND_NONE) {
/* pseudo header field after regular headers */
goto fail;
}
else {
/* repeated pseudo header field */
goto fail;
}
}
fields |= 1 << phdr;
phdr_val[phdr] = list[idx].v;
continue;
}
else if (phdr != 0) {
/* invalid pseudo header -- should never happen here */
goto fail;
}
/* regular header field in (name,value) */
if (!(fields & H2_PHDR_FND_NONE)) {
/* no more pseudo-headers, time to build the status line */
sl = h2_prepare_htx_stsline(fields, phdr_val, htx, msgf);
if (!sl)
goto fail;
fields |= H2_PHDR_FND_NONE;
}
if (isteq(list[idx].n, ist("content-length"))) {
ret = http_parse_cont_len_header(&list[idx].v, body_len,
*msgf & H2_MSGF_BODY_CL);
if (ret < 0)
goto fail;
*msgf |= H2_MSGF_BODY_CL;
sl_flags |= HTX_SL_F_CLEN;
if (ret == 0)
continue; // skip this duplicate
}
/* these ones are forbidden in responses (RFC7540#8.1.2.2) */
if (isteq(list[idx].n, ist("connection")) ||
isteq(list[idx].n, ist("proxy-connection")) ||
isteq(list[idx].n, ist("keep-alive")) ||
isteq(list[idx].n, ist("upgrade")) ||
isteq(list[idx].n, ist("transfer-encoding")))
goto fail;
if (!htx_add_header(htx, list[idx].n, list[idx].v))
goto fail;
}
/* RFC7540#8.1.2.1 mandates to reject request pseudo-headers */
if (fields & (H2_PHDR_FND_AUTH|H2_PHDR_FND_METH|H2_PHDR_FND_PATH|H2_PHDR_FND_SCHM))
goto fail;
/* Let's dump the request now if not yet emitted. */
if (!(fields & H2_PHDR_FND_NONE)) {
sl = h2_prepare_htx_stsline(fields, phdr_val, htx, msgf);
if (!sl)
goto fail;
}
if (sl->info.res.status == 101 && upgrade_protocol) {
if (!htx_add_header(htx, ist("connection"), ist("upgrade")))
goto fail;
if (!htx_add_header(htx, ist("upgrade"), ist(upgrade_protocol)))
goto fail;
sl_flags |= HTX_SL_F_CONN_UPG;
}
if ((*msgf & H2_MSGF_BODY_TUNNEL) &&
((sl->info.res.status >= 200 && sl->info.res.status < 300) || sl->info.res.status == 101))
*msgf &= ~(H2_MSGF_BODY|H2_MSGF_BODY_CL);
else
*msgf &= ~H2_MSGF_BODY_TUNNEL;
if (!(*msgf & H2_MSGF_BODY) || ((*msgf & H2_MSGF_BODY_CL) && *body_len == 0) ||
(*msgf & H2_MSGF_BODY_TUNNEL)) {
/* Response without body or tunnel successfully established */
sl_flags |= HTX_SL_F_BODYLESS;
htx->flags |= HTX_FL_EOM;
}
/* update the start line with last detected header info */
sl->flags |= sl_flags;
if ((*msgf & (H2_MSGF_BODY|H2_MSGF_BODY_TUNNEL|H2_MSGF_BODY_CL)) == H2_MSGF_BODY) {
/* FIXME: Do we need to signal anything when we have a body and
* no content-length, to have the equivalent of H1's chunked
* encoding?
*/
}
/* now send the end of headers marker */
BUG/MAJOR: mux-h1/mux-h2/htx: Fix HTTP tunnel management at the mux level Tunnel management between the H1 and H2 multiplexers is a bit blurred. And the HTX is not enough well defined on this point to make things clear. In fact, Establishing a tunnel between an H2 client and an H1 server, or the opposite is buggy because the both multiplexers don't handle the EOM block the same way when a tunnel is established. In fact, the H2 multiplexer is pretty strict and add an END_STREAM flag when an EOM block is found, while the H1 multiplexer is more flexible. The purpose of this patch is to make the EOM block usage pretty clear and to fix the HTTP multiplexers to really handle HTTP tunnels in the right way. Now, an EOM block is used to mark the end of an HTTP message, semantically speaking. That means it may be followed by tunneled data. Thus, CONNECT requests are now finished by an EOM block, just after the EOH block. On the H1 multiplexer side, a tunnel is now only established on the response path. So a CONNECT request remains in a DONE state waiting for the 2xx response. On the H2 multiplexer side, a flag is used to know an HTTP tunnel is requested, to not immediately add the END_STREAM flag on the EOM block. All these changes are sensitives and not backportable because of recent changes. The same problem exists on earlier versions and should be addressed. But it will only be possible with a specific patchset. This patch relies on the following ones : * MEDIUM: mux-h1: Properly handle tunnel establishments and aborts * MEDIUM: mux-h2: Close streams when processing data for an aborted tunnel * MEDIUM: mux-h2: Block client data on server side waiting tunnel establishment * MINOR: mux-h2: Add 2 flags to help to properly handle tunnel mode * MINOR: mux-h1: Split H1C_F_WAIT_OPPOSITE flag to separate input/output sides * MINOR: mux-h1/mux-fcgi: Don't set TUNNEL mode if payload length is unknown
2021-01-22 09:28:03 -05:00
if (!htx_add_endof(htx, HTX_BLK_EOH))
goto fail;
ret = 1;
return ret;
fail:
return -1;
}
/* Takes an H2 headers list <list> terminated by a name being <NULL,0> and emits
* the equivalent HTX trailers blocks. The output contents are emitted in <htx>,
* and a positive value is returned if some bytes were emitted. In case of
* error, a negative error code is returned. The caller must have verified that
* the message in the buffer is compatible with receipt of trailers.
*
* The headers list <list> must be composed of :
* - n.name != NULL, n.len > 0 : literal header name
* - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
* among H2_PHDR_IDX_* (illegal here)
* - n.name ignored, n.len == 0 : end of list
* - in all cases except the end of list, v.name and v.len must designate a
* valid value.
*/
int h2_make_htx_trailers(struct http_hdr *list, struct htx *htx)
{
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
const char *ctl;
uint32_t idx;
int i;
for (idx = 0; list[idx].n.len != 0; idx++) {
if (!isttest(list[idx].n)) {
/* This is an indexed pseudo-header (RFC7540#8.1.2.1) */
goto fail;
}
/* RFC7540#8.1.2: upper case not allowed in header field names.
* #10.3: header names must be valid (i.e. match a token). This
* also catches pseudo-headers which are forbidden in trailers.
*/
for (i = 0; i < list[idx].n.len; i++)
if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
goto fail;
/* these ones are forbidden in trailers (RFC7540#8.1.2.2) */
if (isteq(list[idx].n, ist("host")) ||
isteq(list[idx].n, ist("content-length")) ||
isteq(list[idx].n, ist("connection")) ||
isteq(list[idx].n, ist("proxy-connection")) ||
isteq(list[idx].n, ist("keep-alive")) ||
isteq(list[idx].n, ist("upgrade")) ||
isteq(list[idx].n, ist("te")) ||
isteq(list[idx].n, ist("transfer-encoding")))
goto fail;
BUG/MAJOR: h2: reject header values containing invalid chars Tim Düsterhus reported an annoying problem in the H2 decoder related to an ambiguity in the H2 spec. The spec says in section 10.3 that HTTP/2 allows header field values that are not valid (since they're binary) and at the same time that an H2 to H1 gateway must be careful to reject headers whose values contain \0, \r or \n. Till now, and for the sake of the ability to maintain end-to-end binary transparency in H2-to-H2, the H2 mux wouldn't reject this since it does not know what version will be used on the other side. In theory we should in fact perform such a check when converting an HTX header to H1. But this causes a problem as it means that all our rule sets, sample fetches, captures, logs or redirects may still find an LF in a header coming from H2. Also in 2.0 and older in legacy mode, the frames are instantly converted to H1 and HTX couldn't help there. So this means that in practice we must refrain from delivering such a header upwards, regardless of any outgoing protocol consideration. Applying such a lookup on all headers leaving the mux comes with a significant performance hit, especially for large ones. A first attempt was made at placing this into the HPACK decoder to refrain from learning invalid literals but error reporting becomes more complicated. Additional tests show that doing this within the HTX transcoding loop benefits from the hot L1 cache, and that by skipping up to 8 bytes per iteration the CPU cost remains within noise margin, around ~0.5%. This patch must be backported as far as 1.8 since this bug could be exploited and serve as the base for an attack. In 2.0 and earlier the fix must also be added to functions h2_make_h1_request() and h2_make_h1_trailers() to handle legacy mode. It relies on previous patch "MINOR: ist: add ist_find_ctl()" to speed up the control bytes lookup. All credits go to Tim for his detailed bug report and his initial patch.
2019-11-22 10:02:43 -05:00
/* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
* rejecting NUL, CR and LF characters.
*/
ctl = ist_find_ctl(list[idx].v);
if (unlikely(ctl) && has_forbidden_char(list[idx].v, ctl))
goto fail;
if (!htx_add_trailer(htx, list[idx].n, list[idx].v))
goto fail;
}
if (!htx_add_endof(htx, HTX_BLK_EOT))
goto fail;
return 1;
fail:
return -1;
}