detect/byte: cache byte values in tx state

det_ctx->byte_values is shared across every transaction a worker touches.
Once an inspect engine finishes for a signature, its ID is recorded in
inspect_flags and it won't run again when the rule resumes on a later
packet. That means a value produced by byte_extract or byte_math can get
clobbered by another transaction before the consumer buffer ever runs.

Store produced values in a byte_values array on the per-TX
DetectEngineState to fix this:

- Save to the TX cache after byte_extract or byte_math succeeds.
- Copy (memcpy) into det_ctx->byte_values at the start of each content
  inspection pass (recursion.count == 0).
- Free the TX cache on detect engine reload -- local IDs can change with
  a new ruleset, so the next save reallocates at the right size.
- Free on state destruction.

Document the cache in devguide/transactions.rst.

Issue: 7801
This commit is contained in:
Jeff Lucovsky 2026-03-01 09:53:53 -05:00
parent 54322f38f8
commit 7970e1e8d7
7 changed files with 207 additions and 12 deletions

View file

@ -303,6 +303,55 @@ And in Rust:
return 0;
}
Per-Transaction Byte Value Cache
================================
A rule can produce a value with ``byte_extract`` or ``byte_math`` in one
buffer and then use it in another — for example, pull a length out of
``http.request_header`` and feed it to a ``byte_test`` in ``file.data``.
For that to work, the value has to stick around long enough for the
consumer to see it, which isn't guaranteed: ``det_ctx->byte_values`` is
per-worker, shared across every transaction the worker touches, and it
gets clobbered easily.
Two common ways it gets clobbered:
- **Bidirectional (``=>``) rules with pipelined HTTP.** Suricata inspects
every transaction's toserver buffers before it touches any toclient
buffer, so a later TX's ``byte_extract`` happily overwrites an earlier
TX's value before the earlier TX's toclient consumer ever runs.
- **Unidirectional rules split across packets.** If the producer and
consumer sit at different progress levels, the consumer may not run
until a later packet. In between, another TX on the same worker can
land and stomp on ``det_ctx->byte_values``. The detect engine won't
re-run the producer's inspect engine either — once it's done, its ID
is recorded in the sig's ``inspect_flags`` and it's skipped on resume.
To keep values safe, the per-TX ``DetectEngineState`` carries its own
``byte_values`` array that mirrors ``det_ctx->byte_values``:
- **Save.** After ``byte_extract`` or ``byte_math`` runs, the value is
copied into the TX's cache.
- **Restore.** At the top of each content inspection pass
(``recursion.count == 0``), the cached values are copied into
``det_ctx->byte_values``. Each side keeps its own allocation so that
values remain stable across multiple packets.
- **Reset.** The cache is cleared on detect engine reload
(``ResetTxState``), since byte variable local IDs can shift with a new
ruleset. It's freed when the ``DetectEngineState`` is destroyed.
The cache is on for any rule that uses byte variables, unidirectional
included. Save only fires when an extraction actually runs, and restore
early-returns on an empty cache, so rules that never touch byte
variables don't pay a measurable cost.
Key functions:
- ``DetectEngineStateSaveByteValue()`` in ``detect-engine-state.c``
- ``DetectEngineStateRestoreByteValues()`` in ``detect-engine-state.c``
- ``DetectEngineContentInspectionRestoreByteValues()`` in ``detect-engine-content-inspection.c``
- ``DetectEngineStateSaveByteValueFromTx()`` in ``detect-engine-content-inspection.c``
Work In Progress changes
========================

View file

@ -956,6 +956,9 @@ pub type DetectEngineStateDirection = DetectEngineStateDirection_;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct DetectEngineState_ {
pub dir_state: [DetectEngineStateDirection; 2usize],
#[doc = " Storage for byte_extract values across buffers (direction-independent)"]
pub byte_values: *mut u64,
pub byte_values_len: u32,
}
impl Default for DetectEngineState_ {
fn default() -> Self {

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2023 Open Information Security Foundation
/* Copyright (C) 2007-2026 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -30,9 +30,12 @@
#include "detect.h"
#include "detect-engine.h"
#include "detect-parse.h"
#include "detect-engine-state.h"
#include "rust.h"
#include "app-layer-parser.h"
#include "detect-asn1.h"
#include "detect-content.h"
#include "detect-pcre.h"
@ -74,8 +77,51 @@ struct DetectEngineContentInspectionCtx {
uint32_t count;
const uint32_t limit;
} recursion;
AppLayerTxData *tx_data; /**< cached TX data pointer for byte value save/restore */
};
static inline AppLayerTxData *DetectEngineContentInspectionRestoreByteValues(
DetectEngineThreadCtx *det_ctx, Flow *f)
{
if (unlikely(!(det_ctx->tx_id_set && f != NULL && f->alstate != NULL)))
return NULL;
void *tx = AppLayerParserGetTx(f->proto, f->alproto, f->alstate, det_ctx->tx_id);
if (unlikely(tx == NULL))
return NULL;
AppLayerTxData *tx_data = AppLayerParserGetTxData(f->proto, f->alproto, tx);
if (tx_data != NULL && tx_data->de_state != NULL) {
DetectEngineStateRestoreByteValues(det_ctx, tx_data->de_state);
}
return tx_data;
}
static inline void DetectEngineStateSaveByteValueFromTx(
DetectEngineThreadCtx *det_ctx, Flow *f, AppLayerTxData **tx_data, uint32_t local_id)
{
if (tx_data == NULL || det_ctx == NULL)
return;
if (*tx_data == NULL && det_ctx->tx_id_set && f != NULL && f->alstate != NULL) {
void *tx = AppLayerParserGetTx(f->proto, f->alproto, f->alstate, det_ctx->tx_id);
if (tx != NULL) {
*tx_data = AppLayerParserGetTxData(f->proto, f->alproto, tx);
}
}
if (*tx_data != NULL) {
if (unlikely((*tx_data)->de_state == NULL)) {
(*tx_data)->de_state = DetectEngineStateAlloc();
}
if (likely((*tx_data)->de_state != NULL)) {
DetectEngineStateSaveByteValue((*tx_data)->de_state, local_id,
det_ctx->byte_values[local_id], det_ctx->byte_values_len);
}
}
}
/**
* \brief Run the actual payload match functions
*
@ -113,6 +159,15 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx,
SCEnter();
KEYWORD_PROFILING_START;
/* Restore byte keyword values from detection state at the top level only.
* Needed for any rule where the producer engine runs on an earlier packet
* than the consumer engine (both bidirectional and unidirectional rules).
* ctx->tx_data is cached here so recursive frames reuse it without
* re-fetching the TX. */
if (ctx->recursion.count == 0) {
ctx->tx_data = DetectEngineContentInspectionRestoreByteValues(det_ctx, f);
}
ctx->recursion.count++;
if (unlikely(ctx->recursion.count == ctx->recursion.limit)) {
KEYWORD_PROFILING_END(det_ctx, smd->type, 0);
@ -594,6 +649,8 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx,
SCLogDebug("[BE] Fetched value for index %d: %"PRIu64,
bed->local_id, det_ctx->byte_values[bed->local_id]);
/* Save byte keyword value to detection state for cross-buffer use. */
DetectEngineStateSaveByteValueFromTx(det_ctx, f, &ctx->tx_data, bed->local_id);
goto match;
} else if (smd->type == DETECT_BYTEMATH) {
@ -630,6 +687,8 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx,
SCLogDebug("[BM] Fetched value for index %d: %"PRIu64,
bmd->local_id, det_ctx->byte_values[bmd->local_id]);
/* Save byte keyword value to detection state for cross-buffer use. */
DetectEngineStateSaveByteValueFromTx(det_ctx, f, &ctx->tx_data, bmd->local_id);
goto match;
} else if (smd->type == DETECT_BSIZE) {

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2021 Open Information Security Foundation
/* Copyright (C) 2007-2026 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -37,6 +37,7 @@
*/
#include "suricata-common.h"
#include "util-validate.h"
#include "decode.h"
@ -171,6 +172,7 @@ void SCDetectEngineStateFree(DetectEngineState *state)
DeStateStore *store_next;
int i = 0;
SCFree(state->byte_values);
for (i = 0; i < 2; i++) {
store = state->dir_state[i].head;
while (store != NULL) {
@ -235,6 +237,67 @@ void DetectRunStoreStateTx(
SCLogDebug("Stored for TX %"PRIu64, tx_id);
}
/**
* \brief Save a byte keyword value to detection state for cross-buffer use.
*
* \param state The detection engine state (must not be NULL)
* \param local_id The parse-time sequential index of the byte variable
* \param value The extracted value to save
*/
void DetectEngineStateSaveByteValue(
DetectEngineState *state, uint32_t local_id, uint64_t value, uint32_t byte_values_len)
{
DEBUG_VALIDATE_BUG_ON(state == NULL);
if (unlikely(state == NULL))
return;
DEBUG_VALIDATE_BUG_ON(local_id >= byte_values_len);
if (unlikely(local_id >= byte_values_len))
return;
/* Allocate the per-tx cache array on first use, matching det_ctx size */
if (unlikely(state->byte_values == NULL)) {
state->byte_values = SCCalloc(byte_values_len, sizeof(uint64_t));
if (unlikely(state->byte_values == NULL)) {
return;
}
state->byte_values_len = byte_values_len;
}
state->byte_values[local_id] = value;
SCLogDebug("Saved byte value: local_id=%u value=%" PRIu64, local_id, value);
}
/**
* \brief Retrieve byte keyword values from detection state
*
* \param det_ctx Thread detection context
* \param state The detection engine state
*/
void DetectEngineStateRestoreByteValues(DetectEngineThreadCtx *det_ctx, DetectEngineState *state)
{
DEBUG_VALIDATE_BUG_ON(state == NULL || det_ctx == NULL);
if (unlikely(state == NULL || det_ctx == NULL))
return;
if (state->byte_values == NULL || state->byte_values_len == 0)
return;
/* det_ctx->byte_values is preallocated at engine startup (detect-engine.c)
* and must never be NULL here. The sizes are always equal: the TX cache is
* allocated on first save using det_ctx->byte_values_len, and ResetTxState
* frees it on ruleset reload before any new save can occur with a different
* size. */
DEBUG_VALIDATE_BUG_ON(det_ctx->byte_values == NULL);
DEBUG_VALIDATE_BUG_ON(state->byte_values_len != det_ctx->byte_values_len);
/* Copy the per-tx cache into the worker scratch buffer. Each side keeps
* its own allocation so that values remain correct across multiple
* packets: a pointer swap would rotate ownership on every packet and
* leave each TX reading another TX's values on the second packet. */
memcpy(det_ctx->byte_values, state->byte_values, state->byte_values_len * sizeof(uint64_t));
}
static inline void ResetTxState(DetectEngineState *s)
{
if (s) {
@ -249,6 +312,14 @@ static inline void ResetTxState(DetectEngineState *s)
s->dir_state[1].flags = 0;
/* reset 'cur' back to the list head */
s->dir_state[1].cur = s->dir_state[1].head;
/* Free byte_values on reload: local_ids and byte_values_len may
* change with the new ruleset, so the next Save reallocates at
* the current det_ctx->byte_values_len. */
if (s->byte_values != NULL) {
SCFree(s->byte_values);
s->byte_values = NULL;
s->byte_values_len = 0;
}
}
}

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2013 Open Information Security Foundation
/* Copyright (C) 2007-2026 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -34,7 +34,10 @@
#ifndef SURICATA_DETECT_ENGINE_STATE_H
#define SURICATA_DETECT_ENGINE_STATE_H
// forward declaration for bindgen
/* forward declarations */
typedef struct DetectEngineThreadCtx_ DetectEngineThreadCtx;
/* forward declaration for bindgen */
#define SigIntId uint32_t
#define DETECT_ENGINE_INSPECT_SIG_NO_MATCH 0
@ -94,6 +97,10 @@ typedef struct DetectEngineStateDirection_ {
typedef struct DetectEngineState_ {
DetectEngineStateDirection dir_state[2];
/** Storage for byte_extract values across buffers (direction-independent) */
uint64_t *byte_values;
uint32_t byte_values_len;
} DetectEngineState;
/**
@ -110,6 +117,11 @@ DetectEngineState *DetectEngineStateAlloc(void);
*/
void SCDetectEngineStateFree(DetectEngineState *state);
void DetectEngineStateSaveByteValue(
DetectEngineState *state, uint32_t local_id, uint64_t value, uint32_t byte_values_len);
void DetectEngineStateRestoreByteValues(DetectEngineThreadCtx *det_ctx, DetectEngineState *state);
#endif /* SURICATA_DETECT_ENGINE_STATE_H */
/**

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2022 Open Information Security Foundation
/* Copyright (C) 2007-2026 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -3316,8 +3316,8 @@ static TmEcode ThreadCtxDoInit (DetectEngineCtx *de_ctx, DetectEngineThreadCtx *
AlertQueueInit(det_ctx);
/* byte_extract storage */
det_ctx->byte_values = SCMalloc(sizeof(*det_ctx->byte_values) *
(de_ctx->byte_extract_max_local_id + 1));
det_ctx->byte_values_len = de_ctx->byte_extract_max_local_id + 1;
det_ctx->byte_values = SCMalloc(sizeof(*det_ctx->byte_values) * det_ctx->byte_values_len);
if (det_ctx->byte_values == NULL) {
return TM_ECODE_FAILED;
}

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2025 Open Information Security Foundation
/* Copyright (C) 2007-2026 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -1044,7 +1044,7 @@ typedef struct DetectEngineCtx_ {
int32_t sgh_mpm_context_stream;
/* the max local id used amongst all sigs */
int32_t byte_extract_max_local_id;
uint32_t byte_extract_max_local_id;
/** version of the detect engine. The version is incremented on reloads */
uint32_t version;
@ -1315,9 +1315,6 @@ typedef struct DetectEngineThreadCtx_ {
* prototype held by DetectEngineCtx. */
SpmThreadCtx *spm_thread_ctx;
/* byte_* values */
uint64_t *byte_values;
SigJsonContent *json_content;
uint8_t json_content_capacity;
uint8_t json_content_len;
@ -1383,6 +1380,10 @@ typedef struct DetectEngineThreadCtx_ {
RuleMatchCandidateTx *tx_candidates;
uint32_t tx_candidates_size;
/* byte_* values */
uint32_t byte_values_len; /**< number of elements allocated in byte_values */
uint64_t *byte_values;
MpmThreadCtx mtc; /**< thread ctx for the mpm */
/* work queue for post-rule matching affecting prefilter */
PostRuleMatchWorkQueue post_rule_work_queue;