diff --git a/doc/userguide/devguide/extending/app-layer/transactions.rst b/doc/userguide/devguide/extending/app-layer/transactions.rst index f46cda9543..22a5d662df 100644 --- a/doc/userguide/devguide/extending/app-layer/transactions.rst +++ b/doc/userguide/devguide/extending/app-layer/transactions.rst @@ -303,6 +303,55 @@ And in Rust: return 0; } +Per-Transaction Byte Value Cache +================================ + +A rule can produce a value with ``byte_extract`` or ``byte_math`` in one +buffer and then use it in another — for example, pull a length out of +``http.request_header`` and feed it to a ``byte_test`` in ``file.data``. +For that to work, the value has to stick around long enough for the +consumer to see it, which isn't guaranteed: ``det_ctx->byte_values`` is +per-worker, shared across every transaction the worker touches, and it +gets clobbered easily. + +Two common ways it gets clobbered: + +- **Bidirectional (``=>``) rules with pipelined HTTP.** Suricata inspects + every transaction's toserver buffers before it touches any toclient + buffer, so a later TX's ``byte_extract`` happily overwrites an earlier + TX's value before the earlier TX's toclient consumer ever runs. +- **Unidirectional rules split across packets.** If the producer and + consumer sit at different progress levels, the consumer may not run + until a later packet. In between, another TX on the same worker can + land and stomp on ``det_ctx->byte_values``. The detect engine won't + re-run the producer's inspect engine either — once it's done, its ID + is recorded in the sig's ``inspect_flags`` and it's skipped on resume. + +To keep values safe, the per-TX ``DetectEngineState`` carries its own +``byte_values`` array that mirrors ``det_ctx->byte_values``: + +- **Save.** After ``byte_extract`` or ``byte_math`` runs, the value is + copied into the TX's cache. +- **Restore.** At the top of each content inspection pass + (``recursion.count == 0``), the cached values are copied into + ``det_ctx->byte_values``. Each side keeps its own allocation so that + values remain stable across multiple packets. +- **Reset.** The cache is cleared on detect engine reload + (``ResetTxState``), since byte variable local IDs can shift with a new + ruleset. It's freed when the ``DetectEngineState`` is destroyed. + +The cache is on for any rule that uses byte variables, unidirectional +included. Save only fires when an extraction actually runs, and restore +early-returns on an empty cache, so rules that never touch byte +variables don't pay a measurable cost. + +Key functions: + +- ``DetectEngineStateSaveByteValue()`` in ``detect-engine-state.c`` +- ``DetectEngineStateRestoreByteValues()`` in ``detect-engine-state.c`` +- ``DetectEngineContentInspectionRestoreByteValues()`` in ``detect-engine-content-inspection.c`` +- ``DetectEngineStateSaveByteValueFromTx()`` in ``detect-engine-content-inspection.c`` + Work In Progress changes ======================== diff --git a/rust/sys/src/sys.rs b/rust/sys/src/sys.rs index aecaf19090..6706e22c41 100644 --- a/rust/sys/src/sys.rs +++ b/rust/sys/src/sys.rs @@ -956,6 +956,9 @@ pub type DetectEngineStateDirection = DetectEngineStateDirection_; #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct DetectEngineState_ { pub dir_state: [DetectEngineStateDirection; 2usize], + #[doc = " Storage for byte_extract values across buffers (direction-independent)"] + pub byte_values: *mut u64, + pub byte_values_len: u32, } impl Default for DetectEngineState_ { fn default() -> Self { diff --git a/src/detect-engine-content-inspection.c b/src/detect-engine-content-inspection.c index cb80f3bd7e..04f6a04875 100644 --- a/src/detect-engine-content-inspection.c +++ b/src/detect-engine-content-inspection.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2023 Open Information Security Foundation +/* Copyright (C) 2007-2026 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -30,9 +30,12 @@ #include "detect.h" #include "detect-engine.h" #include "detect-parse.h" +#include "detect-engine-state.h" #include "rust.h" +#include "app-layer-parser.h" + #include "detect-asn1.h" #include "detect-content.h" #include "detect-pcre.h" @@ -74,8 +77,51 @@ struct DetectEngineContentInspectionCtx { uint32_t count; const uint32_t limit; } recursion; + AppLayerTxData *tx_data; /**< cached TX data pointer for byte value save/restore */ }; +static inline AppLayerTxData *DetectEngineContentInspectionRestoreByteValues( + DetectEngineThreadCtx *det_ctx, Flow *f) +{ + if (unlikely(!(det_ctx->tx_id_set && f != NULL && f->alstate != NULL))) + return NULL; + + void *tx = AppLayerParserGetTx(f->proto, f->alproto, f->alstate, det_ctx->tx_id); + if (unlikely(tx == NULL)) + return NULL; + + AppLayerTxData *tx_data = AppLayerParserGetTxData(f->proto, f->alproto, tx); + if (tx_data != NULL && tx_data->de_state != NULL) { + DetectEngineStateRestoreByteValues(det_ctx, tx_data->de_state); + } + + return tx_data; +} + +static inline void DetectEngineStateSaveByteValueFromTx( + DetectEngineThreadCtx *det_ctx, Flow *f, AppLayerTxData **tx_data, uint32_t local_id) +{ + if (tx_data == NULL || det_ctx == NULL) + return; + + if (*tx_data == NULL && det_ctx->tx_id_set && f != NULL && f->alstate != NULL) { + void *tx = AppLayerParserGetTx(f->proto, f->alproto, f->alstate, det_ctx->tx_id); + if (tx != NULL) { + *tx_data = AppLayerParserGetTxData(f->proto, f->alproto, tx); + } + } + + if (*tx_data != NULL) { + if (unlikely((*tx_data)->de_state == NULL)) { + (*tx_data)->de_state = DetectEngineStateAlloc(); + } + if (likely((*tx_data)->de_state != NULL)) { + DetectEngineStateSaveByteValue((*tx_data)->de_state, local_id, + det_ctx->byte_values[local_id], det_ctx->byte_values_len); + } + } +} + /** * \brief Run the actual payload match functions * @@ -113,6 +159,15 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx, SCEnter(); KEYWORD_PROFILING_START; + /* Restore byte keyword values from detection state at the top level only. + * Needed for any rule where the producer engine runs on an earlier packet + * than the consumer engine (both bidirectional and unidirectional rules). + * ctx->tx_data is cached here so recursive frames reuse it without + * re-fetching the TX. */ + if (ctx->recursion.count == 0) { + ctx->tx_data = DetectEngineContentInspectionRestoreByteValues(det_ctx, f); + } + ctx->recursion.count++; if (unlikely(ctx->recursion.count == ctx->recursion.limit)) { KEYWORD_PROFILING_END(det_ctx, smd->type, 0); @@ -594,6 +649,8 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx, SCLogDebug("[BE] Fetched value for index %d: %"PRIu64, bed->local_id, det_ctx->byte_values[bed->local_id]); + /* Save byte keyword value to detection state for cross-buffer use. */ + DetectEngineStateSaveByteValueFromTx(det_ctx, f, &ctx->tx_data, bed->local_id); goto match; } else if (smd->type == DETECT_BYTEMATH) { @@ -630,6 +687,8 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx, SCLogDebug("[BM] Fetched value for index %d: %"PRIu64, bmd->local_id, det_ctx->byte_values[bmd->local_id]); + /* Save byte keyword value to detection state for cross-buffer use. */ + DetectEngineStateSaveByteValueFromTx(det_ctx, f, &ctx->tx_data, bmd->local_id); goto match; } else if (smd->type == DETECT_BSIZE) { diff --git a/src/detect-engine-state.c b/src/detect-engine-state.c index 2bd725ea05..371b1f2266 100644 --- a/src/detect-engine-state.c +++ b/src/detect-engine-state.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2021 Open Information Security Foundation +/* Copyright (C) 2007-2026 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -37,6 +37,7 @@ */ #include "suricata-common.h" +#include "util-validate.h" #include "decode.h" @@ -171,6 +172,7 @@ void SCDetectEngineStateFree(DetectEngineState *state) DeStateStore *store_next; int i = 0; + SCFree(state->byte_values); for (i = 0; i < 2; i++) { store = state->dir_state[i].head; while (store != NULL) { @@ -235,6 +237,67 @@ void DetectRunStoreStateTx( SCLogDebug("Stored for TX %"PRIu64, tx_id); } +/** + * \brief Save a byte keyword value to detection state for cross-buffer use. + * + * \param state The detection engine state (must not be NULL) + * \param local_id The parse-time sequential index of the byte variable + * \param value The extracted value to save + */ +void DetectEngineStateSaveByteValue( + DetectEngineState *state, uint32_t local_id, uint64_t value, uint32_t byte_values_len) +{ + DEBUG_VALIDATE_BUG_ON(state == NULL); + if (unlikely(state == NULL)) + return; + + DEBUG_VALIDATE_BUG_ON(local_id >= byte_values_len); + if (unlikely(local_id >= byte_values_len)) + return; + + /* Allocate the per-tx cache array on first use, matching det_ctx size */ + if (unlikely(state->byte_values == NULL)) { + state->byte_values = SCCalloc(byte_values_len, sizeof(uint64_t)); + if (unlikely(state->byte_values == NULL)) { + return; + } + state->byte_values_len = byte_values_len; + } + + state->byte_values[local_id] = value; + SCLogDebug("Saved byte value: local_id=%u value=%" PRIu64, local_id, value); +} + +/** + * \brief Retrieve byte keyword values from detection state + * + * \param det_ctx Thread detection context + * \param state The detection engine state + */ +void DetectEngineStateRestoreByteValues(DetectEngineThreadCtx *det_ctx, DetectEngineState *state) +{ + DEBUG_VALIDATE_BUG_ON(state == NULL || det_ctx == NULL); + if (unlikely(state == NULL || det_ctx == NULL)) + return; + + if (state->byte_values == NULL || state->byte_values_len == 0) + return; + + /* det_ctx->byte_values is preallocated at engine startup (detect-engine.c) + * and must never be NULL here. The sizes are always equal: the TX cache is + * allocated on first save using det_ctx->byte_values_len, and ResetTxState + * frees it on ruleset reload before any new save can occur with a different + * size. */ + DEBUG_VALIDATE_BUG_ON(det_ctx->byte_values == NULL); + DEBUG_VALIDATE_BUG_ON(state->byte_values_len != det_ctx->byte_values_len); + + /* Copy the per-tx cache into the worker scratch buffer. Each side keeps + * its own allocation so that values remain correct across multiple + * packets: a pointer swap would rotate ownership on every packet and + * leave each TX reading another TX's values on the second packet. */ + memcpy(det_ctx->byte_values, state->byte_values, state->byte_values_len * sizeof(uint64_t)); +} + static inline void ResetTxState(DetectEngineState *s) { if (s) { @@ -249,6 +312,14 @@ static inline void ResetTxState(DetectEngineState *s) s->dir_state[1].flags = 0; /* reset 'cur' back to the list head */ s->dir_state[1].cur = s->dir_state[1].head; + /* Free byte_values on reload: local_ids and byte_values_len may + * change with the new ruleset, so the next Save reallocates at + * the current det_ctx->byte_values_len. */ + if (s->byte_values != NULL) { + SCFree(s->byte_values); + s->byte_values = NULL; + s->byte_values_len = 0; + } } } diff --git a/src/detect-engine-state.h b/src/detect-engine-state.h index 5952776293..1dceaa8d80 100644 --- a/src/detect-engine-state.h +++ b/src/detect-engine-state.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 Open Information Security Foundation +/* Copyright (C) 2007-2026 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -34,7 +34,10 @@ #ifndef SURICATA_DETECT_ENGINE_STATE_H #define SURICATA_DETECT_ENGINE_STATE_H -// forward declaration for bindgen +/* forward declarations */ +typedef struct DetectEngineThreadCtx_ DetectEngineThreadCtx; + +/* forward declaration for bindgen */ #define SigIntId uint32_t #define DETECT_ENGINE_INSPECT_SIG_NO_MATCH 0 @@ -94,6 +97,10 @@ typedef struct DetectEngineStateDirection_ { typedef struct DetectEngineState_ { DetectEngineStateDirection dir_state[2]; + + /** Storage for byte_extract values across buffers (direction-independent) */ + uint64_t *byte_values; + uint32_t byte_values_len; } DetectEngineState; /** @@ -110,6 +117,11 @@ DetectEngineState *DetectEngineStateAlloc(void); */ void SCDetectEngineStateFree(DetectEngineState *state); +void DetectEngineStateSaveByteValue( + DetectEngineState *state, uint32_t local_id, uint64_t value, uint32_t byte_values_len); + +void DetectEngineStateRestoreByteValues(DetectEngineThreadCtx *det_ctx, DetectEngineState *state); + #endif /* SURICATA_DETECT_ENGINE_STATE_H */ /** diff --git a/src/detect-engine.c b/src/detect-engine.c index 66b9697627..9dc10328b8 100644 --- a/src/detect-engine.c +++ b/src/detect-engine.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2022 Open Information Security Foundation +/* Copyright (C) 2007-2026 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -3316,8 +3316,8 @@ static TmEcode ThreadCtxDoInit (DetectEngineCtx *de_ctx, DetectEngineThreadCtx * AlertQueueInit(det_ctx); /* byte_extract storage */ - det_ctx->byte_values = SCMalloc(sizeof(*det_ctx->byte_values) * - (de_ctx->byte_extract_max_local_id + 1)); + det_ctx->byte_values_len = de_ctx->byte_extract_max_local_id + 1; + det_ctx->byte_values = SCMalloc(sizeof(*det_ctx->byte_values) * det_ctx->byte_values_len); if (det_ctx->byte_values == NULL) { return TM_ECODE_FAILED; } diff --git a/src/detect.h b/src/detect.h index 75cdd016ea..2d4b9a3401 100644 --- a/src/detect.h +++ b/src/detect.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2025 Open Information Security Foundation +/* Copyright (C) 2007-2026 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -1044,7 +1044,7 @@ typedef struct DetectEngineCtx_ { int32_t sgh_mpm_context_stream; /* the max local id used amongst all sigs */ - int32_t byte_extract_max_local_id; + uint32_t byte_extract_max_local_id; /** version of the detect engine. The version is incremented on reloads */ uint32_t version; @@ -1315,9 +1315,6 @@ typedef struct DetectEngineThreadCtx_ { * prototype held by DetectEngineCtx. */ SpmThreadCtx *spm_thread_ctx; - /* byte_* values */ - uint64_t *byte_values; - SigJsonContent *json_content; uint8_t json_content_capacity; uint8_t json_content_len; @@ -1383,6 +1380,10 @@ typedef struct DetectEngineThreadCtx_ { RuleMatchCandidateTx *tx_candidates; uint32_t tx_candidates_size; + /* byte_* values */ + uint32_t byte_values_len; /**< number of elements allocated in byte_values */ + uint64_t *byte_values; + MpmThreadCtx mtc; /**< thread ctx for the mpm */ /* work queue for post-rule matching affecting prefilter */ PostRuleMatchWorkQueue post_rule_work_queue;