#!/usr/bin/env python3 """Extract Suricata rule examples from documentation RST files. This script scans a documentation tree for ``.. container:: example-rule`` blocks, converts role markup such as ``:example-rule-emphasis:`any``` back to plain text, and try to load them with Suricata, outputing invalid rules """ from __future__ import annotations import argparse import re import shutil import subprocess import tempfile from pathlib import Path from typing import Iterable, Iterator, List, Optional, Tuple ROLE_RE = re.compile( r"`?:example-rule-(?:action|header|options|emphasis):`([^`]*)`" ) RuleWithOrigin = Tuple[str, Path, int] def indent_width(line: str) -> int: return len(line) - len(line.lstrip(" ")) def clean_rule_text(text: str) -> str: # Unescape custom role markup used by docs around rule fragments. cleaned = ROLE_RE.sub(r"\1", text) # In docs, trailing '\\' is often used to wrap long rules across lines. cleaned = re.sub(r"\\\s*\n\s*", " ", cleaned) # RST often escapes pipe characters in examples. cleaned = cleaned.replace("\\|", "|") cleaned = cleaned.replace("\\*", "*") cleaned = re.sub(r"\s+", " ", cleaned) return cleaned.strip() def collect_container_body(lines: List[str], start_idx: int) -> Tuple[str, int]: container_indent = indent_width(lines[start_idx]) body_lines: List[str] = [] i = start_idx + 1 while i < len(lines): line = lines[i] if line.strip() == "": body_lines.append("") i += 1 continue if indent_width(line) <= container_indent: break body_lines.append(line) i += 1 non_empty = [line for line in body_lines if line.strip()] if non_empty: min_indent = min(indent_width(line) for line in non_empty) dedented = [line[min_indent:] if line.strip() else "" for line in body_lines] else: dedented = [] return "\n".join(dedented).strip(), i def extract_rules_from_rst(path: Path) -> Iterator[Tuple[str, int]]: lines = path.read_text(encoding="utf-8").splitlines() i = 0 while i < len(lines): if lines[i].strip() == ".. container:: example-rule": block_text, i = collect_container_body(lines, i) if block_text: cleaned = clean_rule_text(block_text) yield cleaned, i continue i += 1 def iter_rst_files(path: Path) -> Iterable[Path]: if path.is_file() and path.suffix == ".rst": return [path] if path.is_dir(): return sorted(path.rglob("*.rst")) return [] def resolve_suricata_bin(repo_root: Path, configured: Optional[str]) -> Path: if configured: return Path(configured) in_path = shutil.which("suricata") if in_path: return Path(in_path) candidates = [repo_root / "src" / "suricata", repo_root / "suricata"] for candidate in candidates: if candidate.exists(): return candidate raise SystemExit( "Unable to find Suricata binary. Use --suricata-bin to provide it." ) def check_rule_with_suricata( rule: str, suricata_bin: Path, suricata_yaml: Path, ) -> Tuple[bool, str]: with tempfile.TemporaryDirectory(prefix="doc-rule-check-") as tmpdir: rule_file = Path(tmpdir) / "rule.rules" rule_file.write_text(rule + "\n", encoding="utf-8") cmd = [ str(suricata_bin), "-T", "-c", str(suricata_yaml), "--data-dir="+tmpdir, "-S", str(rule_file), '--strict-rule-keywords=all', "-l", tmpdir, ] proc = subprocess.run( cmd, check=False, capture_output=True, text=True, ) combined = proc.stderr.strip() return proc.returncode == 0, combined def main() -> int: parser = argparse.ArgumentParser( description="Check Suricata rules from doc RST example-rule containers." ) parser.add_argument( "doc_path", nargs="?", default="doc", help="Path to doc directory or .rst file (default: doc)", ) parser.add_argument( "--suricata-bin", default=None, help="Path to Suricata binary (default: auto-detect)", ) parser.add_argument( "--suricata-yaml", default=None, help="Path to suricata.yaml (default: /suricata.yaml)", ) args = parser.parse_args() doc_path = Path(args.doc_path) if not doc_path.exists(): raise SystemExit(f"Invalid doc path: {doc_path}") repo_root = Path(__file__).resolve().parents[1] suricata_bin = resolve_suricata_bin(repo_root, args.suricata_bin) suricata_yaml = ( Path(args.suricata_yaml) if args.suricata_yaml else (repo_root / "scripts" / "docrules" / "docrules.yaml") ) if not suricata_yaml.exists(): raise SystemExit( f"suricata.yaml not found: {suricata_yaml}. Use --suricata-yaml." ) rules_with_origin: List[RuleWithOrigin] = [] for rst_file in iter_rst_files(doc_path): for rule, line_number in extract_rules_from_rst(rst_file): rules_with_origin.append((rule, rst_file, line_number)) invalid_rules = 0 for index, (rule, source_file, line_number) in enumerate(rules_with_origin, start=1): is_valid, output_text = check_rule_with_suricata( rule, suricata_bin, suricata_yaml, ) if not is_valid: print( ( f"Invalid rule at #{index} ({source_file}:{line_number})\n" f"Rule: {rule}\n" f"Suricata stderr:\n{output_text}\n" ), end="\n", ) invalid_rules = invalid_rules + 1 if invalid_rules: print( f"Found {invalid_rules} invalid rule(s) out of {len(rules_with_origin)} checked.", end="\n\n", ) return 1 print( f"Found no invalid rule out of {len(rules_with_origin)} checked.", end="\n\n", ) return 0 if __name__ == "__main__": raise SystemExit(main())