ipfw: migrate ipfw to 32-bit size rule numbers

This changes ABI due to the changed opcodes and includes the
following:
 * rule numbers and named object indexes converted to 32-bits
 * all hardcoded maximum rule numbers were replaced with the
   IPFW_DEFAULT_RULE macro
 * it is now possible to raise the maximum number of rules at
   build time
 * several opcodes converted to ipfw_insn_u32 to keep rulenum:
   O_CALL, O_SKIPTO
 * the call stack was modified to keep u32 rulenum. The behaviour of
   the O_CALL opcode was changed to avoid possible packet looping:
   when the call stack overflows or the mbuf tag allocation fails,
   the packet is now dropped instead of skipping to the next
   rule.
 * the 'return' action now has two modes to specify the return point:
   'next-rulenum' and 'next-rule'
 * new lookup key added for O_IP_DST_LOOKUP opcode 'lookup rulenum'
 * several opcodes converted to keep u32 named object indexes
   in special structure ipfw_insn_kidx
 * tables related opcodes modified to use two structures:
   ipfw_insn_kidx and ipfw_insn_table
 * added ability for table value matching for specific value type
   in 'table(name,valtype=value)' opcode
 * dynamic states and eaction code converted to use u32 rulenum
   and named objects indexes
 * added insntod() and insntoc() macros to cast to specific
   ipfw instruction type
 * default sockopt version was changed to IP_FW3_OPVER=1
 * FreeBSD 7-11 rule format support was removed
 * added ability to generate special rtsock messages via log opcode
 * added IP_FW_SKIPTO_CACHE sockopt to enable/disable skipto cache.
   It helps to reduce overhead when many rules are modified in batch.
 * added ability to keep NAT64LSN states during sets swapping

Obtained from:	Yandex LLC
Relnotes:	yes
Sponsored by:	Yandex LLC
Differential Revision:	https://reviews.freebsd.org/D46183
This commit is contained in:
Andrey V. Elsukov 2025-03-03 21:15:17 +03:00
parent d4c81623ac
commit 4a77657cbc
35 changed files with 3418 additions and 3261 deletions

View file

@ -1,5 +1,5 @@
.\"
.Dd December 6, 2024
.Dd March 3, 2025
.Dt IPFW 8
.Os
.Sh NAME
@ -40,10 +40,10 @@ in-kernel NAT.
.Ss SYSCTL SHORTCUTS
.Nm
.Cm enable
.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive | skipto_cache
.Nm
.Cm disable
.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive | skipto_cache
.Ss LOOKUP TABLES
.Nm
.Oo Cm set Ar N Oc Cm table Ar name Cm create Ar create-options
@ -1395,6 +1395,16 @@ Matches any IPv4 or IPv6 address for which an entry exists in the lookup table
If an optional 32-bit unsigned
.Ar value
is also specified, an entry will match only if it has this value.
If
.Ar value
is specified in the form
.Ar valtype=value ,
then only the table value field of the specified type is compared.
The value type can be one of
.Ar skipto , pipe , fib , nat , dscp , tag , divert , netgraph , limit , nh4
or
.Ar mark .
See the
.Sx LOOKUP TABLES
section below for more information on lookup tables.
@ -1865,7 +1875,7 @@ One or more
of source and destination addresses and ports can be
specified.
.It Cm lookup Bro Cm dst-ip | dst-port | dst-mac | src-ip | src-port | src-mac | uid |
.Cm jail | dscp | mark Brc Ar name
.Cm jail | dscp | mark | rulenum Brc Ar name
Search an entry in lookup table
.Ar name
that matches the field specified as argument.

File diff suppressed because it is too large Load diff

View file

@ -305,6 +305,8 @@ enum tokens {
TOK_LOGOFF,
TOK_PRIVATE,
TOK_PRIVATEOFF,
TOK_SWAPCONF,
TOK_SWAPCONFOFF,
/* NAT64 CLAT tokens */
TOK_NAT64CLAT,
@ -463,5 +465,5 @@ int table_check_name(const char *tablename);
void ipfw_list_ta(int ac, char *av[]);
void ipfw_list_values(int ac, char *av[]);
void table_fill_ntlv(struct _ipfw_obj_ntlv *ntlv, const char *name,
uint8_t set, uint16_t uidx);
uint8_t set, uint32_t uidx);

View file

@ -380,6 +380,8 @@ static struct _s_x nat64newcmds[] = {
{ "-log", TOK_LOGOFF },
{ "allow_private", TOK_PRIVATE },
{ "-allow_private", TOK_PRIVATEOFF },
{ "swap_conf", TOK_SWAPCONF },
{ "-swap_conf", TOK_SWAPCONFOFF },
/* for compatibility with old configurations */
{ "max_ports", TOK_MAX_PORTS }, /* unused */
{ NULL, 0 }
@ -514,6 +516,12 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
case TOK_PRIVATEOFF:
cfg->flags &= ~NAT64_ALLOW_PRIVATE;
break;
case TOK_SWAPCONF:
cfg->flags |= NAT64LSN_ALLOW_SWAPCONF;
break;
case TOK_SWAPCONFOFF:
cfg->flags &= ~NAT64LSN_ALLOW_SWAPCONF;
break;
}
}
@ -631,6 +639,12 @@ nat64lsn_config(const char *name, uint8_t set, int ac, char **av)
case TOK_PRIVATEOFF:
cfg->flags &= ~NAT64_ALLOW_PRIVATE;
break;
case TOK_SWAPCONF:
cfg->flags |= NAT64LSN_ALLOW_SWAPCONF;
break;
case TOK_SWAPCONFOFF:
cfg->flags &= ~NAT64LSN_ALLOW_SWAPCONF;
break;
default:
errx(EX_USAGE, "Can't change %s option", opt);
}
@ -796,6 +810,8 @@ nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
printf(" icmp_age %u", cfg->st_icmp_ttl);
if (g_co.verbose || cfg->jmaxlen != NAT64LSN_JMAXLEN)
printf(" jmaxlen %u", cfg->jmaxlen);
if (cfg->flags & NAT64LSN_ALLOW_SWAPCONF)
printf(" swap_conf");
if (cfg->flags & NAT64_LOG)
printf(" log");
if (cfg->flags & NAT64_ALLOW_PRIVATE)

View file

@ -312,7 +312,7 @@ ipfw_table_handler(int ac, char *av[])
void
table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set,
uint16_t uidx)
uint32_t uidx)
{
ntlv->head.type = IPFW_TLV_TBL_NAME;

View file

@ -1663,6 +1663,7 @@ static const char *const msgtypes[] = {
"RTM_DELMADDR: multicast group membership removed from iface",
"RTM_IFANNOUNCE: interface arrival/departure",
"RTM_IEEE80211: IEEE 802.11 wireless event",
"RTM_IPFWLOG: IPFW log",
};
static const char metricnames[] =

View file

@ -295,6 +295,7 @@ struct rt_msghdr {
#define RTM_DELMADDR 0x10 /* (4) mcast group membership being deleted */
#define RTM_IFANNOUNCE 0x11 /* (5) iface arrival/departure */
#define RTM_IEEE80211 0x12 /* (5) IEEE80211 wireless event */
#define RTM_IPFWLOG 0x13 /* (1) IPFW rule match log event */
#endif /* NETLINK_COMPAT*/

View file

@ -75,6 +75,10 @@ typedef struct _ip_fw3_opheader {
uint16_t reserved[2]; /* Align to 64-bit boundary */
} ip_fw3_opheader;
#define IP_FW3_OPVER_0 0
#define IP_FW3_OPVER_1 1 /* 32bit rulenum */
#define IP_FW3_OPVER IP_FW3_OPVER_1
/* IP_FW3 opcodes */
#define IP_FW_TABLE_XADD 86 /* add entry */
#define IP_FW_TABLE_XDEL 87 /* delete entry */
@ -109,6 +113,7 @@ typedef struct _ip_fw3_opheader {
#define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */
#define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */
#define IP_FW_SKIPTO_CACHE 118 /* Manage skipto cache */
#define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */
#define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */
@ -211,8 +216,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_VERREVPATH, /* none */
O_VERSRCREACH, /* none */
O_PROBE_STATE, /* none */
O_KEEP_STATE, /* none */
O_PROBE_STATE, /* v0:arg1=kidx, v1:kidx=kidx */
O_KEEP_STATE, /* v0:arg1=kidx, v1:kidx=kidx */
O_LIMIT, /* ipfw_insn_limit */
O_LIMIT_PARENT, /* dyn_type, not an opcode. */
@ -223,12 +228,13 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_LOG, /* ipfw_insn_log */
O_PROB, /* u32 = match probability */
O_CHECK_STATE, /* none */
O_CHECK_STATE, /* v0:arg1=kidx, v1:kidx=kidx */
O_ACCEPT, /* none */
O_DENY, /* none */
O_REJECT, /* arg1=icmp arg (same as deny) */
O_COUNT, /* none */
O_SKIPTO, /* arg1=next rule number */
O_SKIPTO, /* v0:arg1=next rule number */
/* v1:kidx= next rule number */
O_PIPE, /* arg1=pipe number */
O_QUEUE, /* arg1=queue number */
O_DIVERT, /* arg1=port number */
@ -242,8 +248,10 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
* More opcodes.
*/
O_IPSEC, /* has ipsec history */
O_IP_SRC_LOOKUP, /* arg1=table number, u32=value */
O_IP_SRC_LOOKUP, /* v0:arg1=table number, u32=value */
/* v1:kidx=name, u32=value, arg1=key */
O_IP_DST_LOOKUP, /* arg1=table number, u32=value */
/* v1:kidx=name, u32=value, arg1=key */
O_ANTISPOOF, /* none */
O_JAIL, /* u32 = id */
O_ALTQ, /* u32 = altq classif. qid */
@ -278,23 +286,27 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_SOCKARG, /* socket argument */
O_CALLRETURN, /* arg1=called rule number */
O_CALLRETURN, /* v0:arg1=called rule number */
/* v1:kidx=called rule number */
O_FORWARD_IP6, /* fwd sockaddr_in6 */
O_DSCP, /* 2 u32 = DSCP mask */
O_SETDSCP, /* arg1=DSCP value */
O_IP_FLOW_LOOKUP, /* arg1=table number, u32=value */
O_IP_FLOW_LOOKUP, /* v0:arg1=table number, u32=value */
/* v1:kidx=name, u32=value */
O_EXTERNAL_ACTION, /* arg1=id of external action handler */
O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */
O_EXTERNAL_ACTION, /* v0:arg1=id of external action handler */
/* v1:kidx=id of external action handler */
O_EXTERNAL_INSTANCE, /* v0:arg1=id of eaction handler instance */
/* v1:kidx=id of eaction handler instance */
O_EXTERNAL_DATA, /* variable length data */
O_SKIP_ACTION, /* none */
O_TCPMSS, /* arg1=MSS value */
O_MAC_SRC_LOOKUP, /* arg1=table number, u32=value */
O_MAC_DST_LOOKUP, /* arg1=table number, u32=value */
O_MAC_SRC_LOOKUP, /* kidx=name, u32=value, arg1=key */
O_MAC_DST_LOOKUP, /* kidx=name, u32=value, arg1=key */
O_SETMARK, /* u32 = value */
O_MARK, /* 2 u32 = value, bitmask */
@ -302,22 +314,6 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_LAST_OPCODE /* not an opcode! */
};
/*
* Defines key types used by lookup instruction
*/
enum ipfw_table_lookup_type {
LOOKUP_DST_IP,
LOOKUP_SRC_IP,
LOOKUP_DST_PORT,
LOOKUP_SRC_PORT,
LOOKUP_UID,
LOOKUP_JAIL,
LOOKUP_DSCP,
LOOKUP_DST_MAC,
LOOKUP_SRC_MAC,
LOOKUP_MARK,
};
/*
* The extension header are filtered only for presence using a bit
* vector with a flag for each header.
@ -392,6 +388,11 @@ typedef struct _ipfw_insn_u32 {
u_int32_t d[1]; /* one or more */
} ipfw_insn_u32;
/*
 * Instruction that carries one 32-bit index in addition to the common
 * instruction header.  The opcode list annotates the opcodes using it
 * with "kidx=..." (a named object index or a rule number).
 */
typedef struct _ipfw_insn_kidx {
ipfw_insn o;
uint32_t kidx; /* 32-bit named object index or rule number */
} ipfw_insn_kidx;
/*
* This is used to store IP addr-mask pairs.
*/
@ -401,6 +402,47 @@ typedef struct _ipfw_insn_ip {
struct in_addr mask;
} ipfw_insn_ip;
/*
 * Table lookup instruction: the common instruction header followed by
 * the 32-bit table name index and an optional 32-bit value.
 */
typedef struct _ipfw_insn_table {
ipfw_insn o; /* arg1 is optional lookup key */
uint32_t kidx; /* table name index */
uint32_t value; /* table value */
} ipfw_insn_table;
/*
 * The low byte of arg1 holds the lookup key type
 * (enum ipfw_table_lookup_type).  IPFW_LOOKUP_TYPE() extracts it;
 * IPFW_SET_LOOKUP_TYPE() replaces only that byte and leaves the other
 * bits of arg1 untouched.
 */
#define IPFW_LOOKUP_TYPE_MASK 0x00FF
#define IPFW_LOOKUP_TYPE(insn) ((insn)->arg1 & IPFW_LOOKUP_TYPE_MASK)
#define IPFW_SET_LOOKUP_TYPE(insn, type) do { \
(insn)->arg1 &= ~IPFW_LOOKUP_TYPE_MASK; \
(insn)->arg1 |= (type) & IPFW_LOOKUP_TYPE_MASK; \
} while (0)
/*
 * Defines key types used by lookup instruction.
 * The value is stored in the low byte of the instruction's arg1
 * (see IPFW_LOOKUP_TYPE()).
 */
enum ipfw_table_lookup_type {
LOOKUP_NONE = 0, /* no explicit key: plain address table match */
LOOKUP_DST_IP, /* destination IPv4/IPv6 address */
LOOKUP_SRC_IP, /* source IPv4/IPv6 address */
LOOKUP_DST_PORT, /* destination port (TCP/UDP/UDP-Lite/SCTP) */
LOOKUP_SRC_PORT, /* source port (TCP/UDP/UDP-Lite/SCTP) */
LOOKUP_UID, /* uid from the credentials lookup */
LOOKUP_JAIL, /* jail (prison) id from the credentials lookup */
LOOKUP_DSCP, /* 6-bit DSCP of the IPv4/IPv6 header */
LOOKUP_DST_MAC, /* destination ethernet address (layer 2 only) */
LOOKUP_SRC_MAC, /* source ethernet address (layer 2 only) */
LOOKUP_MARK, /* packet mark */
LOOKUP_RULENUM, /* number of the rule being evaluated */
};
/*
 * Return point selector, compared against arg1 of the 'return' action.
 */
enum ipfw_return_type {
RETURN_NEXT_RULENUM = 0, /* resume at calling rule's number + 1 */
RETURN_NEXT_RULE, /* resume at the rule following the calling one */
};
/*
 * Skipto cache operations.
 * NOTE(review): presumably the operation carried by the
 * IP_FW_SKIPTO_CACHE sockopt -- confirm against the sockopt handler.
 */
enum ipfw_skipto_cache_op {
SKIPTO_CACHE_DISABLE = 0,
SKIPTO_CACHE_ENABLE,
};
/*
* This is used to forward to a given address (ip).
*/
@ -434,7 +476,8 @@ typedef struct _ipfw_insn_if {
union {
struct in_addr ip;
int glob;
uint16_t kidx;
uint16_t kidx_v0;
uint32_t kidx;
} p;
char name[IFNAMSIZ];
} ipfw_insn_if;
@ -452,6 +495,7 @@ typedef struct _ipfw_insn_altq {
*/
typedef struct _ipfw_insn_limit {
ipfw_insn o;
u_int32_t kidx;
u_int8_t _pad;
u_int8_t limit_mask; /* combination of DYN_* below */
#define DYN_SRC_ADDR 0x1
@ -462,6 +506,9 @@ typedef struct _ipfw_insn_limit {
u_int16_t conn_limit;
} ipfw_insn_limit;
/* MAC/InfiniBand/etc address length */
#define IPFW_MAX_L2_ADDR_LEN 20
/*
* This is used for log instructions.
*/
@ -471,6 +518,22 @@ typedef struct _ipfw_insn_log {
u_int32_t log_left; /* how many left to log */
} ipfw_insn_log;
/*
 * ipfw_insn_log->o.arg1 bitmasks.
 * NOTE(review): the names suggest these select log destinations
 * (syslog, ipfw0 interface, routing socket) -- confirm against the
 * log implementation.
 */
#define IPFW_LOG_DEFAULT 0x0000
#define IPFW_LOG_SYSLOG (1 << 15)
#define IPFW_LOG_IPFW0 (1 << 14)
#define IPFW_LOG_RTSOCK (1 << 13)
/*
 * Fixed-size header followed by a variable-length comment.
 * NOTE(review): presumably the payload of the log messages delivered
 * through the routing socket (RTM_IPFWLOG) -- confirm against the
 * code that builds the message.
 */
typedef struct _ipfwlog_rtsock_hdr_v2 {
uint32_t rulenum;
uint32_t tablearg;
ipfw_insn cmd;
u_char ether_shost[IPFW_MAX_L2_ADDR_LEN];
u_char ether_dhost[IPFW_MAX_L2_ADDR_LEN];
uint32_t mark;
char comment[0]; /* variable-length trailing data */
} ipfwlog_rtsock_hdr_v2;
/* Legacy NAT structures, compat only */
#ifndef _KERNEL
/*
@ -604,6 +667,10 @@ typedef struct _ipfw_insn_icmp6 {
*/
} ipfw_insn_icmp6;
/*
 * Convert pointer to instruction with specified type.
 * 'type' is the suffix of an ipfw_insn_* typedef, e.g.
 * insntod(cmd, table) expands to ((ipfw_insn_table *)(cmd)).
 * insntoc() is the const-qualified variant.
 */
#define insntod(p, type) ((ipfw_insn_ ## type *)(p))
#define insntoc(p, type) ((const ipfw_insn_ ## type *)(p))
/*
* Here we have the structure representing an ipfw rule.
*
@ -719,30 +786,29 @@ struct ipfw_flow_id {
/*
* Dynamic ipfw rule.
*/
typedef struct _ipfw_dyn_rule ipfw_dyn_rule;
#define IPFW_DYN_ORPHANED 0x40000 /* state's parent rule was deleted */
struct _ipfw_dyn_rule {
ipfw_dyn_rule *next; /* linked list of rules. */
struct ip_fw *rule; /* pointer to rule */
/* 'rule' is used to pass up the rule number (from the parent) */
ipfw_dyn_rule *parent; /* pointer to parent rule */
u_int64_t pcnt; /* packet match counter */
u_int64_t bcnt; /* byte match counter */
typedef struct _ipfw_dyn_rule {
struct ipfw_flow_id id; /* (masked) flow id */
u_int32_t expire; /* expire time */
u_int32_t bucket; /* which bucket in hash table */
u_int32_t state; /* state of this rule (typically a
uint8_t set;
uint8_t type; /* rule type */
uint16_t pad;
uint32_t expire; /* expire time */
uint32_t rulenum; /* parent's rule number */
uint32_t kidx; /* index of named object */
uint64_t pcnt; /* packet match counter */
uint64_t bcnt; /* byte match counter */
uint32_t hashval; /* hash value */
union {
uint32_t state; /* state of this rule (typically a
* combination of TCP flags)
*/
#define IPFW_DYN_ORPHANED 0x40000 /* state's parent rule was deleted */
u_int32_t ack_fwd; /* most recent ACKs in forward */
u_int32_t ack_rev; /* and reverse directions (used */
uint32_t count; /* number of linked states */
};
uint32_t ack_fwd; /* most recent ACKs in forward */
uint32_t ack_rev; /* and reverse directions (used */
/* to generate keepalives) */
u_int16_t dyn_type; /* rule type */
u_int16_t count; /* refcount */
u_int16_t kidx; /* index of named object */
} __packed __aligned(8);
} __packed __aligned(8) ipfw_dyn_rule;
/*
* Definitions for IP option names.
@ -794,16 +860,6 @@ struct _ipfw_dyn_rule {
#define IPFW_VTYPE_NH6 0x00000400 /* IPv6 nexthop */
#define IPFW_VTYPE_MARK 0x00000800 /* [fw]mark */
/* MAC/InfiniBand/etc address length */
#define IPFW_MAX_L2_ADDR_LEN 20
typedef struct _ipfw_table_entry {
in_addr_t addr; /* network address */
u_int32_t value; /* value */
u_int16_t tbl; /* table number */
u_int8_t masklen; /* mask length */
} ipfw_table_entry;
typedef struct _ipfw_table_xentry {
uint16_t len; /* Total entry length */
uint8_t type; /* entry type */
@ -819,13 +875,6 @@ typedef struct _ipfw_table_xentry {
} ipfw_table_xentry;
#define IPFW_TCF_INET 0x01 /* CIDR flags: IPv4 record */
typedef struct _ipfw_table {
u_int32_t size; /* size of entries in bytes */
u_int32_t cnt; /* # of entries */
u_int16_t tbl; /* table number */
ipfw_table_entry ent[0]; /* entries */
} ipfw_table;
typedef struct _ipfw_xtable {
ip_fw3_opheader opheader; /* IP_FW3 opcode */
uint32_t size; /* size of entries in bytes */
@ -865,10 +914,10 @@ typedef struct _ipfw_obj_data {
/* Object name TLV */
typedef struct _ipfw_obj_ntlv {
ipfw_obj_tlv head; /* TLV header */
uint16_t idx; /* Name index */
uint32_t idx; /* Name index */
uint8_t set; /* set, if applicable */
uint8_t type; /* object type, if applicable */
uint32_t spare; /* unused */
uint16_t spare; /* unused */
char name[64]; /* Null-terminated name */
} ipfw_obj_ntlv;
@ -891,19 +940,40 @@ struct tflow_entry {
} a;
};
/*
 * The high byte of a table instruction's arg1 holds the table value
 * type (enum ipfw_table_value_type).  IPFW_TVALUE_TYPE() extracts it;
 * IPFW_SET_TVALUE_TYPE() replaces only that byte and leaves the low
 * byte of arg1 untouched.
 */
#define IPFW_TVALUE_TYPE_MASK 0xFF00
#define IPFW_TVALUE_TYPE(insn) (((insn)->arg1 & IPFW_TVALUE_TYPE_MASK) >> 8)
#define IPFW_SET_TVALUE_TYPE(insn, type) do { \
(insn)->arg1 &= ~IPFW_TVALUE_TYPE_MASK; \
(insn)->arg1 |= ((type) << 8) & IPFW_TVALUE_TYPE_MASK; \
} while (0)
/*
 * Selects which field of the table value is compared with the value
 * stored in the instruction.  TVALUE_TAG is zero, so an instruction
 * that does not set a value type matches against the tag field.
 */
enum ipfw_table_value_type {
TVALUE_TAG = 0,
TVALUE_PIPE,
TVALUE_DIVERT,
TVALUE_SKIPTO,
TVALUE_NETGRAPH,
TVALUE_FIB,
TVALUE_NAT,
TVALUE_NH4,
TVALUE_DSCP,
TVALUE_LIMIT,
TVALUE_MARK,
};
/* 64-byte structure representing multi-field table value */
typedef struct _ipfw_table_value {
uint32_t tag; /* O_TAG/O_TAGGED */
uint32_t pipe; /* O_PIPE/O_QUEUE */
uint16_t pipe; /* O_PIPE/O_QUEUE */
uint16_t divert; /* O_DIVERT/O_TEE */
uint16_t skipto; /* skipto, CALLRET */
uint32_t skipto; /* skipto, CALLRET */
uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */
uint32_t fib; /* O_SETFIB */
uint32_t nat; /* O_NAT */
uint32_t nh4;
uint16_t fib; /* O_SETFIB */
uint8_t dscp;
uint8_t spare0;
uint16_t kidx; /* value kernel index */
uint32_t kidx; /* value kernel index */
struct in6_addr nh6;
uint32_t limit; /* O_LIMIT */
uint32_t zoneid; /* scope zone id for nh6 */
@ -918,8 +988,7 @@ typedef struct _ipfw_obj_tentry {
uint8_t masklen; /* mask length */
uint8_t result; /* request result */
uint8_t spare0;
uint16_t idx; /* Table name index */
uint16_t spare1;
uint32_t idx; /* Table name index */
union {
/* Longest field needs to be aligned by 8-byte boundary */
struct in_addr addr; /* IPv4 address */
@ -966,8 +1035,8 @@ typedef struct _ipfw_obj_ctlv {
typedef struct _ipfw_range_tlv {
ipfw_obj_tlv head; /* TLV header */
uint32_t flags; /* Range flags */
uint16_t start_rule; /* Range start */
uint16_t end_rule; /* Range end */
uint32_t start_rule; /* Range start */
uint32_t end_rule; /* Range end */
uint32_t set; /* Range set to match */
uint32_t new_set; /* New set to move/swap to */
} ipfw_range_tlv;
@ -1051,10 +1120,16 @@ typedef struct _ipfw_ta_info {
uint64_t spare1;
} ipfw_ta_info;
typedef struct _ipfw_cmd_header { /* control command header */
ip_fw3_opheader opheader; /* IP_FW3 opcode */
uint32_t size; /* Total size (incl. header) */
uint32_t cmd; /* command */
} ipfw_cmd_header;
typedef struct _ipfw_obj_header {
ip_fw3_opheader opheader; /* IP_FW3 opcode */
uint32_t spare;
uint16_t idx; /* object name index */
uint32_t idx; /* object name index */
uint16_t spare;
uint8_t objtype; /* object type */
uint8_t objsubtype; /* object subtype */
ipfw_obj_ntlv ntlv; /* object name tlv */

View file

@ -84,9 +84,9 @@ struct ipfw_nat64lsn_stats {
uint64_t spgcreated; /* Number of portgroups created */
uint64_t spgdeleted; /* Number of portgroups deleted */
uint64_t hostcount; /* Number of hosts */
uint64_t tcpchunks; /* Number of TCP chunks */
uint64_t udpchunks; /* Number of UDP chunks */
uint64_t icmpchunks; /* Number of ICMP chunks */
uint64_t tcpchunks; /* Number of TCP portgroups */
uint64_t udpchunks; /* Number of UDP portgroups */
uint64_t icmpchunks; /* Number of ICMP portgroups */
uint64_t _reserved[4];
};
@ -95,6 +95,10 @@ struct ipfw_nat64lsn_stats {
#define NAT64_ALLOW_PRIVATE 0x0002 /* Allow private IPv4 address
* translation
*/
#define NAT64LSN_ALLOW_SWAPCONF 0x0004 /* Allow configuration exchange
* between NAT64LSN instances
* during the sets swapping.
*/
typedef struct _ipfw_nat64stl_cfg {
char name[64]; /* NAT name */
ipfw_obj_ntlv ntlv6; /* object name tlv */

View file

@ -144,16 +144,15 @@ VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't use set-aware tables */
/* Use 128 tables by default */
static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
static int jump_lookup_pos(struct ip_fw_chain *chain, struct ip_fw *f, int num,
int tablearg, int jump_backwards);
#ifndef LINEAR_SKIPTO
static int jump_cached(struct ip_fw_chain *chain, struct ip_fw *f, int num,
int tablearg, int jump_backwards);
#define JUMP(ch, f, num, targ, back) jump_cached(ch, f, num, targ, back)
#ifndef IPFIREWALL_LINEAR_SKIPTO
VNET_DEFINE(int, skipto_cache) = 0;
#else
#define JUMP(ch, f, num, targ, back) jump_lookup_pos(ch, f, num, targ, back)
VNET_DEFINE(int, skipto_cache) = 1;
#endif
static uint32_t jump(struct ip_fw_chain *chain, struct ip_fw *f,
uint32_t num, int tablearg, bool jump_backwards);
/*
* Each rule belongs to one of 32 different sets (0..31).
* The variable set_disable contains one bit per set.
@ -207,6 +206,9 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
"Set upper limit of matches of ipfw rules logged");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, skipto_cache,
CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(skipto_cache), 0,
"Status of linear skipto cache: 1 - enabled, 0 - disabled.");
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
&dummy_def, 0,
"The default/max possible rule number.");
@ -1228,38 +1230,36 @@ set_match(struct ip_fw_args *args, int slot,
args->flags |= IPFW_ARGS_REF;
}
static int
jump_lookup_pos(struct ip_fw_chain *chain, struct ip_fw *f, int num,
int tablearg, int jump_backwards)
static uint32_t
jump_lookup_pos(struct ip_fw_chain *chain, struct ip_fw *f, uint32_t num,
int tablearg, bool jump_backwards)
{
int f_pos, i;
/*
* Make sure we do not jump backward.
*/
i = IP_FW_ARG_TABLEARG(chain, num, skipto);
/* make sure we do not jump backward */
if (jump_backwards == 0 && i <= f->rulenum)
if (!jump_backwards && i <= f->rulenum)
i = f->rulenum + 1;
#ifndef LINEAR_SKIPTO
if (chain->idxmap != NULL)
f_pos = chain->idxmap[i];
else
if (V_skipto_cache == 0)
f_pos = ipfw_find_rule(chain, i, 0);
#else
f_pos = chain->idxmap[i];
#endif /* LINEAR_SKIPTO */
else {
/*
* Make sure we do not do out of bounds access.
*/
if (i >= IPFW_DEFAULT_RULE)
i = IPFW_DEFAULT_RULE - 1;
f_pos = chain->idxmap[i];
}
return (f_pos);
}
#ifndef LINEAR_SKIPTO
/*
* Helper function to enable cached rule lookups using
* cache.id and cache.pos fields in ipfw rule.
*/
static int
jump_cached(struct ip_fw_chain *chain, struct ip_fw *f, int num,
int tablearg, int jump_backwards)
static uint32_t
jump(struct ip_fw_chain *chain, struct ip_fw *f, uint32_t num,
int tablearg, bool jump_backwards)
{
int f_pos;
@ -1304,9 +1304,54 @@ jump_cached(struct ip_fw_chain *chain, struct ip_fw *f, int num,
#endif /* !__LP64__ */
return (f_pos);
}
#endif /* !LINEAR_SKIPTO */
#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f)
/*
 * Compare one field of a table value with the value stored in a table
 * lookup instruction.
 *
 * The field is selected by the value type encoded in the instruction's
 * arg1 (IPFW_TVALUE_TYPE()); an unrecognized type is handled like
 * TVALUE_TAG.  'tablearg' is the value index handed to TARG_VAL().
 * Returns non-zero when the selected field equals cmd->value.
 */
static inline int
tvalue_match(struct ip_fw_chain *ch, const ipfw_insn_table *cmd,
    uint32_t tablearg)
{
	const uint32_t wanted = cmd->value;

	/* Every field is widened to 32 bits before the comparison. */
	switch (IPFW_TVALUE_TYPE(&cmd->o)) {
	case TVALUE_PIPE:
		return ((uint32_t)(TARG_VAL(ch, tablearg, pipe)) == wanted);
	case TVALUE_DIVERT:
		return ((uint32_t)(TARG_VAL(ch, tablearg, divert)) == wanted);
	case TVALUE_SKIPTO:
		return ((uint32_t)(TARG_VAL(ch, tablearg, skipto)) == wanted);
	case TVALUE_NETGRAPH:
		return ((uint32_t)(TARG_VAL(ch, tablearg, netgraph)) == wanted);
	case TVALUE_FIB:
		return ((uint32_t)(TARG_VAL(ch, tablearg, fib)) == wanted);
	case TVALUE_NAT:
		return ((uint32_t)(TARG_VAL(ch, tablearg, nat)) == wanted);
	case TVALUE_NH4:
		return ((uint32_t)(TARG_VAL(ch, tablearg, nh4)) == wanted);
	case TVALUE_DSCP:
		return ((uint32_t)(TARG_VAL(ch, tablearg, dscp)) == wanted);
	case TVALUE_LIMIT:
		return ((uint32_t)(TARG_VAL(ch, tablearg, limit)) == wanted);
	case TVALUE_MARK:
		return ((uint32_t)(TARG_VAL(ch, tablearg, mark)) == wanted);
	case TVALUE_TAG:
	default:
		return ((uint32_t)(TARG_VAL(ch, tablearg, tag)) == wanted);
	}
}
/*
* The main check routine for the firewall.
*
@ -1381,8 +1426,8 @@ ipfw_chk(struct ip_fw_args *args)
#else
struct ucred *ucred_cache = NULL;
#endif
uint32_t f_pos = 0; /* index of current rule in the array */
int ucred_lookup = 0;
int f_pos = 0; /* index of current rule in the array */
int retval = 0;
struct ifnet *oif, *iif;
@ -2045,104 +2090,133 @@ do { \
break;
case O_IP_DST_LOOKUP:
{
if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
void *pkey;
uint32_t vidx, key;
uint16_t keylen = 0; /* zero if can't match the packet */
if (IPFW_LOOKUP_TYPE(cmd) != LOOKUP_NONE) {
void *pkey = NULL;
uint32_t key, vidx;
uint16_t keylen = 0; /* zero if can't match the packet */
uint8_t lookup_type;
/* Determine lookup key type */
vidx = ((ipfw_insn_u32 *)cmd)->d[1];
switch (vidx) {
case LOOKUP_DST_IP:
case LOOKUP_SRC_IP:
/* Need IP frame */
if (is_ipv6 == 0 && is_ipv4 == 0)
break;
if (vidx == LOOKUP_DST_IP)
pkey = is_ipv6 ?
(void *)&args->f_id.dst_ip6:
(void *)&dst_ip;
lookup_type = IPFW_LOOKUP_TYPE(cmd);
switch (lookup_type) {
case LOOKUP_DST_IP:
case LOOKUP_SRC_IP:
if (is_ipv4) {
keylen = sizeof(in_addr_t);
if (lookup_type == LOOKUP_DST_IP)
pkey = &dst_ip;
else
pkey = is_ipv6 ?
(void *)&args->f_id.src_ip6:
(void *)&src_ip;
keylen = is_ipv6 ?
sizeof(struct in6_addr):
sizeof(in_addr_t);
break;
case LOOKUP_DST_PORT:
case LOOKUP_SRC_PORT:
/* Need IP frame */
if (is_ipv6 == 0 && is_ipv4 == 0)
break;
/* Skip fragments */
if (offset != 0)
break;
/* Skip proto without ports */
if (proto != IPPROTO_TCP &&
proto != IPPROTO_UDP &&
proto != IPPROTO_UDPLITE &&
proto != IPPROTO_SCTP)
break;
key = vidx == LOOKUP_DST_PORT ?
dst_port:
src_port;
pkey = &key;
keylen = sizeof(key);
break;
case LOOKUP_UID:
case LOOKUP_JAIL:
check_uidgid(
(ipfw_insn_u32 *)cmd,
args, &ucred_lookup,
&ucred_cache);
key = vidx == LOOKUP_UID ?
ucred_cache->cr_uid:
ucred_cache->cr_prison->pr_id;
pkey = &key;
keylen = sizeof(key);
break;
case LOOKUP_DSCP:
/* Need IP frame */
if (is_ipv6 == 0 && is_ipv4 == 0)
break;
if (is_ipv6)
key = IPV6_DSCP(
(struct ip6_hdr *)ip) >> 2;
pkey = &src_ip;
} else if (is_ipv6) {
keylen = sizeof(struct in6_addr);
if (lookup_type == LOOKUP_DST_IP)
pkey = &args->f_id.dst_ip6;
else
key = ip->ip_tos >> 2;
pkey = &key;
keylen = sizeof(key);
pkey = &args->f_id.src_ip6;
} else /* only for L3 */
break;
case LOOKUP_DST_MAC:
case LOOKUP_SRC_MAC:
/* Need ether frame */
if ((args->flags & IPFW_ARGS_ETHER) == 0)
break;
pkey = vidx == LOOKUP_DST_MAC ?
eh->ether_dhost:
eh->ether_shost;
keylen = ETHER_ADDR_LEN;
break;
case LOOKUP_MARK:
key = args->rule.pkt_mark;
pkey = &key;
keylen = sizeof(key);
case LOOKUP_DSCP:
if (is_ipv4)
key = ip->ip_tos >> 2;
else if (is_ipv6)
key = IPV6_DSCP(
(struct ip6_hdr *)ip) >> 2;
else
break; /* only for L3 */
key &= 0x3f;
if (cmdlen == F_INSN_SIZE(ipfw_insn_table))
key &= insntod(cmd, table)->value;
pkey = &key;
keylen = sizeof(key);
break;
case LOOKUP_DST_PORT:
case LOOKUP_SRC_PORT:
/* only for L3 */
if (is_ipv6 == 0 && is_ipv4 == 0) {
break;
}
if (keylen == 0)
/* Skip fragments */
if (offset != 0) {
break;
match = ipfw_lookup_table(chain,
cmd->arg1, keylen, pkey, &vidx);
if (!match)
}
/* Skip proto without ports */
if (proto != IPPROTO_TCP &&
proto != IPPROTO_UDP &&
proto != IPPROTO_UDPLITE &&
proto != IPPROTO_SCTP)
break;
tablearg = vidx;
if (lookup_type == LOOKUP_DST_PORT)
key = dst_port;
else
key = src_port;
pkey = &key;
if (cmdlen == F_INSN_SIZE(ipfw_insn_table))
key &= insntod(cmd, table)->value;
keylen = sizeof(key);
break;
case LOOKUP_DST_MAC:
case LOOKUP_SRC_MAC:
/* only for L2 */
if ((args->flags & IPFW_ARGS_ETHER) == 0)
break;
pkey = lookup_type == LOOKUP_DST_MAC ?
eh->ether_dhost : eh->ether_shost;
keylen = ETHER_ADDR_LEN;
break;
#ifndef USERSPACE
case LOOKUP_UID:
case LOOKUP_JAIL:
check_uidgid(insntod(cmd, u32),
args, &ucred_lookup,
#ifdef __FreeBSD__
&ucred_cache);
if (lookup_type == LOOKUP_UID)
key = ucred_cache->cr_uid;
else if (lookup_type == LOOKUP_JAIL)
key = ucred_cache->cr_prison->pr_id;
#else /* !__FreeBSD__ */
(void *)&ucred_cache);
if (lookup_type == LOOKUP_UID)
key = ucred_cache.uid;
else if (lookup_type == LOOKUP_JAIL)
key = ucred_cache.xid;
#endif /* !__FreeBSD__ */
pkey = &key;
if (cmdlen == F_INSN_SIZE(ipfw_insn_table))
key &= insntod(cmd, table)->value;
keylen = sizeof(key);
break;
#endif /* !USERSPACE */
case LOOKUP_MARK:
key = args->rule.pkt_mark;
if (cmdlen == F_INSN_SIZE(ipfw_insn_table))
key &= insntod(cmd, table)->value;
pkey = &key;
keylen = sizeof(key);
break;
case LOOKUP_RULENUM:
key = f->rulenum;
if (cmdlen == F_INSN_SIZE(ipfw_insn_table))
key &= insntod(cmd, table)->value;
pkey = &key;
keylen = sizeof(key);
break;
}
/* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */
/* FALLTHROUGH */
/* unknown key type */
if (keylen == 0)
break;
match = ipfw_lookup_table(chain,
insntod(cmd, kidx)->kidx, keylen,
pkey, &vidx);
if (match)
tablearg = vidx;
break;
}
/* LOOKUP_NONE */
/* FALLTHROUGH */
case O_IP_SRC_LOOKUP:
{
void *pkey;
@ -2163,13 +2237,14 @@ do { \
pkey = &args->f_id.src_ip6;
} else
break;
match = ipfw_lookup_table(chain, cmd->arg1,
match = ipfw_lookup_table(chain,
insntod(cmd, kidx)->kidx,
keylen, pkey, &vidx);
if (!match)
break;
if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) {
match = ((ipfw_insn_u32 *)cmd)->d[0] ==
TARG_VAL(chain, vidx, tag);
if (cmdlen == F_INSN_SIZE(ipfw_insn_table)) {
match = tvalue_match(chain,
insntod(cmd, table), vidx);
if (!match)
break;
}
@ -2193,13 +2268,14 @@ do { \
else
pkey = eh->ether_shost;
match = ipfw_lookup_table(chain, cmd->arg1,
match = ipfw_lookup_table(chain,
insntod(cmd, kidx)->kidx,
keylen, pkey, &vidx);
if (!match)
break;
if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) {
match = ((ipfw_insn_u32 *)cmd)->d[0] ==
TARG_VAL(chain, vidx, tag);
if (cmdlen == F_INSN_SIZE(ipfw_insn_table)) {
match = tvalue_match(chain,
insntod(cmd, table), vidx);
if (!match)
break;
}
@ -2208,19 +2284,22 @@ do { \
}
case O_IP_FLOW_LOOKUP:
{
uint32_t v = 0;
match = ipfw_lookup_table(chain,
cmd->arg1, 0, &args->f_id, &v);
if (!match)
break;
if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
match = ((ipfw_insn_u32 *)cmd)->d[0] ==
TARG_VAL(chain, v, tag);
if (match)
tablearg = v;
}
{
uint32_t vidx = 0;
match = ipfw_lookup_table(chain,
insntod(cmd, kidx)->kidx, 0,
&args->f_id, &vidx);
if (!match)
break;
if (cmdlen == F_INSN_SIZE(ipfw_insn_table))
match = tvalue_match(chain,
insntod(cmd, table), vidx);
if (match)
tablearg = vidx;
break;
}
case O_IP_SRC_MASK:
case O_IP_DST_MASK:
if (is_ipv4) {
@ -2548,7 +2627,7 @@ do { \
case O_LOG:
ipfw_log(chain, f, hlen, args,
offset | ip6f_mf, tablearg, ip);
offset | ip6f_mf, tablearg, ip, eh);
match = 1;
break;
@ -2787,14 +2866,14 @@ do { \
if (cmd->arg1 == IP_FW_TARG)
mark = TARG_VAL(chain, tablearg, mark);
else
mark = ((ipfw_insn_u32 *)cmd)->d[0];
mark = insntoc(cmd, u32)->d[0];
match =
(args->rule.pkt_mark &
((ipfw_insn_u32 *)cmd)->d[1]) ==
(mark & ((ipfw_insn_u32 *)cmd)->d[1]);
insntoc(cmd, u32)->d[1]) ==
(mark & insntoc(cmd, u32)->d[1]);
break;
}
/*
* The second set of opcodes represents 'actions',
* i.e. the terminal part of a rule once the packet
@ -2942,7 +3021,8 @@ do { \
case O_SKIPTO:
IPFW_INC_RULE_COUNTER(f, pktlen);
f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0);
f_pos = jump(chain, f,
insntod(cmd, u32)->d[0], tablearg, false);
/*
* Skip disabled rules, and re-enter
* the inner loop with the correct
@ -2976,7 +3056,7 @@ do { \
* stack pointer.
*/
struct m_tag *mtag;
uint16_t jmpto, *stack;
uint32_t jmpto, *stack;
#define IS_CALL ((cmd->len & F_NOT) == 0)
#define IS_RETURN ((cmd->len & F_NOT) != 0)
@ -2992,51 +3072,83 @@ do { \
break;
mtag = m_tag_next(m, mtag);
}
if (mtag == NULL && IS_CALL) {
mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
IPFW_CALLSTACK_SIZE *
sizeof(uint16_t), M_NOWAIT);
if (mtag != NULL)
m_tag_prepend(m, mtag);
/*
* We keep ruleset id in the first element
* of stack. If it doesn't match chain->id,
* then we can't trust information in the
* stack, since rules were changed.
* We reset stack pointer to be able reuse
* tag if it will be needed.
*/
if (mtag != NULL) {
stack = (uint32_t *)(mtag + 1);
if (stack[0] != chain->id) {
stack[0] = chain->id;
mtag->m_tag_id = 0;
}
}
/*
* On error both `call' and `return' just
* continue with next rule.
* If there is no mtag or stack is empty,
* `return` continues with next rule.
*/
if (IS_RETURN && (mtag == NULL ||
mtag->m_tag_id == 0)) {
l = 0; /* exit inner loop */
break;
}
if (IS_CALL && (mtag == NULL ||
mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
printf("ipfw: call stack error, "
"go to next rule\n");
if (mtag == NULL) {
MPASS(IS_CALL);
mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
IPFW_CALLSTACK_SIZE *
sizeof(uint32_t), M_NOWAIT);
if (mtag != NULL) {
m_tag_prepend(m, mtag);
stack = (uint32_t *)(mtag + 1);
stack[0] = chain->id;
}
}
if (mtag == NULL) {
printf("ipfw: rule %u: failed to "
"allocate call stack. "
"Denying packet.\n",
f->rulenum);
l = 0; /* exit inner loop */
done = 1; /* exit outer loop */
retval = IP_FW_DENY; /* drop packet */
break;
}
IPFW_INC_RULE_COUNTER(f, pktlen);
stack = (uint16_t *)(mtag + 1);
if (IS_CALL && mtag->m_tag_id >=
IPFW_CALLSTACK_SIZE - 1) {
printf("ipfw: rule %u: call stack "
"overflow. Denying packet.\n",
f->rulenum);
l = 0; /* exit inner loop */
done = 1; /* exit outer loop */
retval = IP_FW_DENY; /* drop packet */
break;
}
MPASS(stack == (uint32_t *)(mtag + 1));
IPFW_INC_RULE_COUNTER(f, pktlen);
/*
* The `call' action may use cached f_pos
* (in f->next_rule), whose version is written
* in f->next_rule.
* The `return' action, however, doesn't have
* fixed jump address in cmd->arg1 and can't use
* cache.
*/
if (IS_CALL) {
stack[mtag->m_tag_id] = f->rulenum;
mtag->m_tag_id++;
f_pos = JUMP(chain, f, cmd->arg1,
tablearg, 1);
stack[++mtag->m_tag_id] = f_pos;
f_pos = jump(chain, f,
insntod(cmd, u32)->d[0],
tablearg, true);
} else { /* `return' action */
mtag->m_tag_id--;
jmpto = stack[mtag->m_tag_id] + 1;
f_pos = ipfw_find_rule(chain, jmpto, 0);
jmpto = stack[mtag->m_tag_id--];
if (cmd->arg1 == RETURN_NEXT_RULE)
f_pos = jmpto + 1;
else /* RETURN_NEXT_RULENUM */
f_pos = ipfw_find_rule(chain,
chain->map[
jmpto]->rulenum + 1, 0);
}
/*
@ -3045,11 +3157,15 @@ do { \
* f_pos, f, l and cmd.
* Also clear cmdlen and skip_or
*/
MPASS(f_pos < chain->n_rules - 1);
for (; f_pos < chain->n_rules - 1 &&
(V_set_disable &
(1 << chain->map[f_pos]->set)); f_pos++)
;
/* Re-enter the inner loop at the dest rule. */
/*
* Re-enter the inner loop at the dest
* rule.
*/
f = chain->map[f_pos];
l = f->cmd_len;
cmd = f->cmd;
@ -3312,7 +3428,7 @@ do { \
args->rule.pkt_mark = (
(cmd->arg1 == IP_FW_TARG) ?
TARG_VAL(chain, tablearg, mark) :
((ipfw_insn_u32 *)cmd)->d[0]);
insntoc(cmd, u32)->d[0]);
IPFW_INC_RULE_COUNTER(f, pktlen);
break;
@ -3341,7 +3457,8 @@ do { \
break;
default:
panic("-- unknown opcode %d\n", cmd->opcode);
panic("ipfw: rule %u: unknown opcode %d\n",
f->rulenum, cmd->opcode);
} /* end of switch() on opcodes */
/*
* if we get here with l=0, then match is irrelevant.
@ -3574,9 +3691,7 @@ vnet_ipfw_init(const void *unused)
ipfw_dyn_init(chain);
ipfw_eaction_init(chain, first);
#ifdef LINEAR_SKIPTO
ipfw_init_skipto_cache(chain);
#endif
ipfw_bpf_init(first);
/* First set up some values that are compile time options */
@ -3633,9 +3748,7 @@ vnet_ipfw_uninit(const void *unused)
for (i = 0; i < chain->n_rules; i++)
ipfw_reap_add(chain, &reap, chain->map[i]);
free(chain->map, M_IPFW);
#ifdef LINEAR_SKIPTO
ipfw_destroy_skipto_cache(chain);
#endif
IPFW_WUNLOCK(chain);
IPFW_UH_WUNLOCK(chain);
ipfw_destroy_tables(chain, last);

View file

@ -0,0 +1,714 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 Yandex LLC
* Copyright (c) 2025 Andrey V. Elsukov <ae@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
/*
* Example of compatibility layer for ipfw's rule management routines.
*/
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipfw.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/fnv_hash.h>
#include <net/if.h>
#include <net/pfil.h>
#include <net/route.h>
#include <net/vnet.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <netinet/in.h>
#include <netinet/ip_var.h> /* hooks */
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_fw_table.h>
#ifdef MAC
#include <security/mac/mac_framework.h>
#endif
/*
* These structures were used by IP_FW3 socket option with version 0.
*/
/*
 * Dynamic rule (state) record as laid out for IP_FW3 version 0 requests.
 * In this layout the named object index (kidx) is only 16 bits wide.
 * The structure is packed and 8-byte aligned; do not reorder or resize
 * members.
 */
typedef struct _ipfw_dyn_rule_v0 {
ipfw_dyn_rule *next; /* linked list of rules. */
struct ip_fw *rule; /* pointer to rule */
/* 'rule' is used to pass up the rule number (from the parent) */
ipfw_dyn_rule *parent; /* pointer to parent rule */
u_int64_t pcnt; /* packet match counter */
u_int64_t bcnt; /* byte match counter */
struct ipfw_flow_id id; /* (masked) flow id */
u_int32_t expire; /* expire time */
u_int32_t bucket; /* which bucket in hash table */
u_int32_t state; /* state of this rule (typically a
* combination of TCP flags)
*/
u_int32_t ack_fwd; /* most recent ACKs in forward */
u_int32_t ack_rev; /* and reverse directions (used */
/* to generate keepalives) */
u_int16_t dyn_type; /* rule type */
u_int16_t count; /* refcount */
u_int16_t kidx; /* index of named object */
} __packed __aligned(8) ipfw_dyn_rule_v0;
/*
 * TLV wrapper around a single version 0 dynamic state record.
 */
typedef struct _ipfw_obj_dyntlv_v0 {
ipfw_obj_tlv head; /* TLV header */
ipfw_dyn_rule_v0 state; /* state in the version 0 layout */
} ipfw_obj_dyntlv_v0;
/*
 * Named object TLV as laid out for version 0 requests; the name index
 * is 16 bits wide here.  parse_rules_v0() walks arrays of these records.
 */
typedef struct _ipfw_obj_ntlv_v0 {
ipfw_obj_tlv head; /* TLV header */
uint16_t idx; /* Name index */
uint8_t set; /* set, if applicable */
uint8_t type; /* object type, if applicable */
uint32_t spare; /* unused */
char name[64]; /* Null-terminated name */
} ipfw_obj_ntlv_v0;
/*
 * Rule range TLV as laid out for version 0 requests; rule numbers are
 * 16 bits wide here.  check_range_tlv_v0() validates one of these and
 * copies it into the current ipfw_range_tlv.
 */
typedef struct _ipfw_range_tlv_v0 {
ipfw_obj_tlv head; /* TLV header */
uint32_t flags; /* Range flags */
uint16_t start_rule; /* Range start */
uint16_t end_rule; /* Range end */
uint32_t set; /* Range set to match */
uint32_t new_set; /* New set to move/swap to */
} ipfw_range_tlv_v0;
/*
 * Complete version 0 range request: the IP_FW3 header followed by one
 * version 0 range TLV.  del_rules_v0() requires the request to be exactly
 * this size.
 */
typedef struct _ipfw_range_header_v0 {
ip_fw3_opheader opheader; /* IP_FW3 opcode */
ipfw_range_tlv_v0 range;
} ipfw_range_header_v0;
/*
 * O_LIMIT instruction as laid out for version 0 rules.  There is no kidx
 * member in this layout; convert_v0_to_v1() takes the object index from
 * the arg1 field of the instruction header instead.
 * check_opcode_compat() uses this type for the O_LIMIT size check.
 */
typedef struct _ipfw_insn_limit_v0 {
ipfw_insn o;
uint8_t _pad;
uint8_t limit_mask;
uint16_t conn_limit;
} ipfw_insn_limit_v0;
/*
 * Table entry TLV as laid out for version 0 requests; the table name
 * index is 16 bits wide here.
 * NOTE(review): no code in this file references this type yet.
 */
typedef struct _ipfw_obj_tentry_v0 {
ipfw_obj_tlv head; /* TLV header */
uint8_t subtype; /* subtype (IPv4,IPv6) */
uint8_t masklen; /* mask length */
uint8_t result; /* request result */
uint8_t spare0;
uint16_t idx; /* Table name index */
uint16_t spare1;
union {
/* Longest field needs to be aligned by 8-byte boundary */
struct in_addr addr; /* IPv4 address */
uint32_t key; /* uid/gid/port */
struct in6_addr addr6; /* IPv6 address */
char iface[IF_NAMESIZE]; /* interface name */
struct tflow_entry flow;
} k;
union {
ipfw_table_value value; /* value data */
uint32_t kidx; /* value kernel index */
} v;
} ipfw_obj_tentry_v0;
/* Prototypes of the version 0 socket option handlers defined below. */
static sopt_handler_f dump_config_v0, add_rules_v0, del_rules_v0,
clear_rules_v0, move_rules_v0, manage_sets_v0, dump_soptcodes_v0,
dump_srvobjects_v0;
/*
 * Handler table for IP_FW3 requests carrying version IP_FW3_OPVER_0.
 * Each entry is { opcode, version, direction, handler }.  The table is
 * registered on MOD_LOAD and removed on MOD_UNLOAD by
 * ipfw_compat_modevent().  Only add_rules_v0 and del_rules_v0 do real work;
 * the remaining handlers return EOPNOTSUPP.
 */
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_XGET, IP_FW3_OPVER_0, HDIR_GET, dump_config_v0 },
{ IP_FW_XADD, IP_FW3_OPVER_0, HDIR_BOTH, add_rules_v0 },
{ IP_FW_XDEL, IP_FW3_OPVER_0, HDIR_BOTH, del_rules_v0 },
{ IP_FW_XZERO, IP_FW3_OPVER_0, HDIR_SET, clear_rules_v0 },
{ IP_FW_XRESETLOG, IP_FW3_OPVER_0, HDIR_SET, clear_rules_v0 },
{ IP_FW_XMOVE, IP_FW3_OPVER_0, HDIR_SET, move_rules_v0 },
{ IP_FW_SET_SWAP, IP_FW3_OPVER_0, HDIR_SET, manage_sets_v0 },
{ IP_FW_SET_MOVE, IP_FW3_OPVER_0, HDIR_SET, manage_sets_v0 },
{ IP_FW_SET_ENABLE, IP_FW3_OPVER_0, HDIR_SET, manage_sets_v0 },
{ IP_FW_DUMP_SOPTCODES, IP_FW3_OPVER_0, HDIR_GET, dump_soptcodes_v0 },
{ IP_FW_DUMP_SRVOBJECTS, IP_FW3_OPVER_0, HDIR_GET, dump_srvobjects_v0 },
};
/*
 * Handler registered for IP_FW_XGET with version 0.
 * Not implemented: always returns EOPNOTSUPP and touches none of its
 * arguments.
 */
static int
dump_config_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
return (EOPNOTSUPP);
}
/*
 * Tell how much the v0 instruction @cmd changes in length once it is
 * rewritten in the v1 layout.
 *
 * The result is the v1 length minus the v0 length, in the same units as
 * F_LEN()/F_INSN_SIZE().  Opcodes whose encoding is the same in both
 * versions yield 0.
 */
static int
adjust_size_v0(ipfw_insn *cmd)
{
	const int oldlen = F_LEN(cmd);
	int newlen;

	switch (cmd->opcode) {
	case O_CHECK_STATE:
	case O_KEEP_STATE:
	case O_PROBE_STATE:
	case O_EXTERNAL_ACTION:
	case O_EXTERNAL_INSTANCE:
		/* The object index moves out of arg1 into a kidx word. */
		newlen = F_INSN_SIZE(ipfw_insn_kidx);
		break;
	case O_LIMIT:
		newlen = F_INSN_SIZE(ipfw_insn_limit);
		break;
	case O_IP_SRC_LOOKUP:
	case O_IP_DST_LOOKUP:
	case O_IP_FLOW_LOOKUP:
	case O_MAC_SRC_LOOKUP:
	case O_MAC_DST_LOOKUP:
		/*
		 * A bare instruction becomes ipfw_insn_kidx; anything
		 * longer becomes ipfw_insn_table.
		 */
		newlen = (oldlen == F_INSN_SIZE(ipfw_insn)) ?
		    F_INSN_SIZE(ipfw_insn_kidx) :
		    F_INSN_SIZE(ipfw_insn_table);
		break;
	case O_SKIPTO:
	case O_CALLRETURN:
		/* The jump target moves out of arg1 into a 32-bit word. */
		newlen = F_INSN_SIZE(ipfw_insn_u32);
		break;
	default:
		/* Same encoding in both versions. */
		return (0);
	}
	return (newlen - oldlen);
}
/*
 * Validate a version 0 "add rules" request and build per-rule check state.
 *
 * The request buffer is expected to hold, in order: the ip_fw3_opheader,
 * an optional IPFW_TLV_TBLNAME_LIST container of ipfw_obj_ntlv_v0 records
 * and a mandatory IPFW_TLV_RULE_LIST container of struct ip_fw_rule
 * records.  The containers must account for every byte of the request.
 *
 * On success returns 0, stores the rule list container in @prtlv and an
 * M_TEMP array with one rule_check_info per rule (in request order) in
 * @pci; the caller must free() that array.  On failure returns an errno
 * value and leaves nothing allocated.
 *
 * The incoming value of @op3 is not used; the header is re-fetched
 * from @sd.
 */
static int
parse_rules_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
    struct sockopt_data *sd, ipfw_obj_ctlv **prtlv,
    struct rule_check_info **pci)
{
	ipfw_obj_ctlv *ctlv, *rtlv, *tstate;
	ipfw_obj_ntlv_v0 *ntlv;
	struct rule_check_info *ci, *cbuf;
	struct ip_fw_rule *r;
	size_t count, clen, read, rsize;
	uint32_t rulenum;
	int idx, error;

	op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize);
	ctlv = (ipfw_obj_ctlv *)(op3 + 1);
	read = sizeof(ip_fw3_opheader);
	if (read + sizeof(*ctlv) > sd->valsize)
		return (EINVAL);

	rtlv = NULL;
	tstate = NULL;
	cbuf = NULL;
	/* Table names or other named objects. */
	if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) {
		/* Check size and alignment. */
		clen = ctlv->head.length;
		if (read + clen > sd->valsize || clen < sizeof(*ctlv) ||
		    (clen % sizeof(uint64_t)) != 0)
			return (EINVAL);
		/* Check for validness. */
		count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv);
		if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv))
			return (EINVAL);
		/*
		 * Check each TLV.
		 * Ensure TLVs are sorted ascending and
		 * there are no duplicates.
		 */
		idx = -1;
		ntlv = (ipfw_obj_ntlv_v0 *)(ctlv + 1);
		while (count > 0) {
			if (ntlv->head.length != sizeof(ipfw_obj_ntlv_v0))
				return (EINVAL);

			error = ipfw_check_object_name_generic(ntlv->name);
			if (error != 0)
				return (error);

			if (ntlv->idx <= idx)
				return (EINVAL);

			idx = ntlv->idx;
			count--;
			ntlv++;
		}

		tstate = ctlv;
		read += ctlv->head.length;
		ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);

		/* There must be room for the header of the next container. */
		if (read + sizeof(*ctlv) > sd->valsize)
			return (EINVAL);
	}

	/* List of rules. */
	if (ctlv->head.type == IPFW_TLV_RULE_LIST) {
		clen = ctlv->head.length;
		if (read + clen > sd->valsize || clen < sizeof(*ctlv) ||
		    (clen % sizeof(uint64_t)) != 0)
			return (EINVAL);

		clen -= sizeof(*ctlv);
		if (ctlv->count == 0 ||
		    ctlv->count > clen / sizeof(struct ip_fw_rule))
			return (EINVAL);

		/* Allocate state for each rule */
		cbuf = malloc(ctlv->count * sizeof(struct rule_check_info),
		    M_TEMP, M_WAITOK | M_ZERO);

		/*
		 * Check each rule for validness.
		 * Ensure numbered rules are sorted ascending
		 * and properly aligned
		 */
		rulenum = 0;
		count = 0;
		error = 0;
		ci = cbuf;
		r = (struct ip_fw_rule *)(ctlv + 1);
		while (clen > 0) {
			rsize = RULEUSIZE1(r);
			/*
			 * cbuf holds exactly ctlv->count entries, so stop
			 * before touching ci once that many rules have been
			 * consumed; the count mismatch is reported below.
			 */
			if (rsize > clen || count >= ctlv->count) {
				error = EINVAL;
				break;
			}
			ci->ctlv = tstate;
			ci->version = IP_FW3_OPVER_0;
			error = ipfw_check_rule(r, rsize, ci);
			if (error != 0)
				break;

			/* Check sorting */
			if (r->rulenum != 0 && r->rulenum < rulenum) {
				printf("ipfw: wrong order: rulenum %u"
				    " vs %u\n", r->rulenum, rulenum);
				error = EINVAL;
				break;
			}
			rulenum = r->rulenum;
			ci->urule = (caddr_t)r;
			clen -= rsize;
			r = (struct ip_fw_rule *)((caddr_t)r + rsize);
			count++;
			ci++;
		}

		/* Any failure in the rule list is reported as EINVAL. */
		if (ctlv->count != count || error != 0) {
			free(cbuf, M_TEMP);
			return (EINVAL);
		}

		rtlv = ctlv;
		read += ctlv->head.length;
		ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
	}

	/* Reject trailing garbage and requests without a rule list. */
	if (read != sd->valsize || rtlv == NULL) {
		free(cbuf, M_TEMP);
		return (EINVAL);
	}

	*prtlv = rtlv;
	*pci = cbuf;
	return (0);
}
static void
convert_v0_to_v1(struct rule_check_info *ci, int rule_len)
{
struct ip_fw_rule *urule;
struct ip_fw *krule;
ipfw_insn *src, *dst;
int l, cmdlen, newlen;
urule = (struct ip_fw_rule *)ci->urule;
krule = ci->krule;
for (l = urule->cmd_len, src = urule->cmd, dst = krule->cmd;
l > 0 && rule_len > 0;
l -= cmdlen, src += cmdlen,
rule_len -= newlen, dst += newlen) {
cmdlen = F_LEN(src);
switch (src->opcode) {
case O_CHECK_STATE:
case O_KEEP_STATE:
case O_PROBE_STATE:
case O_EXTERNAL_ACTION:
case O_EXTERNAL_INSTANCE:
newlen = F_INSN_SIZE(ipfw_insn_kidx);
insntod(dst, kidx)->kidx = src->arg1;
break;
case O_LIMIT:
newlen = F_INSN_SIZE(ipfw_insn_limit);
insntod(dst, limit)->kidx = src->arg1;
insntod(dst, limit)->limit_mask =
insntoc(src, limit)->limit_mask;
insntod(dst, limit)->conn_limit =
insntoc(src, limit)->conn_limit;
break;
case O_IP_DST_LOOKUP:
if (cmdlen == F_INSN_SIZE(ipfw_insn) + 2) {
/* lookup type stored in d[1] */
dst->arg1 = insntoc(src, table)->value;
}
case O_IP_SRC_LOOKUP:
case O_IP_FLOW_LOOKUP:
case O_MAC_SRC_LOOKUP:
case O_MAC_DST_LOOKUP:
if (cmdlen == F_INSN_SIZE(ipfw_insn)) {
newlen = F_INSN_SIZE(ipfw_insn_kidx);
insntod(dst, kidx)->kidx = src->arg1;
} else {
newlen = F_INSN_SIZE(ipfw_insn_table);
insntod(dst, table)->kidx = src->arg1;
insntod(dst, table)->value =
insntoc(src, u32)->d[0];
}
break;
case O_CALLRETURN:
case O_SKIPTO:
newlen = F_INSN_SIZE(ipfw_insn_u32);
insntod(dst, u32)->d[0] = src->arg1;
break;
default:
newlen = cmdlen;
memcpy(dst, src, sizeof(uint32_t) * newlen);
continue;
}
dst->opcode = src->opcode;
dst->len = (src->len & (F_NOT | F_OR)) | newlen;
}
}
/*
 * Allocate the kernel rule for the v0 userland rule ci->urule and fill it.
 *
 * The v0 instructions are scanned once to find out how much the instruction
 * stream changes in size when converted to v1 (see adjust_size_v0()).  The
 * new rule is stored in ci->krule, ci->urule_numoff is set to the offset of
 * the rulenum member, and the instructions are converted with
 * convert_v0_to_v1().
 */
static void
import_rule_v0(struct ip_fw_chain *chain, struct rule_check_info *ci)
{
	struct ip_fw_rule *urule;
	struct ip_fw *krule;
	ipfw_insn *cmd;
	int l, cmdlen, adjust, aadjust, delta;

	urule = (struct ip_fw_rule *)ci->urule;
	l = urule->cmd_len;
	cmd = urule->cmd;
	adjust = aadjust = 0;

	/* Scan all opcodes and determine the needed size */
	while (l > 0) {
		delta = adjust_size_v0(cmd);
		adjust += delta;
		/*
		 * The action offset moves only by the size change of the
		 * opcodes located in front of the action.
		 */
		if (cmd < (ACTION_PTR(urule)))
			aadjust += delta;
		cmdlen = F_LEN(cmd);
		l -= cmdlen;
		cmd += cmdlen;
	}

	cmdlen = urule->cmd_len + adjust;
	krule = ci->krule = ipfw_alloc_rule(chain, /* RULEKSIZE1(cmdlen) */
	    roundup2(sizeof(struct ip_fw) + cmdlen * 4 - 4, 8));

	krule->act_ofs = urule->act_ofs + aadjust;
	krule->cmd_len = urule->cmd_len + adjust;

	if (adjust != 0)
		printf("%s: converted rule %u: cmd_len %u -> %u, "
		    "act_ofs %u -> %u\n", __func__, urule->rulenum,
		    urule->cmd_len, krule->cmd_len, urule->act_ofs,
		    krule->act_ofs);

	krule->rulenum = urule->rulenum;
	krule->set = urule->set;
	krule->flags = urule->flags;

	/* Save rulenum offset */
	ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum);
	convert_v0_to_v1(ci, cmdlen);
}
/*
 * Handler registered for IP_FW_XADD with version 0.
 *
 * Validates the request, converts every rule to its kernel form and hands
 * the whole batch to ipfw_commit_rules().  Returns 0 or the errno value
 * reported by the parsing or commit step.
 */
static int
add_rules_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct rule_check_info *cbuf;
	ipfw_obj_ctlv *rtlv;
	int error, n;

	/* Validate the request buffer. */
	error = parse_rules_v0(chain, op3, sd, &rtlv, &cbuf);
	if (error != 0)
		return (error);

	/* Build the kernel representation of each rule. */
	for (n = 0; n < rtlv->count; n++)
		import_rule_v0(chain, &cbuf[n]);

	/* Insert the batch; on failure drop the rules built above. */
	error = ipfw_commit_rules(chain, cbuf, rtlv->count);
	if (error != 0) {
		for (n = 0; n < rtlv->count; n++)
			ipfw_free_rule(cbuf[n].krule);
	}

	/* Release the check state allocated by parse_rules_v0(). */
	free(cbuf, M_TEMP);
	return (error);
}
/*
 * Validate the version 0 range TLV @rt and translate it into @crt.
 *
 * Returns 0 when @rt is acceptable and @crt has been filled in, 1 when
 * @rt is rejected (in which case @crt is left untouched).
 */
static int
check_range_tlv_v0(const ipfw_range_tlv_v0 *rt, ipfw_range_tlv *crt)
{
	/* Wrong size, inverted range, bad set number or unknown flag. */
	if (rt->head.length != sizeof(*rt) ||
	    rt->start_rule > rt->end_rule ||
	    rt->set >= IPFW_MAX_SETS ||
	    rt->new_set >= IPFW_MAX_SETS ||
	    (rt->flags & IPFW_RCFLAG_USER) != rt->flags)
		return (1);

	/* Same header, but the length describes the current structure. */
	crt->head = rt->head;
	crt->head.length = sizeof(*crt);

	crt->start_rule = rt->start_rule;
	crt->end_rule = rt->end_rule;
	crt->set = rt->set;
	crt->new_set = rt->new_set;
	crt->flags = rt->flags;
	return (0);
}
/*
 * Handler registered for IP_FW_XDEL with version 0.
 *
 * The request must be exactly one ipfw_range_header_v0.  Its range is
 * translated to the current format and passed to delete_range(); on
 * success the number of deleted rules is written back into the new_set
 * field of the request.  Returns 0, EINVAL for a malformed request, or the
 * error reported by delete_range().
 */
static int
del_rules_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_range_header_v0 *hdr;
	ipfw_range_tlv range;
	int deleted, rc;

	if (sd->valsize != sizeof(*hdr))
		return (EINVAL);

	hdr = (ipfw_range_header_v0 *)ipfw_get_sopt_space(sd, sd->valsize);
	if (check_range_tlv_v0(&hdr->range, &range) != 0)
		return (EINVAL);

	deleted = 0;
	rc = delete_range(chain, &range, &deleted);
	if (rc != 0)
		return (rc);

	/* Save number of rules deleted */
	hdr->range.new_set = deleted;
	return (0);
}
/*
 * Handler registered for IP_FW_XZERO and IP_FW_XRESETLOG with version 0.
 * Not implemented: always returns EOPNOTSUPP and touches none of its
 * arguments.
 */
static int
clear_rules_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
return (EOPNOTSUPP);
}
/*
 * Handler registered for IP_FW_XMOVE with version 0.
 * Not implemented: always returns EOPNOTSUPP and touches none of its
 * arguments.
 */
static int
move_rules_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
return (EOPNOTSUPP);
}
/*
 * Handler registered for IP_FW_SET_SWAP, IP_FW_SET_MOVE and
 * IP_FW_SET_ENABLE with version 0.
 * Not implemented: always returns EOPNOTSUPP and touches none of its
 * arguments.
 */
static int
manage_sets_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
return (EOPNOTSUPP);
}
/*
 * Handler registered for IP_FW_DUMP_SOPTCODES with version 0.
 * Not implemented: always returns EOPNOTSUPP and touches none of its
 * arguments.
 */
static int
dump_soptcodes_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
return (EOPNOTSUPP);
}
/*
 * Handler registered for IP_FW_DUMP_SRVOBJECTS with version 0.
 * Not implemented: always returns EOPNOTSUPP and touches none of its
 * arguments.
 */
static int
dump_srvobjects_v0(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
return (EOPNOTSUPP);
}
/*
 * Opcode checker for version 0 rules; installed with
 * ipfw_register_compat() when the module is loaded.
 *
 * @pcmd and @plen are an in/out cursor over the instruction being checked
 * and the remaining length.  They are advanced here only when an
 * O_EXTERNAL_ACTION is followed by another opcode.  @ci carries the request
 * version and the object_opcodes counter, which is incremented for every
 * opcode counted below.
 *
 * Returns FAILED for a non-v0 request or an invalid opcode, BAD_SIZE for
 * an unexpected instruction length, CHECK_ACTION for the action opcodes
 * handled here, SUCCESS for the other opcodes handled here, and the result
 * of ipfw_check_opcode() for everything else.
 */
static enum ipfw_opcheck_result
check_opcode_compat(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci)
{
ipfw_insn *cmd;
size_t cmdlen;
/* This checker only understands the version 0 encoding. */
if (ci->version != IP_FW3_OPVER_0)
return (FAILED);
cmd = *pcmd;
cmdlen = F_LEN(cmd);
switch (cmd->opcode) {
case O_PROBE_STATE:
case O_KEEP_STATE:
/* In v0 these are bare instructions. */
if (cmdlen != F_INSN_SIZE(ipfw_insn))
return (BAD_SIZE);
ci->object_opcodes++;
break;
case O_LIMIT:
if (cmdlen != F_INSN_SIZE(ipfw_insn_limit_v0))
return (BAD_SIZE);
ci->object_opcodes++;
break;
case O_IP_SRC_LOOKUP:
/* Unlike the destination lookup, no extra word is accepted. */
if (cmdlen > F_INSN_SIZE(ipfw_insn_u32))
return (BAD_SIZE);
/* FALLTHROUGH */
case O_IP_DST_LOOKUP:
if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
cmdlen != F_INSN_SIZE(ipfw_insn_u32))
return (BAD_SIZE);
/* arg1 carries the table number in v0. */
if (cmd->arg1 >= V_fw_tables_max) {
printf("ipfw: invalid table number %u\n",
cmd->arg1);
return (FAILED);
}
ci->object_opcodes++;
break;
case O_IP_FLOW_LOOKUP:
if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
cmdlen != F_INSN_SIZE(ipfw_insn_u32))
return (BAD_SIZE);
if (cmd->arg1 >= V_fw_tables_max) {
printf("ipfw: invalid table number %u\n",
cmd->arg1);
return (FAILED);
}
ci->object_opcodes++;
break;
case O_CHECK_STATE:
/* Counted, then checked like the other bare actions below. */
ci->object_opcodes++;
/* FALLTHROUGH */
case O_SKIPTO:
case O_CALLRETURN:
if (cmdlen != F_INSN_SIZE(ipfw_insn))
return (BAD_SIZE);
return (CHECK_ACTION);
case O_EXTERNAL_ACTION:
if (cmd->arg1 == 0 ||
cmdlen != F_INSN_SIZE(ipfw_insn)) {
printf("ipfw: invalid external "
"action opcode\n");
return (FAILED);
}
ci->object_opcodes++;
/*
* Do we have O_EXTERNAL_INSTANCE or O_EXTERNAL_DATA
* opcode?
*/
if (*plen != cmdlen) {
/* Step the caller's cursor onto the opcode that follows. */
*plen -= cmdlen;
*pcmd = cmd += cmdlen;
cmdlen = F_LEN(cmd);
if (cmd->opcode == O_EXTERNAL_DATA)
return (CHECK_ACTION);
if (cmd->opcode != O_EXTERNAL_INSTANCE) {
printf("ipfw: invalid opcode "
"next to external action %u\n",
cmd->opcode);
return (FAILED);
}
if (cmd->arg1 == 0 ||
cmdlen != F_INSN_SIZE(ipfw_insn)) {
printf("ipfw: invalid external "
"action instance opcode\n");
return (FAILED);
}
ci->object_opcodes++;
}
return (CHECK_ACTION);
default:
/* Not specific to v0: use the regular checker. */
return (ipfw_check_opcode(pcmd, plen, ci));
}
return (SUCCESS);
}
/*
 * Module event handler.
 *
 * On load the version 0 socket option handlers are added and the v0
 * opcode checker is registered; on unload both are removed in the reverse
 * order.  Any other event is answered with EOPNOTSUPP.
 */
static int
ipfw_compat_modevent(module_t mod, int type, void *unused)
{
	if (type == MOD_LOAD) {
		IPFW_ADD_SOPT_HANDLER(1, scodes);
		ipfw_register_compat(check_opcode_compat);
		return (0);
	}
	if (type == MOD_UNLOAD) {
		ipfw_unregister_compat();
		IPFW_DEL_SOPT_HANDLER(1, scodes);
		return (0);
	}
	return (EOPNOTSUPP);
}
/* Module description: name, event handler, no private data. */
static moduledata_t ipfw_compat_mod = {
"ipfw_compat",
ipfw_compat_modevent,
0
};
/* Define startup order. */
#define IPFW_COMPAT_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL
#define IPFW_COMPAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
#define IPFW_COMPAT_MODULE_ORDER (IPFW_COMPAT_MODEVENT_ORDER + 1)
DECLARE_MODULE(ipfw_compat, ipfw_compat_mod, IPFW_COMPAT_SI_SUB_FIREWALL,
IPFW_COMPAT_MODULE_ORDER);
/* Declares a dependency on the ipfw module, with 3 for all version bounds. */
MODULE_DEPEND(ipfw_compat, ipfw, 3, 3, 3);
MODULE_VERSION(ipfw_compat, 1);

View file

@ -1,8 +1,8 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2017-2018 Yandex LLC
* Copyright (c) 2017-2018 Andrey V. Elsukov <ae@FreeBSD.org>
* Copyright (c) 2017-2025 Yandex LLC
* Copyright (c) 2017-2025 Andrey V. Elsukov <ae@FreeBSD.org>
* Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
*
* Redistribution and use in source and binary forms, with or without
@ -57,8 +57,8 @@
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */
#ifdef INET6
@ -132,9 +132,9 @@ struct dyn_data {
uint32_t hashval; /* hash value used for hash resize */
uint16_t fibnum; /* fib used to send keepalives */
uint8_t _pad[3];
uint8_t _pad;
uint8_t flags; /* internal flags */
uint16_t rulenum; /* parent rule number */
uint32_t rulenum; /* parent rule number */
uint32_t ruleid; /* parent rule id */
uint32_t state; /* TCP session state and flags */
@ -159,8 +159,7 @@ struct dyn_data {
struct dyn_parent {
void *parent; /* pointer to parent rule */
uint32_t count; /* number of linked states */
uint8_t _pad[2];
uint16_t rulenum; /* parent rule number */
uint32_t rulenum; /* parent rule number */
uint32_t ruleid; /* parent rule id */
uint32_t hashval; /* hash value used for hash resize */
uint32_t expire; /* expire time */
@ -169,7 +168,8 @@ struct dyn_parent {
struct dyn_ipv4_state {
uint8_t type; /* State type */
uint8_t proto; /* UL Protocol */
uint16_t kidx; /* named object index */
uint16_t spare;
uint32_t kidx; /* named object index */
uint16_t sport, dport; /* ULP source and destination ports */
in_addr_t src, dst; /* IPv4 source and destination */
@ -501,12 +501,12 @@ static struct dyn_ipv6_state *dyn_lookup_ipv6_state(
const struct ipfw_flow_id *, uint32_t, const void *,
struct ipfw_dyn_info *, int);
static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *,
uint32_t, const void *, int, uint32_t, uint16_t);
uint32_t, const void *, int, uint32_t, uint32_t);
static struct dyn_ipv6_state *dyn_alloc_ipv6_state(
const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t);
static int dyn_add_ipv6_state(void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, uint32_t, uint32_t, uint8_t);
static int dyn_add_ipv6_state(void *, uint32_t, uint32_t,
const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t,
struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
struct ipfw_dyn_info *, uint16_t, uint32_t, uint8_t);
static void dyn_export_ipv6_state(const struct dyn_ipv6_state *,
ipfw_dyn_rule *);
@ -519,33 +519,33 @@ static void dyn_enqueue_keepalive_ipv6(struct mbufq *,
static void dyn_send_keepalive_ipv6(struct ip_fw_chain *);
static struct dyn_ipv6_state *dyn_lookup_ipv6_parent(
const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint32_t,
uint32_t);
static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked(
const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint32_t,
uint32_t);
static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t, uint16_t);
static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint32_t,
const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t, uint32_t);
#endif /* INET6 */
/* Functions to work with limit states */
static void *dyn_get_parent_state(const struct ipfw_flow_id *, uint32_t,
struct ip_fw *, uint32_t, uint32_t, uint16_t);
struct ip_fw *, uint32_t, uint32_t, uint32_t);
static struct dyn_ipv4_state *dyn_lookup_ipv4_parent(
const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
const struct ipfw_flow_id *, const void *, uint32_t, uint32_t, uint32_t);
static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked(
const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, const void *, uint32_t, uint32_t, uint32_t);
static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint32_t,
uint32_t);
static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t);
static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint32_t,
const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t);
static void dyn_tick(void *);
static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *);
static void dyn_free_states(struct ip_fw_chain *);
static void dyn_export_parent(const struct dyn_parent *, uint16_t, uint8_t,
static void dyn_export_parent(const struct dyn_parent *, uint32_t, uint8_t,
ipfw_dyn_rule *);
static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t,
static void dyn_export_data(const struct dyn_data *, uint32_t, uint8_t,
uint8_t, ipfw_dyn_rule *);
static uint32_t dyn_update_tcp_state(struct dyn_data *,
const struct ipfw_flow_id *, const struct tcphdr *, int);
@ -556,12 +556,12 @@ static void dyn_update_proto_state(struct dyn_data *,
struct dyn_ipv4_state *dyn_lookup_ipv4_state(const struct ipfw_flow_id *,
const void *, struct ipfw_dyn_info *, int);
static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *,
const void *, int, uint32_t, uint16_t);
const void *, int, uint32_t, uint32_t);
static struct dyn_ipv4_state *dyn_alloc_ipv4_state(
const struct ipfw_flow_id *, uint16_t, uint8_t);
static int dyn_add_ipv4_state(void *, uint32_t, uint16_t,
const struct ipfw_flow_id *, uint32_t, uint8_t);
static int dyn_add_ipv4_state(void *, uint32_t, uint32_t,
const struct ipfw_flow_id *, const void *, int, uint32_t,
struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
struct ipfw_dyn_info *, uint16_t, uint32_t, uint8_t);
static void dyn_export_ipv4_state(const struct dyn_ipv4_state *,
ipfw_dyn_rule *);
@ -574,34 +574,41 @@ struct dyn_state_obj {
char name[64];
};
#define DYN_STATE_OBJ(ch, cmd) \
((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1))
/*
* Classifier callback.
* Return 0 if opcode contains object that should be referenced
* or rewritten.
*/
static int
dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
dyn_classify(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
ipfw_insn_kidx *cmd;
DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
if (F_LEN(cmd0) < 2)
return (EINVAL);
/*
* NOTE: ipfw_insn_kidx and ipfw_insn_limit has overlapped kidx
* field, so we can use one type to get access to kidx field.
*/
cmd = insntod(cmd0, kidx);
DYN_DEBUG("opcode %u, kidx %u", cmd0->opcode, cmd->kidx);
/* Don't rewrite "check-state any" */
if (cmd->arg1 == 0 &&
cmd->opcode == O_CHECK_STATE)
if (cmd->kidx == 0 &&
cmd0->opcode == O_CHECK_STATE)
return (1);
*puidx = cmd->arg1;
*puidx = cmd->kidx;
*ptype = 0;
return (0);
}
static void
dyn_update(ipfw_insn *cmd, uint16_t idx)
dyn_update(ipfw_insn *cmd0, uint32_t idx)
{
cmd->arg1 = idx;
DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
insntod(cmd0, kidx)->kidx = idx;
DYN_DEBUG("opcode %u, kidx %u", cmd0->opcode, idx);
}
static int
@ -611,7 +618,7 @@ dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
ipfw_obj_ntlv *ntlv;
const char *name;
DYN_DEBUG("uidx %d", ti->uidx);
DYN_DEBUG("uidx %u", ti->uidx);
if (ti->uidx != 0) {
if (ti->tlvs == NULL)
return (EINVAL);
@ -639,16 +646,16 @@ dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
}
static struct named_object *
dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
dyn_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
{
DYN_DEBUG("kidx %d", idx);
DYN_DEBUG("kidx %u", idx);
return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx));
}
static int
dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
uint16_t *pkidx)
uint32_t *pkidx)
{
struct namedobj_instance *ni;
struct dyn_state_obj *obj;
@ -656,7 +663,7 @@ dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
ipfw_obj_ntlv *ntlv;
char *name;
DYN_DEBUG("uidx %d", ti->uidx);
DYN_DEBUG("uidx %u", ti->uidx);
if (ti->uidx != 0) {
if (ti->tlvs == NULL)
return (EINVAL);
@ -686,7 +693,7 @@ dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
no->refcnt++;
IPFW_UH_WUNLOCK(ch);
free(obj, M_IPFW);
DYN_DEBUG("\tfound kidx %d", *pkidx);
DYN_DEBUG("\tfound kidx %u for name '%s'", *pkidx, no->name);
return (0);
}
if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) {
@ -700,7 +707,7 @@ dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
obj->no.refcnt++;
*pkidx = obj->no.kidx;
IPFW_UH_WUNLOCK(ch);
DYN_DEBUG("\tcreated kidx %d", *pkidx);
DYN_DEBUG("\tcreated kidx %u for name '%s'", *pkidx, name);
return (0);
}
@ -716,7 +723,7 @@ dyn_destroy(struct ip_fw_chain *ch, struct named_object *no)
KASSERT(no->refcnt == 1,
("Destroying object '%s' (type %u, idx %u) with refcnt %u",
no->name, no->etlv, no->kidx, no->refcnt));
DYN_DEBUG("kidx %d", no->kidx);
DYN_DEBUG("kidx %u", no->kidx);
obj = SRV_OBJECT(ch, no->kidx);
SRV_OBJECT(ch, no->kidx) = NULL;
ipfw_objhash_del(CHAIN_TO_SRV(ch), no);
@ -1079,7 +1086,7 @@ restart:
*/
static int
dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *pkt,
const void *ulp, int pktlen, uint32_t bucket, uint16_t kidx)
const void *ulp, int pktlen, uint32_t bucket, uint32_t kidx)
{
struct dyn_ipv4_state *s;
int dir;
@ -1109,7 +1116,7 @@ dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *pkt,
struct dyn_ipv4_state *
dyn_lookup_ipv4_parent(const struct ipfw_flow_id *pkt, const void *rule,
uint32_t ruleid, uint16_t rulenum, uint32_t hashval)
uint32_t ruleid, uint32_t rulenum, uint32_t hashval)
{
struct dyn_ipv4_state *s;
uint32_t version, bucket;
@ -1145,7 +1152,7 @@ restart:
static struct dyn_ipv4_state *
dyn_lookup_ipv4_parent_locked(const struct ipfw_flow_id *pkt,
const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket)
const void *rule, uint32_t ruleid, uint32_t rulenum, uint32_t bucket)
{
struct dyn_ipv4_state *s;
@ -1227,7 +1234,7 @@ restart:
*/
static int
dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
const void *ulp, int pktlen, uint32_t bucket, uint16_t kidx)
const void *ulp, int pktlen, uint32_t bucket, uint32_t kidx)
{
struct dyn_ipv6_state *s;
int dir;
@ -1258,7 +1265,7 @@ dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
static struct dyn_ipv6_state *
dyn_lookup_ipv6_parent(const struct ipfw_flow_id *pkt, uint32_t zoneid,
const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t hashval)
const void *rule, uint32_t ruleid, uint32_t rulenum, uint32_t hashval)
{
struct dyn_ipv6_state *s;
uint32_t version, bucket;
@ -1295,7 +1302,7 @@ restart:
static struct dyn_ipv6_state *
dyn_lookup_ipv6_parent_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket)
const void *rule, uint32_t ruleid, uint32_t rulenum, uint32_t bucket)
{
struct dyn_ipv6_state *s;
@ -1333,10 +1340,11 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
struct ip_fw *rule;
IPFW_RLOCK_ASSERT(&V_layer3_chain);
MPASS(F_LEN(cmd) >= F_INSN_SIZE(ipfw_insn_kidx));
data = NULL;
rule = NULL;
info->kidx = cmd->arg1;
info->kidx = insntoc(cmd, kidx)->kidx;
info->direction = MATCH_NONE;
info->hashval = hash_packet(&args->f_id);
@ -1411,7 +1419,6 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
*/
if (V_layer3_chain.map[data->f_pos] == rule) {
data->chain_id = V_layer3_chain.id;
info->f_pos = data->f_pos;
} else if (V_dyn_keep_states != 0) {
/*
* The original rule pointer is still usable.
@ -1421,7 +1428,6 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
MPASS(V_layer3_chain.n_rules > 1);
data->chain_id = V_layer3_chain.id;
data->f_pos = V_layer3_chain.n_rules - 2;
info->f_pos = data->f_pos;
} else {
rule = NULL;
info->direction = MATCH_NONE;
@ -1430,8 +1436,8 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
data->rulenum, data);
/* info->f_pos doesn't matter here. */
}
} else
info->f_pos = data->f_pos;
}
info->f_pos = data->f_pos;
}
DYNSTATE_CRITICAL_EXIT();
#if 0
@ -1452,7 +1458,7 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
}
static struct dyn_parent *
dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum,
dyn_alloc_parent(void *parent, uint32_t ruleid, uint32_t rulenum,
uint32_t hashval)
{
struct dyn_parent *limit;
@ -1478,7 +1484,7 @@ dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum,
}
static struct dyn_data *
dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum,
dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint32_t rulenum,
const struct ipfw_flow_id *pkt, const void *ulp, int pktlen,
uint32_t hashval, uint16_t fibnum)
{
@ -1506,7 +1512,7 @@ dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum,
}
static struct dyn_ipv4_state *
dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx,
dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint32_t kidx,
uint8_t type)
{
struct dyn_ipv4_state *s;
@ -1533,9 +1539,9 @@ dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx,
* is not needed.
*/
static struct dyn_ipv4_state *
dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint32_t rulenum,
const struct ipfw_flow_id *pkt, uint32_t hashval, uint32_t version,
uint16_t kidx)
uint32_t kidx)
{
struct dyn_ipv4_state *s;
struct dyn_parent *limit;
@ -1586,10 +1592,10 @@ dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
}
static int
dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum,
dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint32_t rulenum,
const struct ipfw_flow_id *pkt, const void *ulp, int pktlen,
uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum,
uint16_t kidx, uint8_t type)
uint32_t kidx, uint8_t type)
{
struct dyn_ipv4_state *s;
void *data;
@ -1637,7 +1643,7 @@ dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum,
#ifdef INET6
static struct dyn_ipv6_state *
dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
uint16_t kidx, uint8_t type)
uint32_t kidx, uint8_t type)
{
struct dyn_ipv6_state *s;
@ -1664,9 +1670,9 @@ dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
* is not needed.
*/
static struct dyn_ipv6_state *
dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint32_t rulenum,
const struct ipfw_flow_id *pkt, uint32_t zoneid, uint32_t hashval,
uint32_t version, uint16_t kidx)
uint32_t version, uint32_t kidx)
{
struct dyn_ipv6_state *s;
struct dyn_parent *limit;
@ -1717,10 +1723,10 @@ dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
}
static int
dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum,
dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint32_t rulenum,
const struct ipfw_flow_id *pkt, uint32_t zoneid, const void *ulp,
int pktlen, uint32_t hashval, struct ipfw_dyn_info *info,
uint16_t fibnum, uint16_t kidx, uint8_t type)
uint16_t fibnum, uint32_t kidx, uint8_t type)
{
struct dyn_ipv6_state *s;
struct dyn_data *data;
@ -1768,7 +1774,7 @@ dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum,
static void *
dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
struct ip_fw *rule, uint32_t hashval, uint32_t limit, uint16_t kidx)
struct ip_fw *rule, uint32_t hashval, uint32_t limit, uint32_t kidx)
{
char sbuf[24];
struct dyn_parent *p;
@ -1862,7 +1868,7 @@ static int
dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
uint16_t fibnum, const void *ulp, int pktlen, struct ip_fw *rule,
struct ipfw_dyn_info *info, uint32_t limit, uint16_t limit_mask,
uint16_t kidx, uint8_t type)
uint32_t kidx, uint8_t type)
{
struct ipfw_flow_id id;
uint32_t hashval, parent_hashval, ruleid, rulenum;
@ -1999,12 +2005,16 @@ ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
limit = 0;
limit_mask = 0;
}
/*
* NOTE: we assume that kidx field of struct ipfw_insn_kidx
* located in the same place as kidx field of ipfw_insn_limit.
*/
return (dyn_install_state(&args->f_id,
#ifdef INET6
IS_IP6_FLOW_ID(&args->f_id) ? dyn_getscopeid(args):
#endif
0, M_GETFIB(args->m), ulp, pktlen, rule, info, limit,
limit_mask, cmd->o.arg1, cmd->o.opcode));
limit_mask, cmd->kidx, cmd->o.opcode));
}
/*
@ -2108,7 +2118,7 @@ dyn_free_states(struct ip_fw_chain *chain)
* dynamic states.
*/
static int
dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt)
dyn_match_range(uint32_t rulenum, uint8_t set, const ipfw_range_tlv *rt)
{
MPASS(rt != NULL);
@ -2130,7 +2140,7 @@ dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt)
static void
dyn_acquire_rule(struct ip_fw_chain *ch, struct dyn_data *data,
struct ip_fw *rule, uint16_t kidx)
struct ip_fw *rule, uint32_t kidx)
{
struct dyn_state_obj *obj;
@ -2157,7 +2167,7 @@ dyn_acquire_rule(struct ip_fw_chain *ch, struct dyn_data *data,
static void
dyn_release_rule(struct ip_fw_chain *ch, struct dyn_data *data,
struct ip_fw *rule, uint16_t kidx)
struct ip_fw *rule, uint32_t kidx)
{
struct dyn_state_obj *obj;
@ -2819,8 +2829,8 @@ ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
* Pass through all states and reset eaction for orphaned rules.
*/
void
ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
uint16_t default_id, uint16_t instance_id)
ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint32_t eaction_id,
uint32_t default_id, uint32_t instance_id)
{
#ifdef INET6
struct dyn_ipv6_state *s6;
@ -2931,70 +2941,47 @@ ipfw_is_dyn_rule(struct ip_fw *rule)
}
static void
dyn_export_parent(const struct dyn_parent *p, uint16_t kidx, uint8_t set,
dyn_export_parent(const struct dyn_parent *p, uint32_t kidx, uint8_t set,
ipfw_dyn_rule *dst)
{
dst->dyn_type = O_LIMIT_PARENT;
dst->type = O_LIMIT_PARENT;
dst->set = set;
dst->kidx = kidx;
dst->count = (uint16_t)DPARENT_COUNT(p);
dst->rulenum = p->rulenum;
dst->count = DPARENT_COUNT(p);
dst->expire = TIME_LEQ(p->expire, time_uptime) ? 0:
p->expire - time_uptime;
/* 'rule' is used to pass up the rule number and set */
memcpy(&dst->rule, &p->rulenum, sizeof(p->rulenum));
/* store set number into high word of dst->rule pointer. */
memcpy((char *)&dst->rule + sizeof(p->rulenum), &set, sizeof(set));
dst->hashval = p->hashval;
/* unused fields */
dst->pad = 0;
dst->pcnt = 0;
dst->bcnt = 0;
dst->parent = NULL;
dst->state = 0;
dst->ack_fwd = 0;
dst->ack_rev = 0;
dst->bucket = p->hashval;
/*
* The legacy userland code will interpret a NULL here as a marker
* for the last dynamic rule.
*/
dst->next = (ipfw_dyn_rule *)1;
}
static void
dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type,
dyn_export_data(const struct dyn_data *data, uint32_t kidx, uint8_t type,
uint8_t set, ipfw_dyn_rule *dst)
{
dst->dyn_type = type;
dst->type = type;
dst->set = set;
dst->kidx = kidx;
dst->rulenum = data->rulenum;
dst->pcnt = data->pcnt_fwd + data->pcnt_rev;
dst->bcnt = data->bcnt_fwd + data->bcnt_rev;
dst->expire = TIME_LEQ(data->expire, time_uptime) ? 0:
data->expire - time_uptime;
/* 'rule' is used to pass up the rule number and set */
memcpy(&dst->rule, &data->rulenum, sizeof(data->rulenum));
/* store set number into high word of dst->rule pointer. */
memcpy((char *)&dst->rule + sizeof(data->rulenum), &set, sizeof(set));
dst->state = data->state;
if (data->flags & DYN_REFERENCED)
dst->state |= IPFW_DYN_ORPHANED;
/* unused fields */
dst->parent = NULL;
dst->ack_fwd = data->ack_fwd;
dst->ack_rev = data->ack_rev;
dst->count = 0;
dst->bucket = data->hashval;
/*
* The legacy userland code will interpret a NULL here as a marker
* for the last dynamic rule.
*/
dst->next = (ipfw_dyn_rule *)1;
dst->hashval = data->hashval;
}
static void
@ -3122,52 +3109,6 @@ ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd)
#undef DYN_EXPORT_STATES
}
/*
* Fill given buffer with dynamic states (legacy format).
* IPFW_UH_RLOCK has to be held while calling.
*/
void
ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep)
{
#ifdef INET6
struct dyn_ipv6_state *s6;
#endif
struct dyn_ipv4_state *s4;
ipfw_dyn_rule *p, *last = NULL;
char *bp;
uint32_t bucket;
if (V_dyn_count == 0)
return;
bp = *pbp;
IPFW_UH_RLOCK_ASSERT(chain);
#define DYN_EXPORT_STATES(s, af, head, b) \
CK_SLIST_FOREACH(s, &V_dyn_ ## head[b], entry) { \
if (bp + sizeof(*p) > ep) \
break; \
p = (ipfw_dyn_rule *)bp; \
dyn_export_ ## af ## _state(s, p); \
last = p; \
bp += sizeof(*p); \
}
for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket);
DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket);
#ifdef INET6
DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket);
DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket);
#endif /* INET6 */
}
if (last != NULL) /* mark last dynamic rule */
last->next = NULL;
*pbp = bp;
#undef DYN_EXPORT_STATES
}
void
ipfw_dyn_init(struct ip_fw_chain *chain)
{

View file

@ -1,6 +1,8 @@
/*-
* Copyright (c) 2016-2017 Yandex LLC
* Copyright (c) 2016-2017 Andrey V. Elsukov <ae@FreeBSD.org>
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2016-2025 Yandex LLC
* Copyright (c) 2016-2025 Andrey V. Elsukov <ae@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -68,7 +70,7 @@
* It is possible to pass some additional information to external
* action handler using O_EXTERNAL_INSTANCE and O_EXTERNAL_DATA opcodes.
* Such opcodes should be next after the O_EXTERNAL_ACTION opcode.
* For the O_EXTERNAL_INSTANCE opcode the cmd->arg1 contains index of named
* For the O_EXTERNAL_INSTANCE opcode the cmd->kidx contains index of named
* object related to an instance of external action.
* For the O_EXTERNAL_DATA opcode the cmd contains the data that can be used
* by external action handler without needing to create named instance.
@ -76,7 +78,7 @@
* In case when eaction module uses named instances, it should register
* opcode rewriting routines for O_EXTERNAL_INSTANCE opcode. The
* classifier callback can look back into O_EXTERNAL_ACTION opcode (it
* must be in the (ipfw_insn *)(cmd - 1)). By arg1 from O_EXTERNAL_ACTION
* must be in the (ipfw_insn *)(cmd - 2)). By kidx from O_EXTERNAL_ACTION
* it can determine eaction_id and compare it with its own.
* The macro IPFW_TLV_EACTION_NAME(eaction_id) can be used to determine
* the type of named_object related to external action instance.
@ -92,7 +94,7 @@ struct eaction_obj {
};
#define EACTION_OBJ(ch, cmd) \
((struct eaction_obj *)SRV_OBJECT((ch), (cmd)->arg1))
((struct eaction_obj *)SRV_OBJECT((ch), insntod((cmd), kidx)->kidx))
#if 0
#define EACTION_DEBUG(fmt, ...) do { \
@ -116,21 +118,28 @@ default_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
* Opcode rewriting callbacks.
*/
static int
eaction_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
eaction_classify(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
ipfw_insn_kidx *cmd;
EACTION_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
*puidx = cmd->arg1;
if (F_LEN(cmd0) <= 1)
return (EINVAL);
cmd = insntod(cmd0, kidx);
EACTION_DEBUG("opcode %u, kidx %u", cmd0->opcode, cmd->kidx);
*puidx = cmd->kidx;
*ptype = 0;
return (0);
}
static void
eaction_update(ipfw_insn *cmd, uint16_t idx)
eaction_update(ipfw_insn *cmd0, uint32_t idx)
{
ipfw_insn_kidx *cmd;
cmd->arg1 = idx;
EACTION_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
cmd = insntod(cmd0, kidx);
cmd->kidx = idx;
EACTION_DEBUG("opcode %u, kidx -> %u", cmd0->opcode, cmd->kidx);
}
static int
@ -162,7 +171,7 @@ eaction_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
}
static struct named_object *
eaction_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
eaction_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
{
EACTION_DEBUG("kidx %u", idx);
@ -182,7 +191,7 @@ static struct opcode_obj_rewrite eaction_opcodes[] = {
static int
create_eaction_obj(struct ip_fw_chain *ch, ipfw_eaction_t handler,
const char *name, uint16_t *eaction_id)
const char *name, uint32_t *eaction_id)
{
struct namedobj_instance *ni;
struct eaction_obj *obj;
@ -249,8 +258,8 @@ destroy_eaction_obj(struct ip_fw_chain *ch, struct named_object *no)
* Resets all eaction opcodes to default handlers.
*/
static void
reset_eaction_rules(struct ip_fw_chain *ch, uint16_t eaction_id,
uint16_t instance_id, bool reset_rules)
reset_eaction_rules(struct ip_fw_chain *ch, uint32_t eaction_id,
uint32_t instance_id, bool reset_rules)
{
struct named_object *no;
int i;
@ -332,11 +341,11 @@ ipfw_eaction_uninit(struct ip_fw_chain *ch, int last)
* Registers external action handler to the global array.
* On success it returns eaction id, otherwise - zero.
*/
uint16_t
uint32_t
ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
const char *name)
{
uint16_t eaction_id;
uint32_t eaction_id;
eaction_id = 0;
if (ipfw_check_object_name_generic(name) == 0) {
@ -351,7 +360,7 @@ ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
* Deregisters external action handler with id eaction_id.
*/
int
ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id)
ipfw_del_eaction(struct ip_fw_chain *ch, uint32_t eaction_id)
{
struct named_object *no;
@ -371,7 +380,7 @@ ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id)
int
ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
uint16_t eaction_id, uint16_t default_id, uint16_t instance_id)
uint32_t eaction_id, uint32_t default_id, uint32_t instance_id)
{
ipfw_insn *cmd, *icmd;
int l;
@ -385,22 +394,23 @@ ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
*/
cmd = ipfw_get_action(rule);
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != eaction_id)
insntod(cmd, kidx)->kidx != eaction_id)
return (0);
/*
* Check if there is O_EXTERNAL_INSTANCE opcode, we need
* to truncate the rule length.
*
* NOTE: F_LEN(cmd) must be 1 for O_EXTERNAL_ACTION opcode,
* NOTE: F_LEN(cmd) must be 2 for O_EXTERNAL_ACTION opcode,
* and rule length should be enough to keep O_EXTERNAL_INSTANCE
* opcode, thus we do check for l > 1.
* opcode, thus we do check for l > 2.
*/
l = rule->cmd + rule->cmd_len - cmd;
if (l > 1) {
MPASS(F_LEN(cmd) == 1);
icmd = cmd + 1;
if (l > 2) {
MPASS(F_LEN(cmd) == 2);
icmd = cmd + F_LEN(cmd);
if (icmd->opcode == O_EXTERNAL_INSTANCE &&
instance_id != 0 && icmd->arg1 != instance_id)
instance_id != 0 &&
insntod(icmd, kidx)->kidx != instance_id)
return (0);
/*
* Since named_object related to this instance will be
@ -408,7 +418,7 @@ ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
* the rest of cmd chain just after O_EXTERNAL_ACTION
* opcode.
*/
EACTION_DEBUG("truncate rule %d: len %u -> %u",
EACTION_DEBUG("truncate rule %u: len %u -> %u",
rule->rulenum, rule->cmd_len,
rule->cmd_len - F_LEN(icmd));
rule->cmd_len -= F_LEN(icmd);
@ -416,7 +426,7 @@ ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
(uint32_t *)rule->cmd) == rule->cmd_len);
}
cmd->arg1 = default_id; /* Set to default id */
insntod(cmd, kidx)->kidx = default_id; /* Set to default id */
/*
* Return 1 when reset successfully happened.
*/
@ -429,8 +439,8 @@ ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
* eaction has instance with id == kidx.
*/
int
ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id,
uint16_t kidx)
ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint32_t eaction_id,
uint32_t kidx)
{
struct named_object *no;
@ -448,5 +458,6 @@ ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done)
{
MPASS(F_LEN(cmd) == 2);
return (EACTION_OBJ(ch, cmd)->handler(ch, args, cmd, done));
}

View file

@ -1,5 +1,7 @@
/*-
* Copyright (c) 2014 Yandex LLC.
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2014-2025 Yandex LLC.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -69,7 +71,7 @@ static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd);
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_XIFLIST, 0, HDIR_GET, list_ifaces },
{ IP_FW_XIFLIST, IP_FW3_OPVER, HDIR_GET, list_ifaces },
};
/*
@ -231,7 +233,7 @@ vnet_ipfw_iface_init(struct ip_fw_chain *ch)
{
struct namedobj_instance *ii;
ii = ipfw_objhash_create(DEFAULT_IFACES);
ii = ipfw_objhash_create(DEFAULT_IFACES, DEFAULT_OBJHASH_SIZE);
IPFW_UH_WLOCK(ch);
if (ch->ifcfg == NULL) {
ch->ifcfg = ii;
@ -485,7 +487,7 @@ export_iface_internal(struct namedobj_instance *ii, struct named_object *no,
/*
* Lists all interface currently tracked by ipfw.
* Data layout (v0)(current):
* Data layout (v1)(current):
* Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
* Reply: [ ipfw_obj_lheader ipfw_iface_info x N ]
*

View file

@ -46,9 +46,12 @@
#include <sys/syslog.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
@ -92,42 +95,43 @@
#endif /* !__APPLE__ */
#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f)
/*
 * Deliver a packet to the ipfw bpf tap.
 *
 * Three cases are distinguished by args->flags:
 *  - a length is encoded in the flags (IPFW_ARGS_LENMASK): tap the flat
 *    buffer args->mem with that length;
 *  - layer2 packet (IPFW_ARGS_ETHER): tap the mbuf as is, it already has
 *    its original link-level header;
 *  - otherwise: tap the mbuf prefixed with a fake Ethernet header whose
 *    EtherType is chosen from the IP version.
 */
static void
ipfw_log_ipfw0(struct ip_fw_args *args, struct ip *ip)
{

	if (args->flags & IPFW_ARGS_LENMASK) {
		ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags));
		return;
	}
	if (args->flags & IPFW_ARGS_ETHER) {
		/* layer2, use orig hdr */
		ipfw_bpf_mtap(args->m);
		return;
	}

	/*
	 * Add fake header. Later we will store
	 * more info in the header.
	 */
	switch (ip->ip_v) {
	case 4:
		ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
		    ETHER_HDR_LEN, args->m);
		break;
	case 6:
		ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
		    ETHER_HDR_LEN, args->m);
		break;
	default:
		/* Obviously bogus EtherType. */
		ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
		    ETHER_HDR_LEN, args->m);
		break;
	}
}
/*
* We enter here when we have a rule with O_LOG.
* XXX this function alone takes about 2Kbytes of code!
*/
void
ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
static void
ipfw_log_syslog(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip)
{
char *action;
int limit_reached = 0;
char action2[92], proto[128], fragment[32], mark_str[24];
if (V_fw_verbose == 0) {
if (args->flags & IPFW_ARGS_LENMASK)
ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags));
else if (args->flags & IPFW_ARGS_ETHER)
/* layer2, use orig hdr */
ipfw_bpf_mtap(args->m);
else {
/* Add fake header. Later we will store
* more info in the header.
*/
if (ip->ip_v == 4)
ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
ETHER_HDR_LEN, args->m);
else if (ip->ip_v == 6)
ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
ETHER_HDR_LEN, args->m);
else
/* Obviously bogus EtherType. */
ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
ETHER_HDR_LEN, args->m);
}
return;
}
/* the old 'log' function */
fragment[0] = '\0';
proto[0] = '\0';
@ -210,7 +214,7 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
break;
case O_SKIPTO:
snprintf(SNPARGS(action2, 0), "SkipTo %d",
TARG(cmd->arg1, skipto));
TARG(insntod(cmd, u32)->d[0], skipto));
break;
case O_PIPE:
snprintf(SNPARGS(action2, 0), "Pipe %d",
@ -269,23 +273,25 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
break;
case O_CALLRETURN:
if (cmd->len & F_NOT)
action = "Return";
snprintf(SNPARGS(action2, 0), "Return %s",
cmd->arg1 == RETURN_NEXT_RULENUM ?
"next-rulenum": "next-rule");
else
snprintf(SNPARGS(action2, 0), "Call %d",
cmd->arg1);
TARG(insntod(cmd, u32)->d[0], skipto));
break;
case O_SETMARK:
if (cmd->arg1 == IP_FW_TARG)
snprintf(SNPARGS(action2, 0), "SetMark %#x",
snprintf(SNPARGS(action2, 0), "SetMark %#010x",
TARG(cmd->arg1, mark));
else
snprintf(SNPARGS(action2, 0), "SetMark %#x",
((ipfw_insn_u32 *)cmd)->d[0]);
snprintf(SNPARGS(action2, 0), "SetMark %#010x",
insntoc(cmd, u32)->d[0]);
break;
case O_EXTERNAL_ACTION:
snprintf(SNPARGS(action2, 0), "Eaction %s",
((struct named_object *)SRV_OBJECT(chain,
cmd->arg1))->name);
insntod(cmd, kidx)->kidx))->name);
break;
default:
action = "UNKNOWN";
@ -438,4 +444,245 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
"ipfw: limit %d reached on entry %d\n",
limit_reached, f ? f->rulenum : -1);
}
/*
 * Append the packet's source and destination addresses to the log buffer.
 *
 * Two sockaddrs (sockaddr_in6 for IPv6 flows when INET6 is compiled in,
 * sockaddr_in otherwise) are carved out of *buf back to back, and *buf is
 * advanced past both.  Only the length, family and address fields are
 * written here.  Pointers to the new entries are handed back through
 * *src and *dst.
 */
static void
ipfw_rtsocklog_fill_l3(struct ip_fw_args *args,
    char **buf, struct sockaddr **src, struct sockaddr **dst)
{
	struct sockaddr_in *sin_from, *sin_to;
#ifdef INET6
	struct sockaddr_in6 *sin6_from, *sin6_to;

	if (IS_IP6_FLOW_ID(&(args->f_id))) {
		sin6_from = (struct sockaddr_in6 *)*buf;
		*buf += sizeof(*sin6_from);
		sin6_to = (struct sockaddr_in6 *)*buf;
		*buf += sizeof(*sin6_to);

		sin6_to->sin6_len = sizeof(*sin6_from);
		sin6_from->sin6_len = sin6_to->sin6_len;
		sin6_to->sin6_family = AF_INET6;
		sin6_from->sin6_family = sin6_to->sin6_family;
		sin6_from->sin6_addr = args->f_id.src_ip6;
		sin6_to->sin6_addr = args->f_id.dst_ip6;

		*src = (struct sockaddr *)sin6_from;
		*dst = (struct sockaddr *)sin6_to;
		return;
	}
#endif
	sin_from = (struct sockaddr_in *)*buf;
	*buf += sizeof(*sin_from);
	sin_to = (struct sockaddr_in *)*buf;
	*buf += sizeof(*sin_to);

	sin_to->sin_len = sizeof(*sin_from);
	sin_from->sin_len = sin_to->sin_len;
	sin_to->sin_family = AF_INET;
	sin_from->sin_family = sin_to->sin_family;
	/* f_id addresses are converted with htonl() before being stored. */
	sin_from->sin_addr.s_addr = htonl(args->f_id.src_ip);
	sin_to->sin_addr.s_addr = htonl(args->f_id.dst_ip);

	*src = (struct sockaddr *)sin_from;
	*dst = (struct sockaddr *)sin_to;
}
/*
 * Extract the action argument of a rule for an rtsock log message.
 *
 * chain      - rule chain; referenced by the TARG() macro.
 * cmd        - action opcode of the matched rule.
 * tablearg   - table lookup result; referenced by TARG() and used as the
 *              next-hop address for O_FORWARD_IP when the rule's own
 *              address is INADDR_ANY.
 * targ_value - receives the numeric argument for actions that have one;
 *              left untouched for all other opcodes.
 * buf        - cursor into the log buffer; advanced when a next-hop
 *              sockaddr is stored there.
 *
 * Returns a pointer to the next-hop sockaddr for forward actions, or NULL
 * for every other opcode.  For O_FORWARD_IP6 the returned pointer refers
 * to the sockaddr embedded in the rule itself, not to the log buffer.
 */
static struct sockaddr *
ipfw_rtsocklog_handle_tablearg(struct ip_fw_chain *chain, ipfw_insn *cmd,
    uint32_t tablearg, uint32_t *targ_value, char **buf)
{
	struct sockaddr_in *v4nh = NULL;

	/* handle tablearg now */
	switch (cmd->opcode) {
	case O_DIVERT:
	case O_TEE:
		*targ_value = TARG(cmd->arg1, divert);
		break;
	case O_NETGRAPH:
	case O_NGTEE:
		*targ_value = TARG(cmd->arg1, netgraph);
		break;
	case O_SETDSCP:
		*targ_value = (TARG(cmd->arg1, dscp) & 0x3F);
		break;
	case O_SETFIB:
		*targ_value = (TARG(cmd->arg1, fib) & 0x7FFF);
		break;
	case O_SKIPTO:
	case O_CALLRETURN:
		/* 'return' (O_CALLRETURN with F_NOT) has no target rule. */
		if (cmd->opcode == O_CALLRETURN && (cmd->len & F_NOT))
			break;
		*targ_value = (TARG(insntod(cmd, u32)->d[0], skipto));
		break;
	case O_PIPE:
	case O_QUEUE:
		*targ_value = TARG(cmd->arg1, pipe);
		break;
	/*
	 * NOTE(review): the syslog path in this file handles O_SETMARK as
	 * the mark action; confirm that O_MARK (rather than O_SETMARK) is
	 * really the opcode intended here.
	 */
	case O_MARK:
		*targ_value = TARG(cmd->arg1, mark);
		break;
	case O_FORWARD_IP:
		/*
		 * 'buf' is a pointer to the caller's cursor, so the
		 * sockaddr must be carved from *buf and the cursor itself
		 * advanced.  Using 'buf' directly would overwrite the
		 * caller's pointer variable and leave the cursor unmoved.
		 */
		v4nh = (struct sockaddr_in *)*buf;
		*buf += sizeof(*v4nh);
		*v4nh = ((ipfw_insn_sa *)cmd)->sa;
		if (v4nh->sin_addr.s_addr == INADDR_ANY)
			v4nh->sin_addr.s_addr = htonl(tablearg);

		return (struct sockaddr *)v4nh;
#ifdef INET6
	case O_FORWARD_IP6:
		return (struct sockaddr *)&(((ipfw_insn_sa6 *)cmd)->sa);
#endif
	default:
		break;
	}

	return (NULL);
}
#define MAX_COMMENT_LEN 80
/*
 * Copy the comment attached to rule 'f' into 'dst'.
 *
 * The rule's opcodes are scanned for the first O_NOP.  If there is none,
 * or that O_NOP has a len field of exactly 1 (no payload follows it),
 * nothing is copied and 0 is returned.  Otherwise the text stored right
 * after the opcode is copied, truncated to MAX_COMMENT_LEN - 1 characters
 * plus the terminating NUL.
 *
 * Returns the number of bytes written to 'dst', including the NUL.
 */
static size_t
ipfw_copy_rule_comment(struct ip_fw *f, char *dst)
{
	ipfw_insn *insn;
	char *text;
	size_t nbytes;
	int remaining, step;

	/* Walk the opcode list until the first O_NOP or the end of rule. */
	insn = f->cmd;
	remaining = f->cmd_len;
	while (remaining > 0 && insn->opcode != O_NOP) {
		step = F_LEN(insn);
		remaining -= step;
		insn += step;
	}

	/* No O_NOP at all, or an O_NOP that carries no text. */
	if (remaining <= 0 || insn->len == 1)
		return (0);

	text = (char *)(insn + 1);
	nbytes = strnlen(text, MAX_COMMENT_LEN - 1) + 1;
	strlcpy(dst, text, nbytes);

	return (nbytes);
}
/*
 * Report a packet that matched rule 'f' as an RTM_IPFWLOG routing socket
 * message.
 *
 * The message is assembled in a temporary zeroed buffer laid out as:
 *   struct rt_addrinfo | sockaddr_dl (rule info) | optional IPv4 next-hop |
 *   source sockaddr | destination sockaddr
 * and the rt_addrinfo slots point into that buffer:
 *   RTAX_DST     - sockaddr_dl carrying an ipfwlog_rtsock_hdr_v2
 *   RTAX_GENMASK - next-hop returned by ipfw_rtsocklog_handle_tablearg()
 *   RTAX_GATEWAY - packet source address
 *   RTAX_NETMASK - packet destination address
 *
 * 'hlen' and 'offset' are not referenced in this function.
 * '_eh' is the Ethernet header of the packet; it may be NULL.
 */
static void
ipfw_log_rtsock(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, u_short offset, uint32_t tablearg,
void *_eh)
{
struct sockaddr_dl *sdl_ipfwcmd;
struct ether_header *eh = _eh;
struct rt_addrinfo *info;
uint32_t *targ_value;
ipfwlog_rtsock_hdr_v2 *hdr;
ipfw_insn *cmd;
ipfw_insn_log *l;
char *buf, *orig_buf;
/* at least 4 x sizeof(struct sockaddr_dl) + rule comment (80) */
size_t buflen = 512;
/* Should we log? O_LOG is the first one */
cmd = ACTION_PTR(f);
l = (ipfw_insn_log *)cmd;
/* A non-zero max_log with nothing left means the rule's budget is spent. */
if (l->max_log != 0 && l->log_left == 0)
return;
l->log_left--;
/*
 * NOTE(review): 'f' has already been handed to ACTION_PTR() above, so
 * the NULL test in the log() arguments looks redundant - confirm.
 */
if (V_fw_verbose != 0 && l->log_left == 0) {
log(LOG_SECURITY | LOG_NOTICE,
"ipfw: limit %d reached on entry %d\n",
l->max_log, f ? f->rulenum : -1);
}
/* M_NOWAIT: on allocation failure the message is silently dropped. */
buf = orig_buf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
if (buf == NULL)
return;
/* The rt_addrinfo sits at the start; 'buf' is the cursor past it. */
info = (struct rt_addrinfo *)buf;
buf += sizeof (*info);
/* From here on 'cmd' is the rule's action opcode, not O_LOG. */
cmd = ipfw_get_action(f);
/*
 * The sockaddr_dl fields are reused to describe the rule:
 * sdl_index = rule set, sdl_type = format version,
 * sdl_alen = header size, sdl_nlen = comment length,
 * sdl_slen = MAC address length (only when L2 info is present).
 */
sdl_ipfwcmd = (struct sockaddr_dl *)buf;
sdl_ipfwcmd->sdl_family = AF_IPFWLOG;
sdl_ipfwcmd->sdl_index = f->set;
sdl_ipfwcmd->sdl_type = 2; /* version */
sdl_ipfwcmd->sdl_alen = sizeof(*hdr);
hdr = (ipfwlog_rtsock_hdr_v2 *)(sdl_ipfwcmd->sdl_data);
/* fill rule comment in if any */
sdl_ipfwcmd->sdl_nlen = ipfw_copy_rule_comment(f, hdr->comment);
targ_value = &hdr->tablearg;
hdr->rulenum = f->rulenum;
hdr->mark = args->rule.pkt_mark;
hdr->cmd = *cmd;
/* Grow sdl_len when header + comment do not fit into sdl_data. */
sdl_ipfwcmd->sdl_len = sizeof(*sdl_ipfwcmd);
if (sizeof(*hdr) + sdl_ipfwcmd->sdl_nlen > sizeof(sdl_ipfwcmd->sdl_data)) {
sdl_ipfwcmd->sdl_len += sizeof(*hdr) + sdl_ipfwcmd->sdl_nlen -
sizeof(sdl_ipfwcmd->sdl_data);
}
buf += sdl_ipfwcmd->sdl_len;
/* fill L2 in if present */
if (args->flags & IPFW_ARGS_ETHER && eh != NULL) {
sdl_ipfwcmd->sdl_slen = sizeof(eh->ether_shost);
memcpy(hdr->ether_shost, eh->ether_shost,
sdl_ipfwcmd->sdl_slen);
memcpy(hdr->ether_dhost, eh->ether_dhost,
sdl_ipfwcmd->sdl_slen);
}
info->rti_info[RTAX_DST] = (struct sockaddr *)sdl_ipfwcmd;
/* Warn if we're about to stop sending messages */
/* RTF_PROTO1 is set once fewer than half of max_log messages remain. */
if (l->max_log != 0 && l->log_left < (l->max_log >> 1)) {
info->rti_flags |= RTF_PROTO1;
}
/* handle tablearg */
info->rti_info[RTAX_GENMASK] = ipfw_rtsocklog_handle_tablearg(
chain, cmd, tablearg, targ_value, &buf);
/* L3 */
ipfw_rtsocklog_fill_l3(args, &buf,
&info->rti_info[RTAX_GATEWAY],
&info->rti_info[RTAX_NETMASK]);
info->rti_ifp = args->ifp;
rtsock_routemsg_info(RTM_IPFWLOG, info, RT_ALL_FIBS);
free(orig_buf, M_TEMP);
}
/*
 * Entry point for rules that carry O_LOG.
 *
 * Nothing is logged when there is no rule or hlen is zero.  The arg1
 * field of the O_LOG opcode (the first action opcode) selects where the
 * record goes:
 *  - IPFW_LOG_DEFAULT: syslog when V_fw_verbose is non-zero, the ipfw0
 *    bpf tap otherwise;
 *  - any other value is treated as a bit mask, and each of
 *    IPFW_LOG_SYSLOG, IPFW_LOG_RTSOCK and IPFW_LOG_IPFW0 that is set
 *    triggers its own backend, in that order.
 */
void
ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
    struct ip_fw_args *args, u_short offset, uint32_t tablearg,
    struct ip *ip, void *eh)
{
	ipfw_insn *logcmd;

	if (f == NULL || hlen == 0)
		return;

	/* O_LOG is the first action */
	logcmd = ACTION_PTR(f);

	if (logcmd->arg1 == IPFW_LOG_DEFAULT) {
		if (V_fw_verbose != 0)
			ipfw_log_syslog(chain, f, hlen, args, offset,
			    tablearg, ip);
		else
			ipfw_log_ipfw0(args, ip);
		return;
	}

	if (logcmd->arg1 & IPFW_LOG_SYSLOG)
		ipfw_log_syslog(chain, f, hlen, args, offset, tablearg, ip);

	if (logcmd->arg1 & IPFW_LOG_RTSOCK)
		ipfw_log_rtsock(chain, f, hlen, args, offset, tablearg, eh);

	if (logcmd->arg1 & IPFW_LOG_IPFW0)
		ipfw_log_ipfw0(args, ip);
}
/* end of file */

View file

@ -881,11 +881,11 @@ nat44_get_log(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
}
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT44_XCONFIG, 0, HDIR_SET, nat44_cfg },
{ IP_FW_NAT44_DESTROY, 0, HDIR_SET, nat44_destroy },
{ IP_FW_NAT44_XGETCONFIG, 0, HDIR_GET, nat44_get_cfg },
{ IP_FW_NAT44_LIST_NAT, 0, HDIR_GET, nat44_list_nat },
{ IP_FW_NAT44_XGETLOG, 0, HDIR_GET, nat44_get_log },
{ IP_FW_NAT44_XCONFIG, IP_FW3_OPVER, HDIR_SET, nat44_cfg },
{ IP_FW_NAT44_DESTROY, IP_FW3_OPVER, HDIR_SET, nat44_destroy },
{ IP_FW_NAT44_XGETCONFIG, IP_FW3_OPVER, HDIR_GET, nat44_get_cfg },
{ IP_FW_NAT44_LIST_NAT, IP_FW3_OPVER, HDIR_GET, nat44_list_nat },
{ IP_FW_NAT44_XGETLOG, IP_FW3_OPVER, HDIR_GET, nat44_get_log },
};
/*

View file

@ -156,6 +156,7 @@ void ipfw_nat_destroy(void);
/* In ip_fw_log.c */
struct ip;
struct ip_fw;
struct ip_fw_chain;
void ipfw_bpf_init(int);
@ -164,7 +165,8 @@ void ipfw_bpf_tap(u_char *, u_int);
void ipfw_bpf_mtap(struct mbuf *);
void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip);
struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip,
void *eh);
VNET_DECLARE(u_int64_t, norule_counter);
#define V_norule_counter VNET(norule_counter)
VNET_DECLARE(int, verbose_limit);
@ -196,8 +198,8 @@ enum { /* result for matching dynamic rules */
(p)->kidx = 0; \
} while (0)
struct ipfw_dyn_info {
uint16_t direction; /* match direction */
uint16_t kidx; /* state name kidx */
uint32_t direction; /* match direction */
uint32_t kidx; /* state name kidx */
uint32_t hashval; /* hash value */
uint32_t version; /* bucket version */
uint32_t f_pos;
@ -219,8 +221,8 @@ void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */
void ipfw_dyn_uninit(int); /* per-vnet deinitialization */
int ipfw_dyn_len(void);
uint32_t ipfw_dyn_get_count(uint32_t *, int *);
void ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
uint16_t default_id, uint16_t instance_id);
void ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint32_t eaction_id,
uint32_t default_id, uint32_t instance_id);
/* common variables */
VNET_DECLARE(int, fw_one_pass);
@ -235,6 +237,9 @@ VNET_DECLARE(struct ip_fw_chain, layer3_chain);
VNET_DECLARE(int, ipfw_vnet_ready);
#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
VNET_DECLARE(int, skipto_cache);
#define V_skipto_cache VNET(skipto_cache)
VNET_DECLARE(u_int32_t, set_disable);
#define V_set_disable VNET(set_disable)
@ -276,9 +281,10 @@ struct ip_fw_jump_cache {
struct ip_fw {
uint16_t act_ofs; /* offset of action in 32-bit units */
uint16_t cmd_len; /* # of 32-bit words in cmd */
uint16_t rulenum; /* rule number */
uint32_t rulenum; /* rule number */
uint8_t set; /* rule set (0..31) */
uint8_t flags; /* currently unused */
uint16_t _pad;
counter_u64_t cntr; /* Pointer to rule counters */
struct ip_fw_jump_cache cache; /* used by jump_fast */
uint32_t timestamp; /* tv_sec of last match */
@ -306,7 +312,6 @@ struct ip_fw_chain {
#else
struct rmlock rwmtx;
#endif
int static_len; /* total len of static rules (v0) */
uint32_t gencnt; /* NAT generation count */
LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
struct ip_fw *default_rule;
@ -324,9 +329,9 @@ struct ip_fw_chain {
/* 64-byte structure representing multi-field table value */
struct table_value {
uint32_t tag; /* O_TAG/O_TAGGED */
uint32_t pipe; /* O_PIPE/O_QUEUE */
uint16_t pipe; /* O_PIPE/O_QUEUE */
uint16_t divert; /* O_DIVERT/O_TEE */
uint16_t skipto; /* skipto, CALLRET */
uint32_t skipto; /* skipto, CALLRET */
uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */
uint16_t fib; /* O_SETFIB */
uint16_t nat; /* O_NAT */
@ -349,8 +354,7 @@ struct named_object {
uint16_t etlv; /* Export TLV id */
uint8_t subtype;/* object subtype within class */
uint8_t set; /* set object belongs to */
uint16_t kidx; /* object kernel index */
uint16_t spare;
uint32_t kidx; /* object kernel index */
uint32_t ocnt; /* object counter for internal use */
uint32_t refcnt; /* number of references */
};
@ -476,8 +480,8 @@ struct ipfw_ifc {
#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
struct obj_idx {
uint16_t uidx; /* internal index supplied by userland */
uint16_t kidx; /* kernel object index */
uint32_t uidx; /* internal index supplied by userland */
uint32_t kidx; /* kernel object index */
uint16_t off; /* tlv offset from rule end in 4-byte words */
uint8_t spare;
uint8_t type; /* object type within its category */
@ -495,36 +499,6 @@ struct rule_check_info {
struct obj_idx obuf[8]; /* table references storage */
};
/* Legacy interface support */
/*
* FreeBSD 8 export rule format
*/
struct ip_fw_rule0 {
struct ip_fw *x_next; /* linked list of rules */
struct ip_fw *next_rule; /* ptr to next [skipto] rule */
/* 'next_rule' is used to pass up 'set_disable' status */
uint16_t act_ofs; /* offset of action in 32-bit units */
uint16_t cmd_len; /* # of 32-bit words in cmd */
uint16_t rulenum; /* rule number */
uint8_t set; /* rule set (0..31) */
uint8_t _pad; /* padding */
uint32_t id; /* rule id */
/* These fields are present in all rules. */
uint64_t pcnt; /* Packet counter */
uint64_t bcnt; /* Byte counter */
uint32_t timestamp; /* tv_sec of last match */
ipfw_insn cmd[1]; /* storage for commands */
};
struct ip_fw_bcounter0 {
uint64_t pcnt; /* Packet counter */
uint64_t bcnt; /* Byte counter */
uint32_t timestamp; /* tv_sec of last match */
};
/* Kernel rule length */
/*
* RULE _K_ SIZE _V_ ->
@ -534,9 +508,6 @@ struct ip_fw_bcounter0 {
* RULESIZE _V_ ->
* get user size rule length
*/
/* FreeBSD8 <> current kernel format */
#define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4)
#define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
/* FreeBSD11 <> current kernel format */
#define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \
(r)->cmd_len * 4 - 4, 8))
@ -550,17 +521,17 @@ struct ip_fw_bcounter0 {
#define IPFW_TABLES_MAX 65536
#define IPFW_TABLES_DEFAULT 128
#define IPFW_OBJECTS_MAX 65536
#define IPFW_OBJECTS_DEFAULT 1024
#define IPFW_OBJECTS_DEFAULT 4096
#define CHAIN_TO_SRV(ch) ((ch)->srvmap)
#define SRV_OBJECT(ch, idx) ((ch)->srvstate[(idx)])
struct tid_info {
uint32_t set; /* table set */
uint16_t uidx; /* table index */
uint32_t uidx; /* table index */
uint8_t type; /* table type */
uint8_t atype;
uint8_t spare;
uint16_t spare;
int tlen; /* Total TLV size block */
void *tlvs; /* Pointer to first TLV */
};
@ -570,11 +541,11 @@ struct tid_info {
* If true, returns its index and type.
* Returns 0 if match is found, 1 otherwise.
*/
typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint32_t *puidx, uint8_t *ptype);
/*
* Updater callback. Sets kernel object reference index to @puidx
*/
typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx);
typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint32_t puidx);
/*
* Finder callback. Tries to find named object by name (specified via @ti).
* Stores found named object pointer in @pno.
@ -590,7 +561,7 @@ typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch,
* Returns pointer to named object or NULL.
*/
typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch,
uint16_t kidx);
uint32_t kidx);
/*
* Object creator callback. Tries to create object specified by @ti.
* Stores newly-allocated object index in @pkidx.
@ -598,7 +569,7 @@ typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch,
* Returns 0 on success.
*/
typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti,
uint16_t *pkidx);
uint32_t *pkidx);
/*
* Object destroy callback. Intended to free resources allocated by
* create_object callback.
@ -618,7 +589,7 @@ enum ipfw_sets_cmd {
SWAP_ALL = 0, TEST_ALL, MOVE_ALL, COUNT_ONE, TEST_ONE, MOVE_ONE
};
typedef int (ipfw_obj_sets_cb)(struct ip_fw_chain *ch,
uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
uint32_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
struct opcode_obj_rewrite {
uint32_t opcode; /* Opcode to act upon */
@ -654,8 +625,23 @@ void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
/* In ip_fw_sockopt.c */
/*
* Result of checking a single opcode; returned by ipfw_check_opcode()
* and by handlers of type ipfw_check_opcode_t.
* NOTE(review): the exact meaning of each value is defined by the
* checker implementation in ip_fw_sockopt.c - confirm there.
*/
enum ipfw_opcheck_result {
SUCCESS = 0,
FAILED,
BAD_SIZE, /* presumably: opcode length is wrong - TODO confirm */
CHECK_ACTION, /* presumably: opcode must be validated as an action - TODO confirm */
};
typedef enum ipfw_opcheck_result (*ipfw_check_opcode_t)(ipfw_insn **,
int *, struct rule_check_info *);
void ipfw_register_compat(ipfw_check_opcode_t);
void ipfw_unregister_compat(void);
enum ipfw_opcheck_result ipfw_check_opcode(ipfw_insn **, int *,
struct rule_check_info *);
void ipfw_init_skipto_cache(struct ip_fw_chain *chain);
void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain);
void ipfw_enable_skipto_cache(struct ip_fw_chain *chain);
int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
int ipfw_ctl3(struct sockopt *sopt);
int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
@ -665,11 +651,16 @@ void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
void ipfw_reap_rules(struct ip_fw *head);
void ipfw_init_counters(void);
void ipfw_destroy_counters(void);
int ipfw_commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci,
int count);
int delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel);
struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize);
void ipfw_free_rule(struct ip_fw *rule);
int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt);
int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx);
int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint32_t kidx);
ipfw_insn *ipfw_get_action(struct ip_fw *);
int ipfw_check_rule(struct ip_fw_rule *rule, size_t size,
struct rule_check_info *ci);
typedef int (sopt_handler_f)(struct ip_fw_chain *ch,
ip_fw3_opheader *op3, struct sockopt_data *sd);
@ -701,6 +692,7 @@ caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed);
sizeof(c) / sizeof(c[0])); \
} while(0)
#define DEFAULT_OBJHASH_SIZE 32
struct namedobj_instance;
typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *,
void *arg);
@ -708,7 +700,7 @@ typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key,
uint32_t kopt);
typedef int (objhash_cmp_f)(struct named_object *no, const void *key,
uint32_t kopt);
struct namedobj_instance *ipfw_objhash_create(uint32_t items);
struct namedobj_instance *ipfw_objhash_create(uint32_t items, size_t hash_size);
void ipfw_objhash_destroy(struct namedobj_instance *);
void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks);
void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni,
@ -722,7 +714,7 @@ struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni,
struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni,
uint32_t set, uint32_t type, const char *name);
struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni,
uint16_t idx);
uint32_t idx);
int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
struct named_object *b);
void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no);
@ -733,14 +725,14 @@ int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f,
void *arg);
int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
void *arg, uint16_t type);
int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx);
int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx);
int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint32_t idx);
int ipfw_objhash_alloc_idx(void *n, uint32_t *pidx);
void ipfw_objhash_set_funcs(struct namedobj_instance *ni,
objhash_hash_f *hash_f, objhash_cmp_f *cmp_f);
int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti,
uint32_t etlv, struct named_object **pno);
void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv);
ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx,
ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint32_t uidx,
uint32_t etlv);
void ipfw_init_obj_rewriter(void);
void ipfw_destroy_obj_rewriter(void);
@ -749,13 +741,13 @@ int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti);
void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx);
int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx);
void update_opcode_kidx(ipfw_insn *cmd, uint32_t idx);
int classify_opcode_kidx(ipfw_insn *cmd, uint32_t *puidx);
void ipfw_init_srv(struct ip_fw_chain *ch);
void ipfw_destroy_srv(struct ip_fw_chain *ch);
int ipfw_check_object_name_generic(const char *name);
int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type,
uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
uint32_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
/* In ip_fw_eaction.c */
typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args,
@ -763,15 +755,15 @@ typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args,
int ipfw_eaction_init(struct ip_fw_chain *ch, int first);
void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last);
uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
uint32_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
const char *name);
int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id);
int ipfw_del_eaction(struct ip_fw_chain *ch, uint32_t eaction_id);
int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done);
int ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
uint16_t eaction_id, uint16_t default_id, uint16_t instance_id);
int ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id,
uint16_t instance_id);
uint32_t eaction_id, uint32_t default_id, uint32_t instance_id);
int ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint32_t eaction_id,
uint32_t instance_id);
/* In ip_fw_table.c */
struct table_info;
@ -779,12 +771,12 @@ struct table_info;
typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen,
uint32_t *val);
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
int ipfw_lookup_table(struct ip_fw_chain *ch, uint32_t tbl, uint16_t plen,
void *paddr, uint32_t *val);
struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
uint16_t kidx);
int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
uint32_t kidx);
int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint32_t *kidx);
void ipfw_unref_table(struct ip_fw_chain *ch, uint32_t kidx);
int ipfw_init_tables(struct ip_fw_chain *ch, int first);
int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
* Copyright (c) 2014 Yandex LLC
* Copyright (c) 2014-2024 Yandex LLC
* Copyright (c) 2014 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
@ -98,7 +98,7 @@ static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
static void free_table_config(struct namedobj_instance *ni,
struct table_config *tc);
static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int ref);
static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
@ -110,7 +110,6 @@ static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
ipfw_xtable_info *i);
static int dump_table_tentry(void *e, void *arg);
static int dump_table_xentry(void *e, void *arg);
static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
struct tid_info *b);
@ -257,7 +256,7 @@ store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
*/
static int
create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
uint16_t *pkidx)
uint32_t *pkidx)
{
ipfw_xtable_info xi;
int error;
@ -289,7 +288,7 @@ find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
{
struct namedobj_instance *ni;
struct table_config *tc;
uint16_t kidx;
uint32_t kidx;
int error;
IPFW_UH_WLOCK_ASSERT(ch);
@ -330,7 +329,7 @@ find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
return (error);
tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
KASSERT(tc != NULL, ("create_table_compat returned bad idx %u", kidx));
/* OK, now we've got referenced table. */
*ptc = tc;
@ -545,13 +544,12 @@ add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
{
struct table_config *tc;
struct table_algo *ta;
uint16_t kidx;
int error, first_error, i, rollback;
uint32_t num, numadd;
struct tentry_info *ptei;
struct tableop_state ts;
char ta_buf[TA_BUF_SZ];
caddr_t ta_buf_m, v;
uint32_t kidx, num, numadd;
int error, first_error, i, rollback;
memset(&ts, 0, sizeof(ts));
ta = NULL;
@ -717,11 +715,10 @@ del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
struct table_config *tc;
struct table_algo *ta;
struct tentry_info *ptei;
uint16_t kidx;
int error, first_error, i;
uint32_t num, numdel;
char ta_buf[TA_BUF_SZ];
caddr_t ta_buf_m, v;
uint32_t kidx, num, numdel;
int error, first_error, i;
/*
* Find and reference existing table.
@ -895,61 +892,6 @@ check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
return (error);
}
/*
* Adds or deletes a single record in a table (legacy v0 request format).
* Data layout (v0):
* Request: [ ip_fw3_opheader ipfw_table_xentry ]
*
* The opcode in @op3 selects the operation: IP_FW_TABLE_XADD adds the
* entry, any other opcode deletes it.
*
* Returns 0 on success, EINVAL if the request is shorter than the fixed
* headers or the entry's len field is inconsistent with the request size,
* otherwise the error returned by add_table_entry()/del_table_entry().
*/
static int
manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
ipfw_table_xentry *xent;
struct tentry_info tei;
struct tid_info ti;
struct table_value v;
int error, hdrlen, read;
/* Fixed part of the entry: everything that precedes the key field 'k' */
hdrlen = offsetof(ipfw_table_xentry, k);
/* Check minimum header size */
if (sd->valsize < (sizeof(*op3) + hdrlen))
return (EINVAL);
read = sizeof(ip_fw3_opheader);
/*
* Check if xentry len field is valid: it must cover at least the
* fixed part and, together with the opheader, fit into the request.
*/
xent = (ipfw_table_xentry *)(op3 + 1);
if (xent->len < hdrlen || xent->len + read > sd->valsize)
return (EINVAL);
/* Translate the legacy entry into the internal tentry_info form */
memset(&tei, 0, sizeof(tei));
tei.paddr = &xent->k;
tei.masklen = xent->masklen;
/* Legacy requests carry a single 32-bit value; expand it into @v */
ipfw_import_table_value_legacy(xent->value, &v);
tei.pvalue = &v;
/* Old requests compatibility */
tei.flags = TEI_FLAGS_COMPAT;
if (xent->type == IPFW_TABLE_ADDR) {
/*
* Address family is derived from the key length: a key of
* sizeof(in_addr_t) bytes is IPv4, any other length is
* treated as IPv6.
*/
if (xent->len - hdrlen == sizeof(in_addr_t))
tei.subtype = AF_INET;
else
tei.subtype = AF_INET6;
}
/* Table is identified by the numeric index and type from the entry */
memset(&ti, 0, sizeof(ti));
ti.uidx = xent->tbl;
ti.type = xent->type;
error = (op3->opcode == IP_FW_TABLE_XADD) ?
add_table_entry(ch, &ti, &tei, 0, 1) :
del_table_entry(ch, &ti, &tei, 0, 1);
return (error);
}
/*
* Adds or deletes record in table.
* Data layout (v1)(current):
@ -968,7 +910,8 @@ manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
ipfw_obj_header *oh;
struct tentry_info *ptei, tei, *tei_buf;
struct tid_info ti;
int error, i, kidx, read;
uint32_t kidx;
int error, i, read;
/* Check minimum header size */
if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
@ -1210,7 +1153,7 @@ flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
char algostate[64], *pstate;
struct tableop_state ts;
int error, need_gc;
uint16_t kidx;
uint32_t kidx;
uint8_t tflags;
/*
@ -1500,7 +1443,7 @@ destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
/* Free obj index */
if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
printf("Error unlinking kidx %d from table %s\n",
printf("Error unlinking kidx %u from table %s\n",
tc->no.kidx, tc->tablename);
/* Unref values used in tables while holding UH lock */
@ -1533,7 +1476,7 @@ ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
if (ntables > IPFW_TABLES_MAX)
ntables = IPFW_TABLES_MAX;
/* Align to nearest power of 2 */
ntables = roundup_pow_of_two(ntables);
ntables = roundup_pow_of_two(ntables);
/* Allocate new pointers */
tablestate = malloc(ntables * sizeof(struct table_info),
@ -1595,7 +1538,7 @@ ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
* Lookup table's named object by its @kidx.
*/
struct named_object *
ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint32_t kidx)
{
return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx));
@ -1606,7 +1549,7 @@ ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
* On success return its @kidx.
*/
int
ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint32_t *kidx)
{
struct tid_info ti;
struct table_config *tc;
@ -1629,7 +1572,7 @@ ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
}
void
ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
ipfw_unref_table(struct ip_fw_chain *ch, uint32_t kidx)
{
struct namedobj_instance *ni;
@ -1638,7 +1581,7 @@ ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
IPFW_UH_WLOCK_ASSERT(ch);
ni = CHAIN_TO_NI(ch);
no = ipfw_objhash_lookup_kidx(ni, kidx);
KASSERT(no != NULL, ("Table with index %d not found", kidx));
KASSERT(no != NULL, ("Table with index %u not found", kidx));
no->refcnt--;
}
@ -1649,7 +1592,7 @@ ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
* Returns 1 if key was found.
*/
int
ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
ipfw_lookup_table(struct ip_fw_chain *ch, uint32_t tbl, uint16_t plen,
void *paddr, uint32_t *val)
{
struct table_info *ti;
@ -1853,12 +1796,12 @@ create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
*/
static int
create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
char *aname, ipfw_xtable_info *i, uint32_t *pkidx, int compat)
{
struct namedobj_instance *ni;
struct table_config *tc, *tc_new, *tmp;
struct table_algo *ta;
uint16_t kidx;
uint32_t kidx;
ni = CHAIN_TO_NI(ch);
@ -1958,7 +1901,7 @@ ipfw_get_table_objhash(struct ip_fw_chain *ch)
* Returns 0 on success.
*/
int
ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint32_t kidx,
struct sockopt_data *sd)
{
struct namedobj_instance *ni;
@ -1991,7 +1934,6 @@ struct dump_args {
uint16_t uidx;
int error;
uint32_t size;
ipfw_table_entry *ent;
ta_foreach_f *f;
void *farg;
ipfw_obj_tentry tent;
@ -2199,202 +2141,6 @@ dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
return (da.error);
}
/*
* Dumps all table data
* Data layout (version 0)(legacy):
* Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
* Reply: [ ipfw_xtable ipfw_table_xentry x N ]
*
* Returns EINVAL if the ipfw_xtable header cannot be obtained from @sd,
* ENOMEM if the supplied buffer is smaller than the full dump, and 0
* otherwise - including the case when the table does not exist.
*/
static int
dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
ipfw_xtable *xtbl;
struct tid_info ti;
struct table_config *tc;
struct table_algo *ta;
struct dump_args da;
size_t sz, count;
xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
if (xtbl == NULL)
return (EINVAL);
/* Table is looked up by the numeric index taken from the request */
memset(&ti, 0, sizeof(ti));
ti.uidx = xtbl->tbl;
IPFW_UH_RLOCK(ch);
if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
/* Missing table is not reported as an error */
IPFW_UH_RUNLOCK(ch);
return (0);
}
/* Fill in the reply header: item count and total size in bytes */
count = table_get_count(ch, tc);
sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
xtbl->cnt = count;
xtbl->size = sz;
xtbl->type = tc->no.subtype;
xtbl->tbl = ti.uidx;
if (sd->valsize < sz) {
/*
* Submitted buffer size is not enough.
* We've already filled in the @xtbl header with
* relevant table info including size, so we
* can return. Buffer will be flushed automatically.
*/
IPFW_UH_RUNLOCK(ch);
return (ENOMEM);
}
/* Do the actual dump in eXtended format */
memset(&da, 0, sizeof(da));
da.ch = ch;
da.ti = KIDX_TO_TI(ch, tc->no.kidx);
da.tc = tc;
da.sd = sd;
ta = tc->ta;
/* dump_table_xentry() is invoked for every entry of the table */
ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
IPFW_UH_RUNLOCK(ch);
return (0);
}
/*
* Legacy function to retrieve the size of a table dump.
* The request carries one uint32_t right after the ip_fw3_opheader. On
* input it holds the table index; on output the same word is overwritten
* with the value computed by ipfw_count_xtable().
*
* Returns EINVAL if the header cannot be obtained from @sd, otherwise
* the result of ipfw_count_xtable().
*/
static int
get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
uint32_t *tbl;
struct tid_info ti;
size_t sz;
int error;
sz = sizeof(*op3) + sizeof(uint32_t);
op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz);
if (op3 == NULL)
return (EINVAL);
/* The in/out word immediately follows the opheader */
tbl = (uint32_t *)(op3 + 1);
memset(&ti, 0, sizeof(ti));
ti.uidx = *tbl;
IPFW_UH_RLOCK(ch);
error = ipfw_count_xtable(ch, &ti, tbl);
IPFW_UH_RUNLOCK(ch);
return (error);
}
/*
* Legacy IP_FW_TABLE_GETSIZE handler
* Stores the number of items in the table described by @ti into @cnt.
*
* Returns 0 on success, ESRCH if the table is not found (@cnt is left
* untouched in that case).
*/
int
ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
{
struct table_config *tc;
if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
return (ESRCH);
*cnt = table_get_count(ch, tc);
return (0);
}
/*
* Legacy IP_FW_TABLE_XGETSIZE handler
* Stores into @cnt the number of bytes needed to dump the table in the
* eXtended (v0) format: one ipfw_table_xentry per item plus, when the
* table is not empty, one ipfw_xtable header. A missing table yields 0.
*
* Always returns 0.
*/
int
ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
{
struct table_config *tc;
uint32_t count;
if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) {
*cnt = 0;
return (0); /* 'table all list' requires success */
}
count = table_get_count(ch, tc);
*cnt = count * sizeof(ipfw_table_xentry);
/* The header is accounted for only when there is something to dump */
if (count > 0)
*cnt += sizeof(ipfw_xtable);
return (0);
}
/*
* Per-entry callback used by ipfw_dump_table_legacy().
* Exports algo-specific entry @e into the next free ipfw_table_entry
* slot tracked by the struct dump_args passed via @arg.
*
* Returns 0 on success, 1 when all da->size slots are already used,
* or the error returned by the algorithm's dump_tentry() method.
*/
static int
dump_table_entry(void *e, void *arg)
{
struct dump_args *da;
struct table_config *tc;
struct table_algo *ta;
ipfw_table_entry *ent;
struct table_value *pval;
int error;
da = (struct dump_args *)arg;
tc = da->tc;
ta = tc->ta;
/* Out of memory, returning */
if (da->cnt == da->size)
return (1);
/*
* Claim the slot and bump the counter first; note that both stay
* advanced even if dump_tentry() fails below.
*/
ent = da->ent++;
ent->tbl = da->uidx;
da->cnt++;
error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
if (error != 0)
return (error);
/* Only the IPv4 address, mask length and legacy value are exported */
ent->addr = da->tent.k.addr.s_addr;
ent->masklen = da->tent.masklen;
pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
ent->value = ipfw_export_table_value_legacy(pval);
return (0);
}
/*
* Dumps table in pre-8.1 legacy format.
* Fills tbl->ent[] with up to tbl->size entries and stores the number of
* processed entries in tbl->cnt. A missing table, or a table whose
* subtype is not IPFW_TABLE_ADDR, produces an empty dump (tbl->cnt == 0).
*
* Always returns 0.
*/
int
ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
ipfw_table *tbl)
{
struct table_config *tc;
struct table_algo *ta;
struct dump_args da;
tbl->cnt = 0;
if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
return (0); /* XXX: We should return ESRCH */
ta = tc->ta;
/* This dump format supports IPv4 only */
if (tc->no.subtype != IPFW_TABLE_ADDR)
return (0);
/* Set up the iteration state consumed by dump_table_entry() */
memset(&da, 0, sizeof(da));
da.ch = ch;
da.ti = KIDX_TO_TI(ch, tc->no.kidx);
da.tc = tc;
da.ent = &tbl->ent[0];
da.size = tbl->size;
tbl->cnt = 0;
ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
tbl->cnt = da.cnt;
return (0);
}
/*
* Dumps table entry in eXtended format (v1)(current).
*/
@ -2432,52 +2178,6 @@ dump_table_tentry(void *e, void *arg)
return (0);
}
/*
* Dumps table entry in eXtended format (v0).
* Per-entry callback: reserves room for one ipfw_table_xentry in the
* sockopt buffer (da->sd) and fills it from algo-specific entry @e.
*
* Returns 0 on success, 1 if no buffer space could be reserved, or the
* error returned by the algorithm's dump_tentry() method.
*/
static int
dump_table_xentry(void *e, void *arg)
{
struct dump_args *da;
struct table_config *tc;
struct table_algo *ta;
ipfw_table_xentry *xent;
ipfw_obj_tentry *tent;
struct table_value *pval;
int error;
da = (struct dump_args *)arg;
tc = da->tc;
ta = tc->ta;
xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
/* Out of memory, returning */
if (xent == NULL)
return (1);
xent->len = sizeof(ipfw_table_xentry);
xent->tbl = da->uidx;
/* Export into the current-format scratch entry first */
memset(&da->tent, 0, sizeof(da->tent));
tent = &da->tent;
error = ta->dump_tentry(tc->astate, da->ti, e, tent);
if (error != 0)
return (error);
/* Convert current format to previous one */
xent->masklen = tent->masklen;
pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
xent->value = ipfw_export_table_value_legacy(pval);
/*
* Apply some hacks: an IPv4 key of an address table is stored in the
* last 32-bit word of the IPv6 key field and flagged with
* IPFW_TCF_INET; every other key is copied verbatim.
*/
if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
xent->flags = IPFW_TCF_INET;
} else
memcpy(&xent->k, &tent->k, sizeof(xent->k));
return (0);
}
/*
* Helper function to export table algo data
* to tentry format before calling user function.
@ -2510,7 +2210,7 @@ prepare_table_tentry(void *e, void *arg)
* Allow external consumers to read table entries in standard format.
*/
int
ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint32_t kidx,
ta_foreach_f *f, void *arg)
{
struct namedobj_instance *ni;
@ -2540,7 +2240,7 @@ ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
/*
* Table algorithms
*/
*/
/*
* Finds algorithm by index, table type or supplied name.
@ -2720,90 +2420,78 @@ list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
static int
classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
classify_srcdst(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
/* Basic IPv4/IPv6 or u32 lookups */
*puidx = cmd->arg1;
/* Assume ADDR by default */
*ptype = IPFW_TABLE_ADDR;
int v;
if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
/*
* generic lookup. The key must be
* in 32bit big-endian format.
*/
v = ((ipfw_insn_u32 *)cmd)->d[1];
switch (v) {
case LOOKUP_DST_IP:
case LOOKUP_SRC_IP:
break;
case LOOKUP_DST_PORT:
case LOOKUP_SRC_PORT:
case LOOKUP_UID:
case LOOKUP_JAIL:
case LOOKUP_DSCP:
case LOOKUP_MARK:
*ptype = IPFW_TABLE_NUMBER;
break;
case LOOKUP_DST_MAC:
case LOOKUP_SRC_MAC:
*ptype = IPFW_TABLE_MAC;
break;
}
}
ipfw_insn_table *cmd;
/* Basic IPv4/IPv6 or u32 lookups */
cmd = insntod(cmd0, table);
*puidx = cmd->kidx;
switch(cmd0->arg1) {
case LOOKUP_DST_IP:
case LOOKUP_SRC_IP:
default:
/* IPv4 src/dst */
*ptype = IPFW_TABLE_ADDR;
break;
case LOOKUP_DST_PORT:
case LOOKUP_SRC_PORT:
case LOOKUP_UID:
case LOOKUP_JAIL:
case LOOKUP_DSCP:
case LOOKUP_MARK:
case LOOKUP_RULENUM:
*ptype = IPFW_TABLE_NUMBER;
break;
case LOOKUP_DST_MAC:
case LOOKUP_SRC_MAC:
*ptype = IPFW_TABLE_MAC;
break;
}
return (0);
}
static int
classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
classify_via(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
ipfw_insn_if *cmdif;
/* Interface table, possibly */
cmdif = (ipfw_insn_if *)cmd;
cmdif = insntod(cmd0, if);
if (cmdif->name[0] != '\1')
return (1);
*ptype = IPFW_TABLE_INTERFACE;
*puidx = cmdif->p.kidx;
*puidx = cmdif->p.kidx; /* XXXAE */
return (0);
}
static int
classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
classify_flow(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
*puidx = cmd->arg1;
*puidx = insntod(cmd0, table)->kidx;
*ptype = IPFW_TABLE_FLOW;
return (0);
}
static int
classify_mac_lookup(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
classify_mac_lookup(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
*puidx = cmd->arg1;
*puidx = insntod(cmd0, table)->kidx;
*ptype = IPFW_TABLE_MAC;
return (0);
}
static void
update_arg1(ipfw_insn *cmd, uint16_t idx)
update_kidx(ipfw_insn *cmd0, uint32_t idx)
{
cmd->arg1 = idx;
insntod(cmd0, table)->kidx = idx;
}
static void
update_via(ipfw_insn *cmd, uint16_t idx)
update_via(ipfw_insn *cmd0, uint32_t idx)
{
ipfw_insn_if *cmdif;
cmdif = (ipfw_insn_if *)cmd;
cmdif->p.kidx = idx;
insntod(cmd0, if)->p.kidx = idx;
}
static int
@ -2825,7 +2513,7 @@ table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
/* XXX: sets-sets! */
static struct named_object *
table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
table_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
{
struct namedobj_instance *ni;
struct table_config *tc;
@ -2833,13 +2521,13 @@ table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
IPFW_UH_WLOCK_ASSERT(ch);
ni = CHAIN_TO_NI(ch);
tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
KASSERT(tc != NULL, ("Table with index %d not found", idx));
KASSERT(tc != NULL, ("Table with index %u not found", idx));
return (&tc->no);
}
static int
table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
table_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
enum ipfw_sets_cmd cmd)
{
@ -2884,7 +2572,7 @@ table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
* so it should be called first.
*/
static int
table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
table_manage_sets_all(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
enum ipfw_sets_cmd cmd)
{
@ -2914,7 +2602,7 @@ static struct opcode_obj_rewrite opcodes[] = {
.opcode = O_IP_SRC_LOOKUP,
.etlv = IPFW_TLV_TBL_NAME,
.classifier = classify_srcdst,
.update = update_arg1,
.update = update_kidx,
.find_byname = table_findbyname,
.find_bykidx = table_findbykidx,
.create_object = create_table_compat,
@ -2924,7 +2612,7 @@ static struct opcode_obj_rewrite opcodes[] = {
.opcode = O_IP_DST_LOOKUP,
.etlv = IPFW_TLV_TBL_NAME,
.classifier = classify_srcdst,
.update = update_arg1,
.update = update_kidx,
.find_byname = table_findbyname,
.find_bykidx = table_findbykidx,
.create_object = create_table_compat,
@ -2934,7 +2622,7 @@ static struct opcode_obj_rewrite opcodes[] = {
.opcode = O_IP_FLOW_LOOKUP,
.etlv = IPFW_TLV_TBL_NAME,
.classifier = classify_flow,
.update = update_arg1,
.update = update_kidx,
.find_byname = table_findbyname,
.find_bykidx = table_findbykidx,
.create_object = create_table_compat,
@ -2944,7 +2632,7 @@ static struct opcode_obj_rewrite opcodes[] = {
.opcode = O_MAC_SRC_LOOKUP,
.etlv = IPFW_TLV_TBL_NAME,
.classifier = classify_mac_lookup,
.update = update_arg1,
.update = update_kidx,
.find_byname = table_findbyname,
.find_bykidx = table_findbykidx,
.create_object = create_table_compat,
@ -2954,7 +2642,7 @@ static struct opcode_obj_rewrite opcodes[] = {
.opcode = O_MAC_DST_LOOKUP,
.etlv = IPFW_TLV_TBL_NAME,
.classifier = classify_mac_lookup,
.update = update_arg1,
.update = update_kidx,
.find_byname = table_findbyname,
.find_bykidx = table_findbykidx,
.create_object = create_table_compat,
@ -3019,7 +2707,7 @@ ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
struct ip_fw *rule;
ipfw_insn *cmd;
int cmdlen, i, l;
uint16_t kidx;
uint32_t kidx;
uint8_t subtype;
IPFW_UH_WLOCK(ch);
@ -3277,22 +2965,18 @@ unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
}
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table },
{ IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 },
{ IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 },
{ IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table },
{ IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table },
{ IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables },
{ IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 },
{ IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 },
{ IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 },
{ IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 },
{ IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 },
{ IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 },
{ IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry },
{ IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table },
{ IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo },
{ IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size },
{ IP_FW_TABLE_XCREATE, IP_FW3_OPVER, HDIR_SET, create_table },
{ IP_FW_TABLE_XDESTROY, IP_FW3_OPVER, HDIR_SET, flush_table_v0 },
{ IP_FW_TABLE_XFLUSH, IP_FW3_OPVER, HDIR_SET, flush_table_v0 },
{ IP_FW_TABLE_XMODIFY, IP_FW3_OPVER, HDIR_BOTH, modify_table },
{ IP_FW_TABLE_XINFO, IP_FW3_OPVER, HDIR_GET, describe_table },
{ IP_FW_TABLES_XLIST, IP_FW3_OPVER, HDIR_GET, list_tables },
{ IP_FW_TABLE_XLIST, IP_FW3_OPVER, HDIR_GET, dump_table_v1 },
{ IP_FW_TABLE_XADD, IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
{ IP_FW_TABLE_XDEL, IP_FW3_OPVER, HDIR_BOTH, manage_table_ent_v1 },
{ IP_FW_TABLE_XFIND, IP_FW3_OPVER, HDIR_GET, find_table_entry },
{ IP_FW_TABLE_XSWAP, IP_FW3_OPVER, HDIR_SET, swap_table },
{ IP_FW_TABLES_ALIST, IP_FW3_OPVER, HDIR_GET, list_table_algo },
};
static int
@ -3348,7 +3032,8 @@ ipfw_init_tables(struct ip_fw_chain *ch, int first)
M_IPFW, M_WAITOK | M_ZERO);
tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
tcfg->namehash = ipfw_objhash_create(V_fw_tables_max,
DEFAULT_OBJHASH_SIZE);
ch->tblcfg = tcfg;
ipfw_table_value_init(ch, first);

View file

@ -156,10 +156,6 @@ int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
struct tentry_info *tei, uint8_t flags, uint32_t count);
int flush_table(struct ip_fw_chain *ch, struct tid_info *ti);
void ipfw_import_table_value_legacy(uint32_t value, struct table_value *v);
uint32_t ipfw_export_table_value_legacy(struct table_value *v);
int ipfw_get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd);
/* ipfw_table_value.c functions */
struct table_config;
@ -180,7 +176,7 @@ int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
struct rule_check_info *ci);
int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule,
uint32_t *bmask);
int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint32_t kidx,
struct sockopt_data *sd);
void ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule);
struct namedobj_instance *ipfw_get_table_objhash(struct ip_fw_chain *ch);
@ -190,7 +186,7 @@ int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
uint32_t new_set);
void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t old_set,
uint32_t new_set, int mv);
int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint32_t kidx,
ta_foreach_f f, void *arg);
/* internal functions */
@ -220,13 +216,5 @@ void add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts);
void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts);
void rollback_toperation_state(struct ip_fw_chain *ch, void *object);
/* Legacy interfaces */
int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti,
uint32_t *cnt);
int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti,
uint32_t *cnt);
int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
ipfw_table *tbl);
#endif /* _KERNEL */
#endif /* _IPFW2_TABLE_H */

View file

@ -1,5 +1,7 @@
/*-
* Copyright (c) 2014 Yandex LLC
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2014-2025 Yandex LLC
* Copyright (c) 2014 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
@ -2104,7 +2106,7 @@ ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO);
icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE);
icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE, DEFAULT_OBJHASH_SIZE);
icfg->size = DEFAULT_IFIDX_SIZE;
icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW,
M_WAITOK | M_ZERO);
@ -3195,8 +3197,7 @@ ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
struct fhashentry *ent;
struct fhashentry4 *m4;
struct ipfw_flow_id *id;
uint32_t hsize;
uint16_t hash;
uint32_t hash, hsize;
id = (struct ipfw_flow_id *)key;
head = (struct fhashbhead *)ti->state;
@ -4018,9 +4019,9 @@ struct table_algo addr_kfib = {
struct mac_radix_entry {
struct radix_node rn[2];
struct sa_mac sa;
uint32_t value;
uint8_t masklen;
struct sa_mac sa;
};
struct mac_radix_cfg {

View file

@ -1,5 +1,7 @@
/*-
* Copyright (c) 2014 Yandex LLC
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2014-2025 Yandex LLC
* Copyright (c) 2014 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
@ -65,7 +67,7 @@ static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd);
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_TABLE_VLIST, 0, HDIR_GET, list_table_values },
{ IP_FW_TABLE_VLIST, IP_FW3_OPVER, HDIR_GET, list_table_values },
};
#define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash)
@ -76,6 +78,7 @@ struct table_val_link
struct table_value *pval; /* Pointer to real table value */
};
#define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */
#define VALDATA_HASH_SIZE 65536
struct vdump_args {
struct ip_fw_chain *ch;
@ -362,10 +365,10 @@ rollback_table_values(struct tableop_state *ts)
*/
static int
alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts,
struct namedobj_instance *vi, uint16_t *pvidx, uint8_t flags)
struct namedobj_instance *vi, uint32_t *pvidx, uint8_t flags)
{
int error, vlimit;
uint16_t vidx;
uint32_t vidx;
IPFW_UH_WLOCK_ASSERT(ch);
@ -474,8 +477,7 @@ ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts,
struct namedobj_instance *vi;
struct table_config *tc;
struct tentry_info *tei, *ptei;
uint32_t count, vlimit;
uint16_t vidx;
uint32_t count, vidx, vlimit;
struct table_val_link *ptv;
struct table_value tval, *pval;
@ -596,42 +598,6 @@ ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts,
return (0);
}
/*
* Compatibility function used to import data from old
* IP_FW_TABLE_ADD / IP_FW_TABLE_XADD opcodes.
*/
void
ipfw_import_table_value_legacy(uint32_t value, struct table_value *v)
{
memset(v, 0, sizeof(*v));
v->tag = value;
v->pipe = value;
v->divert = value;
v->skipto = value;
v->netgraph = value;
v->fib = value;
v->nat = value;
v->nh4 = value; /* host format */
v->dscp = value;
v->limit = value;
v->mark = value;
}
/*
* Export data to legacy table dumps opcodes.
*/
uint32_t
ipfw_export_table_value_legacy(struct table_value *v)
{
/*
* TODO: provide more compatibility depending on
* vmask value.
*/
return (v->tag);
}
/*
* Imports table value from current userland format.
* Saves value in kernel format to the same place.
@ -776,7 +742,7 @@ ipfw_table_value_init(struct ip_fw_chain *ch, int first)
tcfg = ch->tblcfg;
tcfg->val_size = VALDATA_START_SIZE;
tcfg->valhash = ipfw_objhash_create(tcfg->val_size);
tcfg->valhash = ipfw_objhash_create(tcfg->val_size, VALDATA_HASH_SIZE);
ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value,
cmp_table_value);

View file

@ -55,4 +55,52 @@ void nat64lsn_uninit(struct ip_fw_chain *ch, int last);
int nat64clat_init(struct ip_fw_chain *ch, int first);
void nat64clat_uninit(struct ip_fw_chain *ch, int last);
/*
 * NAT64_DEFINE_OPCODE_REWRITER(mod, name, ops) expands to the named-object
 * rewriting glue of one NAT64 module.  It generates:
 *
 *   <mod>_classify()    - inspects the instruction found F_LEN(cmd0)
 *                         ipfw_insn units before cmd0.  Returns 1 unless
 *                         that instruction is O_EXTERNAL_ACTION with kidx
 *                         equal to V_<mod>_eid; otherwise stores the kidx
 *                         of cmd0 in *puidx, 0 in *ptype and returns 0.
 *   <mod>_update_kidx() - stores idx into the kidx field of cmd0.
 *   <mod>_findbyname()  - calls ipfw_objhash_find_type() on
 *                         CHAIN_TO_SRV(ch) with IPFW_TLV_<name>_NAME.
 *   <mod>_findbykidx()  - asserts the UH write lock, looks the index up
 *                         with ipfw_objhash_lookup_kidx() and KASSERTs
 *                         that an object was found.
 *   ops[]               - static array of struct opcode_obj_rewrite with a
 *                         single O_EXTERNAL_INSTANCE entry wired to the
 *                         functions above.
 *
 * The expansion refers to <mod>_manage_sets(), which is NOT generated here
 * and has to be defined before the macro is invoked.  The expansion ends
 * with the closing brace of the array initializer, so the invocation must
 * supply the trailing semicolon.
 *
 * NOTE(review): <mod>_classify() steps back by the length of cmd0 itself,
 * which reaches the preceding instruction only if both instructions have
 * the same length - confirm against the opcode layout.
 */
#define NAT64_DEFINE_OPCODE_REWRITER(mod, name, ops) \
static int \
mod ## _classify(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype) \
{ \
ipfw_insn *icmd; \
icmd = cmd0 - F_LEN(cmd0); \
if (icmd->opcode != O_EXTERNAL_ACTION || \
insntod(icmd, kidx)->kidx != V_ ## mod ## _eid) \
return (1); \
*puidx = insntod(cmd0, kidx)->kidx; \
*ptype = 0; \
return (0); \
} \
static void \
mod ## _update_kidx(ipfw_insn *cmd0, uint32_t idx) \
{ \
insntod(cmd0, kidx)->kidx = idx; \
} \
static int \
mod ## _findbyname(struct ip_fw_chain *ch, struct tid_info *ti, \
struct named_object **pno) \
{ \
return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, \
IPFW_TLV_## name ## _NAME, pno)); \
} \
static struct named_object * \
mod ## _findbykidx(struct ip_fw_chain *ch, uint32_t idx) \
{ \
struct namedobj_instance *ni; \
struct named_object *no; \
IPFW_UH_WLOCK_ASSERT(ch); \
ni = CHAIN_TO_SRV(ch); \
no = ipfw_objhash_lookup_kidx(ni, idx); \
KASSERT(no != NULL, ("NAT with index %u not found", idx)); \
return (no); \
} \
static struct opcode_obj_rewrite ops[] = { \
{ \
.opcode = O_EXTERNAL_INSTANCE, \
.etlv = IPFW_TLV_EACTION /* just show it isn't table */,\
.classifier = mod ## _classify, \
.update = mod ## _update_kidx, \
.find_byname = mod ## _findbyname, \
.find_bykidx = mod ## _findbykidx, \
.manage_sets = mod ## _manage_sets, \
}, \
}
#endif /* _IP_FW_NAT64_H_ */

View file

@ -59,7 +59,7 @@
#include "nat64clat.h"
#define NAT64_LOOKUP(chain, cmd) \
(struct nat64clat_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
(struct nat64clat_cfg *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
static void
nat64clat_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
@ -68,7 +68,7 @@ nat64clat_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
static uint32_t pktid = 0;
memset(plog, 0, sizeof(*plog));
plog->length = PFLOG_HDRLEN;
plog->length = PFLOG_REAL_HDRLEN;
plog->af = family;
plog->action = PF_NAT;
plog->dir = PF_IN;
@ -210,9 +210,9 @@ ipfw_nat64clat(struct ip_fw_chain *chain, struct ip_fw_args *args,
IPFW_RLOCK_ASSERT(chain);
*done = 0; /* try next rule if not matched */
icmd = cmd + 1;
icmd = cmd + F_LEN(cmd);
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nat64clat_eid ||
insntod(cmd, kidx)->kidx != V_nat64clat_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NAT64_LOOKUP(chain, icmd)) == NULL)
return (0);

View file

@ -42,7 +42,7 @@ struct nat64clat_cfg {
char name[64];
};
VNET_DECLARE(uint16_t, nat64clat_eid);
VNET_DECLARE(uint32_t, nat64clat_eid);
#define V_nat64clat_eid VNET(nat64clat_eid)
#define IPFW_TLV_NAT64CLAT_NAME IPFW_TLV_EACTION_NAME(V_nat64clat_eid)

View file

@ -59,7 +59,7 @@
#include "nat64clat.h"
VNET_DEFINE(uint16_t, nat64clat_eid) = 0;
VNET_DEFINE(uint32_t, nat64clat_eid) = 0;
static struct nat64clat_cfg *nat64clat_alloc_config(const char *name,
uint8_t set);
@ -484,81 +484,23 @@ nat64clat_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
}
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64CLAT_CREATE, 0, HDIR_SET, nat64clat_create },
{ IP_FW_NAT64CLAT_DESTROY,0, HDIR_SET, nat64clat_destroy },
{ IP_FW_NAT64CLAT_CONFIG, 0, HDIR_BOTH, nat64clat_config },
{ IP_FW_NAT64CLAT_LIST, 0, HDIR_GET, nat64clat_list },
{ IP_FW_NAT64CLAT_STATS, 0, HDIR_GET, nat64clat_stats },
{ IP_FW_NAT64CLAT_RESET_STATS,0, HDIR_SET, nat64clat_reset_stats },
{ IP_FW_NAT64CLAT_CREATE, IP_FW3_OPVER, HDIR_SET, nat64clat_create },
{ IP_FW_NAT64CLAT_DESTROY, IP_FW3_OPVER, HDIR_SET, nat64clat_destroy },
{ IP_FW_NAT64CLAT_CONFIG, IP_FW3_OPVER, HDIR_BOTH, nat64clat_config },
{ IP_FW_NAT64CLAT_LIST, IP_FW3_OPVER, HDIR_GET, nat64clat_list },
{ IP_FW_NAT64CLAT_STATS, IP_FW3_OPVER, HDIR_GET, nat64clat_stats },
{ IP_FW_NAT64CLAT_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nat64clat_reset_stats },
};
static int
nat64clat_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
{
ipfw_insn *icmd;
icmd = cmd - 1;
if (icmd->opcode != O_EXTERNAL_ACTION ||
icmd->arg1 != V_nat64clat_eid)
return (1);
*puidx = cmd->arg1;
*ptype = 0;
return (0);
}
static void
nat64clat_update_arg1(ipfw_insn *cmd, uint16_t idx)
{
cmd->arg1 = idx;
}
static int
nat64clat_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
struct named_object **pno)
{
int err;
err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
IPFW_TLV_NAT64CLAT_NAME, pno);
return (err);
}
static struct named_object *
nat64clat_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
{
struct namedobj_instance *ni;
struct named_object *no;
IPFW_UH_WLOCK_ASSERT(ch);
ni = CHAIN_TO_SRV(ch);
no = ipfw_objhash_lookup_kidx(ni, idx);
KASSERT(no != NULL, ("NAT with index %d not found", idx));
return (no);
}
static int
nat64clat_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
nat64clat_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
enum ipfw_sets_cmd cmd)
{
return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64CLAT_NAME,
set, new_set, cmd));
return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch),
IPFW_TLV_NAT64CLAT_NAME, set, new_set, cmd));
}
static struct opcode_obj_rewrite opcodes[] = {
{
.opcode = O_EXTERNAL_INSTANCE,
.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
.classifier = nat64clat_classify,
.update = nat64clat_update_arg1,
.find_byname = nat64clat_findbyname,
.find_bykidx = nat64clat_findbykidx,
.manage_sets = nat64clat_manage_sets,
},
};
NAT64_DEFINE_OPCODE_REWRITER(nat64clat, NAT64CLAT, opcodes);
static int
destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,

View file

@ -1,9 +1,9 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2015-2019 Yandex LLC
* Copyright (c) 2015-2020 Yandex LLC
* Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
* Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
* Copyright (c) 2016-2020 Andrey V. Elsukov <ae@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -84,7 +84,7 @@ static uma_zone_t nat64lsn_job_zone;
static void nat64lsn_periodic(void *data);
#define PERIODIC_DELAY 4
#define NAT64_LOOKUP(chain, cmd) \
(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
(struct nat64lsn_instance *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
/*
* Delayed job queue, used to create new hosts
* and new portgroups
@ -178,7 +178,7 @@ nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
{
memset(plog, 0, sizeof(*plog));
plog->length = PFLOG_HDRLEN;
plog->length = PFLOG_REAL_HDRLEN;
plog->af = family;
plog->action = PF_NAT;
plog->dir = PF_IN;
@ -212,6 +212,21 @@ nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
return (CK_SLIST_FIRST(&host->aliases));
}
/*
 * Select the alias that a newly created host entry will be linked to.
 *
 * cfg  - configuration whose alias array is indexed.
 * f_id - flow of the packet that triggered the allocation; currently
 *        unused, kept so that a smarter policy can use it later.
 *
 * Returns a pointer to the chosen alias inside cfg.
 *
 * NOTE(review): the round-robin counter is a function-local static, so it
 * is shared by every configuration and is advanced without atomics or a
 * lock - confirm that callers are serialized, or that an occasional
 * skipped/duplicated slot is acceptable.
 */
static struct nat64lsn_alias*
nat64lsn_get_alias(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id __unused)
{
	static uint32_t idx = 0;
	uint32_t cur;

	/*
	 * We can choose alias by number of allocated PGs,
	 * not used yet by other hosts, or some static configured
	 * by user.
	 * XXX: for now we choose it using round robin.
	 */
	/*
	 * Advance the counter outside of the macro invocation:
	 * ALIAS_BYHASH() is a function-like macro and must not be handed
	 * an argument with side effects.
	 */
	cur = idx++;
	return (&ALIAS_BYHASH(cfg, cur));
}
#define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
#define STATE_HASH(h, v) \
((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
@ -255,53 +270,47 @@ freemask_ffsll(uint32_t *freemask)
((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
#define NAT64LSN_TRY_PGCNT 32
#define NAT64LSN_TRY_PGCNT 36
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
uint32_t *pgidx, in_addr_t faddr)
struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, in_addr_t faddr)
{
struct nat64lsn_pg *pg, *oldpg;
struct nat64lsn_pg *pg;
uint32_t idx, oldidx;
int cnt;
cnt = 0;
/* First try last used PG */
oldpg = pg = ck_pr_load_ptr(pgptr);
/* First try last used PG. */
idx = oldidx = ck_pr_load_32(pgidx);
/* If pgidx is out of range, reset it to the first pgchunk */
if (!ISSET32(*chunkmask, idx / 32))
idx = 0;
MPASS(idx < 1024);
cnt = 0;
do {
ck_pr_fence_load();
if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
/*
* If last used PG has not free states,
* try to update pointer.
* NOTE: it can be already updated by jobs handler,
* thus we use CAS operation.
*/
if (idx > 1023 || !ISSET32(*chunkmask, idx / 32)) {
/* If it is first try, reset idx to first PG */
idx = 0;
/* Stop if idx is out of range */
if (cnt > 0)
ck_pr_cas_ptr(pgptr, oldpg, pg);
return (pg);
break;
}
/* Stop if idx is out of range */
if (!ISSET32(*chunkmask, idx / 32))
break;
if (ISSET32(pgmask[idx / 32], idx % 32))
if (ISSET32(pgmask[idx / 32], idx % 32)) {
pg = ck_pr_load_ptr(
&chunks[idx / 32]->pgptr[idx % 32]);
else
pg = NULL;
ck_pr_fence_load();
/*
* Make sure that pg did not become DEAD.
*/
if ((pg->flags & NAT64LSN_DEADPG) == 0 &&
FREEMASK_BITCOUNT(pg, faddr) > 0) {
if (cnt > 0)
ck_pr_cas_32(pgidx, oldidx, idx);
return (pg);
}
}
idx++;
} while (++cnt < NAT64LSN_TRY_PGCNT);
/* If pgidx is out of range, reset it to the first pgchunk */
if (!ISSET32(*chunkmask, idx / 32))
idx = 0;
ck_pr_cas_32(pgidx, oldidx, idx);
if (oldidx != idx)
ck_pr_cas_32(pgidx, oldidx, idx);
return (NULL);
}
@ -330,27 +339,24 @@ nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
switch (proto) {
case IPPROTO_TCP:
pg = nat64lsn_get_pg(
&link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
link->alias->tcp, &link->alias->tcp_pg,
pg = nat64lsn_get_pg(&link->alias->tcp_chunkmask,
link->alias->tcp_pgmask, link->alias->tcp,
&link->alias->tcp_pgidx, faddr);
break;
case IPPROTO_UDP:
pg = nat64lsn_get_pg(
&link->alias->udp_chunkmask, link->alias->udp_pgmask,
link->alias->udp, &link->alias->udp_pg,
pg = nat64lsn_get_pg(&link->alias->udp_chunkmask,
link->alias->udp_pgmask, link->alias->udp,
&link->alias->udp_pgidx, faddr);
break;
case IPPROTO_ICMP:
pg = nat64lsn_get_pg(
&link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
link->alias->icmp, &link->alias->icmp_pg,
pg = nat64lsn_get_pg(&link->alias->icmp_chunkmask,
link->alias->icmp_pgmask, link->alias->icmp,
&link->alias->icmp_pgidx, faddr);
break;
default:
panic("%s: wrong proto %d", __func__, proto);
}
if (pg == NULL)
if (pg == NULL || (pg->flags & NAT64LSN_DEADPG) != 0)
return (NULL);
/* Check that PG has some free states */
@ -385,14 +391,10 @@ nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
/* Insert new state into host's hash table */
HOST_LOCK(host);
SET_AGE(host->timestamp);
CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
state, entries);
host->states_count++;
/*
* XXX: In case if host is going to be expired,
* reset NAT64LSN_DEADHOST flag.
*/
host->flags &= ~NAT64LSN_DEADHOST;
HOST_UNLOCK(host);
NAT64STAT_INC(&cfg->base.stats, screated);
/* Mark the state as ready for translate4 */
@ -563,7 +565,7 @@ nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
len = ip->ip_hl << 2;
switch (ip->ip_p) {
case IPPROTO_ICMP:
len += ICMP_MINLEN; /* Enough to get icmp_id */
len += ICMP_MINLEN;
break;
case IPPROTO_TCP:
len += sizeof(struct tcphdr);
@ -740,6 +742,32 @@ nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
return (0);
}
/*
 * Adjust the per-protocol portgroup counter of an alias.
 *
 * alias - pointer to an object with tcp_pgcount, udp_pgcount and
 *         icmp_pgcount members.
 * proto - IPPROTO_TCP, IPPROTO_UDP or IPPROTO_ICMP; any other value
 *         leaves all counters untouched.
 * value - amount added to the selected counter (may be negative).
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement and can be used safely in
 * an unbraced if/else.
 */
#define	PGCOUNT_ADD(alias, proto, value) do {				\
	switch (proto) {						\
	case IPPROTO_TCP: (alias)->tcp_pgcount += (value); break;	\
	case IPPROTO_UDP: (alias)->udp_pgcount += (value); break;	\
	case IPPROTO_ICMP: (alias)->icmp_pgcount += (value); break;	\
	default: break;							\
	}								\
} while (0)
#define	PGCOUNT_INC(alias, proto)	PGCOUNT_ADD(alias, proto, 1)
#define	PGCOUNT_DEC(alias, proto)	PGCOUNT_ADD(alias, proto, -1)
/*
 * Retire a translation state after it was removed from its host's hash
 * table: clear the READY_IPV4 bit, set the STALE bit, then issue a store
 * fence.  The state is not freed here; per the comments below, the STALE
 * bit defers deletion to the next pass of nat64lsn_maintain_pg().
 */
static inline void
nat64lsn_state_cleanup(struct nat64lsn_state *state)
{
/*
 * Reset the READY flag and wait until the state becomes
 * safe for translate4.
 */
ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
/*
 * And set the STALE flag for deferred deletion in the
 * next pass of nat64lsn_maintain_pg().
 */
ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
/* Order both flag updates before any store that follows. */
ck_pr_fence_store();
}
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
@ -781,19 +809,12 @@ nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
HOST_LOCK(host);
CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
state, nat64lsn_state, entries);
/*
* Now translate6 will not use this state.
*/
host->states_count--;
HOST_UNLOCK(host);
/* Reset READY flag */
ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
/* And set STALE flag */
ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
ck_pr_fence_store();
/*
* Now translate6 will not use this state, wait
* until it become safe for translate4, then mark
* state as free.
*/
nat64lsn_state_cleanup(state);
}
}
@ -814,7 +835,7 @@ nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
struct nat64lsn_pg_slist *portgroups)
{
struct nat64lsn_alias *alias;
struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
struct nat64lsn_pg *pg, *tpg;
uint32_t *pgmask, *pgidx;
int i, idx;
@ -827,45 +848,47 @@ nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
if (pg->base_port == NAT64_MIN_PORT)
continue;
/*
* PG is expired, unlink it and schedule for
* deferred destroying.
* PG expires in two passes:
* 1. Reset bit in pgmask, mark it as DEAD.
* 2. Unlink it and schedule for deferred destroying.
*/
idx = (pg->base_port - NAT64_MIN_PORT) / 64;
switch (pg->proto) {
case IPPROTO_TCP:
pgmask = alias->tcp_pgmask;
pgptr = &alias->tcp_pg;
pgidx = &alias->tcp_pgidx;
firstpg = alias->tcp[0]->pgptr[0];
break;
case IPPROTO_UDP:
pgmask = alias->udp_pgmask;
pgptr = &alias->udp_pg;
pgidx = &alias->udp_pgidx;
firstpg = alias->udp[0]->pgptr[0];
break;
case IPPROTO_ICMP:
pgmask = alias->icmp_pgmask;
pgptr = &alias->icmp_pg;
pgidx = &alias->icmp_pgidx;
firstpg = alias->icmp[0]->pgptr[0];
break;
}
if (pg->flags & NAT64LSN_DEADPG) {
/* Unlink PG from alias's chain */
ALIAS_LOCK(alias);
CK_SLIST_REMOVE(&alias->portgroups, pg,
nat64lsn_pg, entries);
PGCOUNT_DEC(alias, pg->proto);
ALIAS_UNLOCK(alias);
/*
* Link it to job's chain for deferred
* destroying.
*/
NAT64STAT_INC(&cfg->base.stats, spgdeleted);
CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
continue;
}
/* Reset the corresponding bit in pgmask array. */
ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
pg->flags |= NAT64LSN_DEADPG;
ck_pr_fence_store();
/* If last used PG points to this PG, reset it. */
ck_pr_cas_ptr(pgptr, pg, firstpg);
ck_pr_cas_32(pgidx, idx, 0);
/* Unlink PG from alias's chain */
ALIAS_LOCK(alias);
CK_SLIST_REMOVE(&alias->portgroups, pg,
nat64lsn_pg, entries);
alias->portgroups_count--;
ALIAS_UNLOCK(alias);
/* And link to job's chain for deferred destroying */
NAT64STAT_INC(&cfg->base.stats, spgdeleted);
CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
}
}
}
@ -882,7 +905,9 @@ nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
entries, tmp) {
/* Is host was marked in previous call? */
if (host->flags & NAT64LSN_DEADHOST) {
if (host->states_count > 0) {
if (host->states_count > 0 ||
GET_AGE(host->timestamp) <
cfg->host_delete_delay) {
host->flags &= ~NAT64LSN_DEADHOST;
continue;
}
@ -898,9 +923,8 @@ nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
CK_SLIST_INSERT_HEAD(hosts, host, entries);
continue;
}
if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
continue;
if (host->states_count > 0)
if (host->states_count > 0 ||
GET_AGE(host->timestamp) < cfg->host_delete_delay)
continue;
/* Mark host as going to be expired in next pass */
host->flags |= NAT64LSN_DEADHOST;
@ -966,7 +990,7 @@ nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
#endif
/*
* This procedure is used to perform various maintenance
* This procedure is used to perform various maintenance
* on dynamic hash list. Currently it is called every 4 seconds.
*/
static void
@ -1044,14 +1068,11 @@ nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
host->hval = ji->src6_hval;
host->flags = 0;
host->states_count = 0;
host->states_hashsize = NAT64LSN_HSIZE;
CK_SLIST_INIT(&host->aliases);
for (i = 0; i < host->states_hashsize; i++)
CK_SLIST_INIT(&host->states_hash[i]);
/* Determine alias from flow hash. */
hval = ALIASLINK_HVAL(cfg, &ji->f_id);
link->alias = &ALIAS_BYHASH(cfg, hval);
link->alias = nat64lsn_get_alias(cfg, &ji->f_id);
CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
ALIAS_LOCK(link->alias);
@ -1103,9 +1124,8 @@ nat64lsn_find_pg_place(uint32_t *data)
static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
struct nat64lsn_alias *alias, uint32_t *chunkmask,
uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
struct nat64lsn_pg **pgptr, uint8_t proto)
struct nat64lsn_alias *alias, uint32_t *chunkmask, uint32_t *pgmask,
struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, uint8_t proto)
{
struct nat64lsn_pg *pg;
int i, pg_idx, chunk_idx;
@ -1163,17 +1183,20 @@ nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
/* Initialize PG and hook it to pgchunk */
SET_AGE(pg->timestamp);
pg->flags = 0;
pg->proto = proto;
pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
ck_pr_fence_store();
ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
ck_pr_store_ptr(pgptr, pg);
/* Set bit in pgmask and set index of last used PG */
ck_pr_bts_32(&pgmask[chunk_idx], pg_idx % 32);
ck_pr_store_32(pgidx, pg_idx);
ALIAS_LOCK(alias);
CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
SET_AGE(alias->timestamp);
alias->portgroups_count++;
PGCOUNT_INC(alias, proto);
ALIAS_UNLOCK(alias);
NAT64STAT_INC(&cfg->base.stats, spgcreated);
return (PG_ERROR(0));
@ -1210,17 +1233,17 @@ nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
case IPPROTO_TCP:
ret = nat64lsn_alloc_proto_pg(cfg, alias,
&alias->tcp_chunkmask, alias->tcp_pgmask,
alias->tcp, &alias->tcp_pg, ji->proto);
alias->tcp, &alias->tcp_pgidx, ji->proto);
break;
case IPPROTO_UDP:
ret = nat64lsn_alloc_proto_pg(cfg, alias,
&alias->udp_chunkmask, alias->udp_pgmask,
alias->udp, &alias->udp_pg, ji->proto);
alias->udp, &alias->udp_pgidx, ji->proto);
break;
case IPPROTO_ICMP:
ret = nat64lsn_alloc_proto_pg(cfg, alias,
&alias->icmp_chunkmask, alias->icmp_pgmask,
alias->icmp, &alias->icmp_pg, ji->proto);
alias->icmp, &alias->icmp_pgidx, ji->proto);
break;
default:
panic("%s: wrong proto %d", __func__, ji->proto);
@ -1362,14 +1385,115 @@ nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
JQUEUE_UNLOCK();
}
/*
 * Clean up after an unlikely race in which a host object was scheduled
 * for deletion, but a translation state was created for it before the
 * host was actually destroyed.
 *
 * State expiration removes a state from its host's hash table, so no
 * state may remain linked to a host entry that is about to be freed.
 * Every state still present is unlinked here and handed to
 * nat64lsn_state_cleanup().
 */
static void
nat64lsn_host_cleanup(struct nat64lsn_host *host)
{
	struct nat64lsn_state *st, *next;
	int bucket;

	printf("NAT64LSN: %s: race condition has been detected for host %p\n",
	    __func__, host);

	bucket = 0;
	while (bucket < host->states_hashsize) {
		/*
		 * No lock is taken: this host entry is already unlinked
		 * and is going to be destroyed.
		 */
		CK_SLIST_FOREACH_SAFE(st, &host->states_hash[bucket],
		    entries, next) {
			CK_SLIST_REMOVE(&host->states_hash[bucket], st,
			    nat64lsn_state, entries);
			host->states_count--;
			nat64lsn_state_cleanup(st);
		}
		bucket++;
	}
	/* Every bucket has been drained, so nothing may be left. */
	MPASS(host->states_count == 0);
}
/*
 * Clean up after an unlikely race in which a portgroup (PG) was scheduled
 * for deletion, but a translation state was allocated from it before the
 * PG was actually destroyed.
 *
 * States are reachable through their host's hash table, so before the PG
 * is freed no state that belongs to it may remain linked to any host
 * entry.  The function walks every states chunk of the PG and, for each
 * slot whose free-mask bit is clear:
 *  - STALE state: returns the slot to the free mask;
 *  - READY_IPV4 state: unlinks it from its host under the host lock and
 *    calls nat64lsn_state_cleanup(), leaving the free-mask bit clear;
 *  - neither flag: leaves the slot untouched for a later call.
 */
static void
nat64lsn_pg_cleanup(struct nat64lsn_pg *pg)
{
struct nat64lsn_state *state;
uint64_t usedmask;
int c, i;
printf("NAT64LSN: %s: race condition has been detected for pg %p\n",
__func__, pg);
for (c = 0; c < pg->chunks_count; c++) {
/*
 * Use the inverted freemask to find which states were created.
 *
 * NOTE(review): the !__LP64__ free-mask helper elsewhere in this
 * file reads the mask as two 32-bit words; confirm that this
 * single dereference covers all 64 bits on such platforms.
 */
usedmask = ~(*FREEMASK_CHUNK(pg, c));
if (usedmask == 0)
continue;
for (i = 0; i < 64; i++) {
if (!ISSET64(usedmask, i))
continue;
state = &STATES_CHUNK(pg, c)->state[i];
/*
 * If the STALE bit is set, the state is already
 * unlinked from its host's hash table.  Thus we can
 * just set the bit in the free mask again; the PG is
 * then destroyed in a later epoch call.
 */
if (ISSET32(state->flags, NAT64_BIT_STALE)) {
FREEMASK_BTS(pg, c, i);
continue;
}
/*
 * There is a small window in which the bit has been
 * grabbed from the freemask, but the state is not yet
 * linked into the host's hash table.
 * Check for the READY flag, it is set just after
 * linking.  If it is not set, defer the cleanup
 * to the next call.
 */
if (ISSET32(state->flags, NAT64_BIT_READY_IPV4)) {
struct nat64lsn_host *host;
host = state->host;
/* Unlink under the host lock, then mark the state STALE. */
HOST_LOCK(host);
CK_SLIST_REMOVE(&STATE_HASH(host,
state->hval), state, nat64lsn_state,
entries);
host->states_count--;
HOST_UNLOCK(host);
nat64lsn_state_cleanup(state);
}
}
}
}
static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
struct nat64lsn_hosts_slist hosts;
struct nat64lsn_pg_slist portgroups;
struct nat64lsn_job_item *ji;
struct nat64lsn_host *host;
struct nat64lsn_pg *pg;
int i;
CK_SLIST_INIT(&hosts);
CK_SLIST_INIT(&portgroups);
ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
MPASS(ji->jtype == JTYPE_DESTROY);
while (!CK_SLIST_EMPTY(&ji->hosts)) {
@ -1377,11 +1501,23 @@ nat64lsn_job_destroy(epoch_context_t ctx)
CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
if (host->states_count > 0) {
/*
* XXX: The state has been created
* during host deletion.
* The state has been created during host deletion.
*/
printf("NAT64LSN: %s: destroying host with %d "
"states\n", __func__, host->states_count);
/*
* We need to cleanup these states to avoid
* possible access to already deleted host in
* the state expiration code.
*/
nat64lsn_host_cleanup(host);
CK_SLIST_INSERT_HEAD(&hosts, host, entries);
/*
* Keep host entry for next deferred destroying.
* In the next epoch its states will be not
* accessible.
*/
continue;
}
nat64lsn_destroy_host(host);
}
@ -1391,18 +1527,33 @@ nat64lsn_job_destroy(epoch_context_t ctx)
for (i = 0; i < pg->chunks_count; i++) {
if (FREEMASK_BITCOUNT(pg, i) != 64) {
/*
* XXX: The state has been created during
* A state has been created during
* PG deletion.
*/
printf("NAT64LSN: %s: destroying PG %p "
"with non-empty chunk %d\n", __func__,
pg, i);
nat64lsn_pg_cleanup(pg);
CK_SLIST_INSERT_HEAD(&portgroups,
pg, entries);
i = -1;
break;
}
}
nat64lsn_destroy_pg(pg);
if (i != -1)
nat64lsn_destroy_pg(pg);
}
uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
uma_zfree(nat64lsn_job_zone, ji);
if (CK_SLIST_EMPTY(&hosts) &&
CK_SLIST_EMPTY(&portgroups)) {
uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
uma_zfree(nat64lsn_job_zone, ji);
return;
}
/* Schedule job item again */
CK_SLIST_MOVE(&ji->hosts, &hosts, entries);
CK_SLIST_MOVE(&ji->portgroups, &portgroups, entries);
NAT64LSN_EPOCH_CALL(&ji->epoch_ctx, nat64lsn_job_destroy);
}
static int
@ -1569,40 +1720,40 @@ int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done)
{
struct nat64lsn_cfg *cfg;
struct nat64lsn_instance *i;
ipfw_insn *icmd;
int ret;
IPFW_RLOCK_ASSERT(ch);
*done = 0; /* continue the search in case of failure */
icmd = cmd + 1;
icmd = cmd + F_LEN(cmd);
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nat64lsn_eid ||
insntod(cmd, kidx)->kidx != V_nat64lsn_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
(i = NAT64_LOOKUP(ch, icmd)) == NULL)
return (IP_FW_DENY);
*done = 1; /* terminate the search */
switch (args->f_id.addr_type) {
case 4:
ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
ret = nat64lsn_translate4(i->cfg, &args->f_id, &args->m);
break;
case 6:
/*
* Check that destination IPv6 address matches our prefix6.
*/
if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
cfg->base.plat_plen / 8) != 0) {
ret = cfg->nomatch_verdict;
if ((i->cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
memcmp(&args->f_id.dst_ip6, &i->cfg->base.plat_prefix,
i->cfg->base.plat_plen / 8) != 0) {
ret = i->cfg->nomatch_verdict;
break;
}
ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
ret = nat64lsn_translate6(i->cfg, &args->f_id, &args->m);
break;
default:
ret = cfg->nomatch_verdict;
ret = i->cfg->nomatch_verdict;
}
if (ret != IP_FW_PASS && args->m != NULL) {
@ -1674,7 +1825,7 @@ nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
}
struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
nat64lsn_init_config(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
struct nat64lsn_cfg *cfg;
struct nat64lsn_alias *alias;
@ -1777,7 +1928,7 @@ nat64lsn_destroy_host(struct nat64lsn_host *host)
}
void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
nat64lsn_destroy_config(struct nat64lsn_cfg *cfg)
{
struct nat64lsn_host *host;
int i;
@ -1805,3 +1956,4 @@ nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
free(cfg->aliases, M_NAT64LSN);
free(cfg, M_NAT64LSN);
}

View file

@ -1,9 +1,9 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2015-2019 Yandex LLC
* Copyright (c) 2015-2020 Yandex LLC
* Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
* Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
* Copyright (c) 2015-2020 Andrey V. Elsukov <ae@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -64,13 +64,12 @@ struct nat64lsn_states_chunk {
#define ISSET64(mask, bit) ((mask) & ((uint64_t)1 << (bit)))
#define ISSET32(mask, bit) ((mask) & ((uint32_t)1 << (bit)))
struct nat64lsn_pg {
CK_SLIST_ENTRY(nat64lsn_pg) entries;
uint16_t base_port;
uint16_t timestamp;
uint8_t proto;
uint8_t chunks_count;
uint8_t spare[2];
uint16_t flags;
#define NAT64LSN_DEADPG 1
union {
uint64_t freemask64;
@ -83,6 +82,11 @@ struct nat64lsn_pg {
struct nat64lsn_states_chunk *states;
struct nat64lsn_states_chunk **states_chunk;
};
/*
* An alias object holds chain of all allocated PGs.
* The chain is used mostly by expiration code.
*/
CK_SLIST_ENTRY(nat64lsn_pg) entries;
};
#define CHUNK_BY_FADDR(p, a) ((a) & ((p)->chunks_count - 1))
@ -123,28 +127,39 @@ struct nat64lsn_alias {
struct mtx lock;
in_addr_t addr; /* host byte order */
uint32_t hosts_count;
uint32_t portgroups_count;
uint32_t tcp_chunkmask;
uint32_t udp_chunkmask;
uint32_t icmp_chunkmask;
uint32_t tcp_pgidx;
uint32_t udp_pgidx;
uint32_t icmp_pgidx;
uint16_t timestamp;
uint16_t spare;
uint16_t tcp_pgcount;
uint16_t udp_pgcount;
uint16_t icmp_pgcount;
/*
* We keep PGs in chunks by 32 PGs in each.
* Each chunk allocated by demand, and then corresponding bit
* is set in chunkmask.
*
* Also we keep last used PG's index for each protocol.
* pgidx / 32 = index of pgchunk;
* pgidx % 32 = index of pgptr in pgchunk.
*/
uint32_t tcp_chunkmask;
uint32_t tcp_pgidx;
uint32_t udp_chunkmask;
uint32_t udp_pgidx;
uint32_t icmp_chunkmask;
uint32_t icmp_pgidx;
/*
* Each pgchunk keeps 32 pointers to PGs. If pgptr pointer is
* valid, we have corresponding bit set in the pgmask.
*/
uint32_t tcp_pgmask[32];
uint32_t udp_pgmask[32];
uint32_t icmp_pgmask[32];
struct nat64lsn_pgchunk *tcp[32];
struct nat64lsn_pgchunk *udp[32];
struct nat64lsn_pgchunk *icmp[32];
/* pointer to PG that can be used for faster state allocation */
struct nat64lsn_pg *tcp_pg;
struct nat64lsn_pg *udp_pg;
struct nat64lsn_pg *icmp_pg;
};
#define ALIAS_LOCK_INIT(p) \
mtx_init(&(p)->lock, "alias_lock", NULL, MTX_DEF)
@ -177,7 +192,7 @@ struct nat64lsn_host {
#define HOST_LOCK(p) mtx_lock(&(p)->lock)
#define HOST_UNLOCK(p) mtx_unlock(&(p)->lock)
VNET_DECLARE(uint16_t, nat64lsn_eid);
VNET_DECLARE(uint32_t, nat64lsn_eid);
#define V_nat64lsn_eid VNET(nat64lsn_eid)
#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid)
@ -189,8 +204,6 @@ VNET_DECLARE(uint16_t, nat64lsn_eid);
STAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
struct nat64lsn_cfg {
struct named_object no;
struct nat64lsn_hosts_slist *hosts_hash;
struct nat64lsn_alias *aliases; /* array of aliases */
@ -216,7 +229,8 @@ struct nat64lsn_cfg {
uint16_t st_icmp_ttl; /* ICMP expire */
struct nat64_config base;
#define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE)
#define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE | \
NAT64LSN_ALLOW_SWAPCONF)
#define NAT64LSN_ANYPREFIX 0x00000100
struct mtx periodic_lock;
@ -228,6 +242,12 @@ struct nat64lsn_cfg {
char name[64]; /* Nat instance name */
};
/*
 * Named NAT64LSN instance. This is the object that is registered in the
 * named objects hash and stored in SRV_OBJECT(); the actual translator
 * configuration is reached through the cfg pointer.
 */
struct nat64lsn_instance {
struct named_object no; /* name/etlv/set/kidx used by ipfw_objhash_*() */
struct nat64lsn_cfg *cfg; /* attached config; exchanged by nat64lsn_swap_configs() */
char name[64]; /* Nat instance name */
};
/* CFG_LOCK protects cfg->hosts_hash from modification */
#define CFG_LOCK_INIT(p) \
mtx_init(&(p)->lock, "cfg_lock", NULL, MTX_DEF)
@ -241,9 +261,11 @@ struct nat64lsn_cfg {
#define CALLOUT_LOCK(p) mtx_lock(&(p)->periodic_lock)
#define CALLOUT_UNLOCK(p) mtx_unlock(&(p)->periodic_lock)
struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
MALLOC_DECLARE(M_NAT64LSN);
struct nat64lsn_cfg *nat64lsn_init_config(struct ip_fw_chain *ch,
in_addr_t prefix, int plen);
void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_destroy_config(struct nat64lsn_cfg *cfg);
void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_init_internal(void);
void nat64lsn_uninit_internal(void);

View file

@ -55,17 +55,18 @@
#include "nat64lsn.h"
VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
VNET_DEFINE(uint32_t, nat64lsn_eid) = 0;
static struct nat64lsn_cfg *
static struct nat64lsn_instance *
nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
{
struct nat64lsn_cfg *cfg;
struct named_object *no;
cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
no = ipfw_objhash_lookup_name_type(ni, set,
IPFW_TLV_NAT64LSN_NAME, name);
return (cfg);
if (no == NULL)
return (NULL);
return (__containerof(no, struct nat64lsn_instance, no));
}
static void
@ -112,6 +113,7 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
{
ipfw_obj_lheader *olh;
ipfw_nat64lsn_cfg *uc;
struct nat64lsn_instance *i;
struct nat64lsn_cfg *cfg;
struct namedobj_instance *ni;
uint32_t addr4, mask4;
@ -157,12 +159,14 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
IPFW_UH_RUNLOCK(ch);
cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
strlcpy(cfg->name, uc->name, sizeof(cfg->name));
cfg->no.name = cfg->name;
cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
cfg->no.set = uc->set;
i = malloc(sizeof(struct nat64lsn_instance), M_NAT64LSN,
M_WAITOK | M_ZERO);
strlcpy(i->name, uc->name, sizeof(i->name));
i->no.name = i->name;
i->no.etlv = IPFW_TLV_NAT64LSN_NAME;
i->no.set = uc->set;
cfg = nat64lsn_init_config(ch, addr4, uc->plen4);
cfg->base.plat_prefix = uc->prefix6;
cfg->base.plat_plen = uc->plen6;
cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
@ -180,26 +184,28 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg->st_estab_ttl = uc->st_estab_ttl;
cfg->st_udp_ttl = uc->st_udp_ttl;
cfg->st_icmp_ttl = uc->st_icmp_ttl;
cfg->nomatch_verdict = IP_FW_DENY;
IPFW_UH_WLOCK(ch);
if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
IPFW_UH_WUNLOCK(ch);
nat64lsn_destroy_instance(cfg);
nat64lsn_destroy_config(cfg);
free(i, M_NAT64LSN);
return (EEXIST);
}
if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
if (ipfw_objhash_alloc_idx(ni, &i->no.kidx) != 0) {
IPFW_UH_WUNLOCK(ch);
nat64lsn_destroy_instance(cfg);
nat64lsn_destroy_config(cfg);
free(i, M_NAT64LSN);
return (ENOSPC);
}
ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
ipfw_objhash_add(ni, &i->no);
/* Okay, let's link data */
SRV_OBJECT(ch, cfg->no.kidx) = cfg;
i->cfg = cfg;
SRV_OBJECT(ch, i->no.kidx) = i;
nat64lsn_start_instance(cfg);
IPFW_UH_WUNLOCK(ch);
@ -207,13 +213,14 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
static void
nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
nat64lsn_detach_instance(struct ip_fw_chain *ch,
struct nat64lsn_instance *i)
{
IPFW_UH_WLOCK_ASSERT(ch);
ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
SRV_OBJECT(ch, i->no.kidx) = NULL;
ipfw_objhash_del(CHAIN_TO_SRV(ch), &i->no);
ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), i->no.kidx);
}
/*
@ -227,7 +234,7 @@ static int
nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
struct nat64lsn_cfg *cfg;
struct nat64lsn_instance *i;
ipfw_obj_header *oh;
if (sd->valsize != sizeof(*oh))
@ -236,23 +243,23 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
oh = (ipfw_obj_header *)op3;
IPFW_UH_WLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (i == NULL) {
IPFW_UH_WUNLOCK(ch);
return (ENOENT);
}
if (cfg->no.refcnt > 0) {
if (i->no.refcnt > 0) {
IPFW_UH_WUNLOCK(ch);
return (EBUSY);
}
ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
SRV_OBJECT(ch, cfg->no.kidx) = NULL;
nat64lsn_detach_config(ch, cfg);
ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, i->no.kidx);
nat64lsn_detach_instance(ch, i);
IPFW_UH_WUNLOCK(ch);
nat64lsn_destroy_instance(cfg);
nat64lsn_destroy_config(i->cfg);
free(i, M_NAT64LSN);
return (0);
}
@ -263,7 +270,7 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
struct ipfw_nat64lsn_stats *stats)
{
struct nat64lsn_alias *alias;
int i, j;
int i;
__COPY_STAT_FIELD(cfg, stats, opcnt64);
__COPY_STAT_FIELD(cfg, stats, opcnt46);
@ -294,20 +301,22 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
stats->hostcount = cfg->hosts_count;
for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
alias = &cfg->aliases[i];
for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
stats->tcpchunks += alias->tcp_pgcount;
stats->udpchunks += alias->udp_pgcount;
stats->icmpchunks += alias->icmp_pgcount;
}
}
#undef __COPY_STAT_FIELD
static void
nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_instance *i,
ipfw_nat64lsn_cfg *uc)
{
struct nat64lsn_cfg *cfg;
strlcpy(uc->name, i->no.name, sizeof(uc->name));
uc->set = i->no.set;
cfg = i->cfg;
uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
uc->states_chunks = cfg->states_chunks;
@ -323,8 +332,6 @@ nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
uc->prefix6 = cfg->base.plat_prefix;
uc->plen4 = cfg->plen4;
uc->plen6 = cfg->base.plat_plen;
uc->set = cfg->no.set;
strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
}
struct nat64_dump_arg {
@ -336,12 +343,14 @@ static int
export_config_cb(struct namedobj_instance *ni, struct named_object *no,
void *arg)
{
struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
struct nat64_dump_arg *da;
ipfw_nat64lsn_cfg *uc;
da = (struct nat64_dump_arg *)arg;
uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
sizeof(*uc));
nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
nat64lsn_export_config(da->ch,
__containerof(no, struct nat64lsn_instance, no), uc);
return (0);
}
@ -400,6 +409,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
{
ipfw_obj_header *oh;
ipfw_nat64lsn_cfg *uc;
struct nat64lsn_instance *i;
struct nat64lsn_cfg *cfg;
struct namedobj_instance *ni;
@ -417,12 +427,12 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
ni = CHAIN_TO_SRV(ch);
if (sd->sopt->sopt_dir == SOPT_GET) {
IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (i == NULL) {
IPFW_UH_RUNLOCK(ch);
return (ENOENT);
}
nat64lsn_export_config(ch, cfg, uc);
nat64lsn_export_config(ch, i, uc);
IPFW_UH_RUNLOCK(ch);
return (0);
}
@ -430,8 +440,8 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
nat64lsn_default_config(uc);
IPFW_UH_WLOCK(ch);
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
i = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (i == NULL) {
IPFW_UH_WUNLOCK(ch);
return (ENOENT);
}
@ -441,7 +451,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
* jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
* tcp_est_age, udp_age, icmp_age, flags, states_chunks.
*/
cfg = i->cfg;
cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
cfg->host_delete_delay = uc->nh_delete_delay;
@ -472,7 +482,7 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
struct sockopt_data *sd)
{
struct ipfw_nat64lsn_stats stats;
struct nat64lsn_cfg *cfg;
struct nat64lsn_instance *i;
ipfw_obj_header *oh;
ipfw_obj_ctlv *ctlv;
size_t sz;
@ -488,13 +498,13 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
memset(&stats, 0, sizeof(stats));
IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (i == NULL) {
IPFW_UH_RUNLOCK(ch);
return (ENOENT);
}
export_stats(ch, cfg, &stats);
export_stats(ch, i->cfg, &stats);
IPFW_UH_RUNLOCK(ch);
ctlv = (ipfw_obj_ctlv *)(oh + 1);
@ -519,7 +529,7 @@ static int
nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
struct sockopt_data *sd)
{
struct nat64lsn_cfg *cfg;
struct nat64lsn_instance *i;
ipfw_obj_header *oh;
if (sd->valsize != sizeof(*oh))
@ -530,12 +540,12 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
return (EINVAL);
IPFW_UH_WLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (i == NULL) {
IPFW_UH_WUNLOCK(ch);
return (ENOENT);
}
COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
COUNTER_ARRAY_ZERO(i->cfg->base.stats.cnt, NAT64STATS);
IPFW_UH_WUNLOCK(ch);
return (0);
}
@ -551,7 +561,7 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
* ipfw_nat64lsn_state x count, ... ] ]
*/
static int
nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
nat64lsn_export_states(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
{
ipfw_nat64lsn_state_v1 *s;
@ -663,24 +673,6 @@ nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
return (NULL);
}
/*
* Lists nat64lsn states.
* Data layout (v0):
* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
* Reply: [ ipfw_obj_header ipfw_obj_data [
* ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
*
* Returns 0 on success
*/
static int
nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
/* TODO: implement states listing for old ipfw(8) binaries */
return (EOPNOTSUPP);
}
/*
* Lists nat64lsn states.
* Data layout (v1)(current):
@ -691,12 +683,13 @@ nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
* Returns 0 on success
*/
static int
nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
ipfw_obj_header *oh;
ipfw_obj_data *od;
ipfw_nat64lsn_stg_v1 *stg;
struct nat64lsn_instance *i;
struct nat64lsn_cfg *cfg;
struct nat64lsn_pg *pg;
union nat64lsn_pgidx idx;
@ -724,11 +717,12 @@ nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
return (EINVAL);
IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
i = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (i == NULL) {
IPFW_UH_RUNLOCK(ch);
return (ENOENT);
}
cfg = i->cfg;
if (idx.index == 0) { /* Fill in starting point */
idx.addr = cfg->prefix4;
idx.proto = IPPROTO_ICMP;
@ -762,7 +756,7 @@ nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
pg = nat64lsn_get_pg_byidx(cfg, &idx);
if (pg != NULL) {
count = 0;
ret = nat64lsn_export_states_v1(cfg, &idx, pg,
ret = nat64lsn_export_states(cfg, &idx, pg,
sd, &count);
if (ret != 0)
break;
@ -809,96 +803,113 @@ nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create },
{ IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy },
{ IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config },
{ IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
{ IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
{ IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
{ IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states_v0 },
{ IP_FW_NAT64LSN_LIST_STATES,1, HDIR_GET, nat64lsn_states_v1 },
{ IP_FW_NAT64LSN_CREATE, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_create },
{ IP_FW_NAT64LSN_DESTROY, IP_FW3_OPVER, HDIR_SET, nat64lsn_destroy },
{ IP_FW_NAT64LSN_CONFIG, IP_FW3_OPVER, HDIR_BOTH, nat64lsn_config },
{ IP_FW_NAT64LSN_LIST, IP_FW3_OPVER, HDIR_GET, nat64lsn_list },
{ IP_FW_NAT64LSN_STATS, IP_FW3_OPVER, HDIR_GET, nat64lsn_stats },
{ IP_FW_NAT64LSN_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nat64lsn_reset_stats },
{ IP_FW_NAT64LSN_LIST_STATES, IP_FW3_OPVER, HDIR_GET, nat64lsn_states },
};
#define NAT64LSN_ARE_EQUAL(v) (cfg0->v == cfg1->v)
static int
nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
nat64lsn_cmp_configs(struct nat64lsn_cfg *cfg0, struct nat64lsn_cfg *cfg1)
{
ipfw_insn *icmd;
icmd = cmd - 1;
if (icmd->opcode != O_EXTERNAL_ACTION ||
icmd->arg1 != V_nat64lsn_eid)
return (1);
if ((cfg0->base.flags & cfg1->base.flags & NAT64LSN_ALLOW_SWAPCONF) &&
NAT64LSN_ARE_EQUAL(prefix4) &&
NAT64LSN_ARE_EQUAL(pmask4) &&
NAT64LSN_ARE_EQUAL(plen4) &&
NAT64LSN_ARE_EQUAL(base.plat_plen) &&
IN6_ARE_ADDR_EQUAL(&cfg0->base.plat_prefix,
&cfg1->base.plat_prefix))
return (0);
return (1);
}
#undef NAT64LSN_ARE_EQUAL
*puidx = cmd->arg1;
*ptype = 0;
/*
 * Exchange the configuration pointers of two instances. Only the cfg
 * members are touched; everything else in both instances is left as is.
 */
static void
nat64lsn_swap_configs(struct nat64lsn_instance *i0,
    struct nat64lsn_instance *i1)
{
	struct nat64lsn_cfg *saved = i1->cfg;

	i1->cfg = i0->cfg;
	i0->cfg = saved;
}
/*
* NAT64LSN sets swap handler.
*
* When two sets have a NAT64LSN instance with the same name, we check
* the most important configuration parameters, and if there is no difference
* and both instances have the NAT64LSN_ALLOW_SWAPCONF flag, we exchange
* configs between the instances. This allows NAT64 states to be kept when
* ipfw's rules are reloaded using a new set.
*
* XXX: since the manage_sets caller doesn't hold IPFW_WLOCK(), it is possible
* that some states will be created during switching, because the set of rules
* is changed a bit earlier than the named objects.
*/
static int
nat64lsn_swap_sets_cb(struct namedobj_instance *ni, struct named_object *no,
    void *arg)
{
	struct nat64lsn_instance *cur, *peer;
	uint8_t *pair;

	/* arg is a two-element array: [0] is the current set, [1] the new one. */
	pair = arg;

	/* Only objects that live in the first set are considered. */
	if (no->set != pair[0])
		return (0);

	/* The instance must be sets aware, otherwise there is nothing to do. */
	cur = __containerof(no, struct nat64lsn_instance, no);
	if ((cur->cfg->base.flags & NAT64LSN_ALLOW_SWAPCONF) == 0)
		return (0);

	/* Look for an instance with the same name in the second set. */
	peer = nat64lsn_find(ni, no->name, pair[1]);
	if (peer == NULL)
		return (0);

	/* Configs are exchanged only when the comparison reports a match. */
	if (nat64lsn_cmp_configs(cur->cfg, peer->cfg) != 0)
		return (0);

	IPFW_UH_WLOCK_ASSERT(&V_layer3_chain);
	IPFW_WLOCK(&V_layer3_chain);
	nat64lsn_swap_configs(cur, peer);
	IPFW_WUNLOCK(&V_layer3_chain);
	return (0);
}
static void
nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
{
cmd->arg1 = idx;
}
static int
nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
struct named_object **pno)
{
int err;
err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
IPFW_TLV_NAT64LSN_NAME, pno);
return (err);
}
static struct named_object *
nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
{
struct namedobj_instance *ni;
struct named_object *no;
IPFW_UH_WLOCK_ASSERT(ch);
ni = CHAIN_TO_SRV(ch);
no = ipfw_objhash_lookup_kidx(ni, idx);
KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
return (no);
}
static int
nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
nat64lsn_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
enum ipfw_sets_cmd cmd)
{
uint8_t sets[2];
if (cmd == SWAP_ALL) {
sets[0] = (uint8_t)set;
sets[1] = new_set;
ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch),
nat64lsn_swap_sets_cb, &sets, IPFW_TLV_NAT64LSN_NAME);
}
return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
set, new_set, cmd));
}
static struct opcode_obj_rewrite opcodes[] = {
{
.opcode = O_EXTERNAL_INSTANCE,
.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
.classifier = nat64lsn_classify,
.update = nat64lsn_update_arg1,
.find_byname = nat64lsn_findbyname,
.find_bykidx = nat64lsn_findbykidx,
.manage_sets = nat64lsn_manage_sets,
},
};
NAT64_DEFINE_OPCODE_REWRITER(nat64lsn, NAT64LSN, opcodes);
static int
destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
void *arg)
{
struct nat64lsn_cfg *cfg;
struct nat64lsn_instance *i;
struct ip_fw_chain *ch;
ch = (struct ip_fw_chain *)arg;
cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
SRV_OBJECT(ch, no->kidx) = NULL;
nat64lsn_detach_config(ch, cfg);
nat64lsn_destroy_instance(cfg);
i = (struct nat64lsn_instance *)SRV_OBJECT(ch, no->kidx);
nat64lsn_detach_instance(ch, i);
nat64lsn_destroy_config(i->cfg);
free(i, M_NAT64LSN);
return (0);
}

View file

@ -58,7 +58,7 @@
#include "nat64stl.h"
#define NAT64_LOOKUP(chain, cmd) \
(struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
(struct nat64stl_cfg *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
static void
nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
@ -67,7 +67,7 @@ nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
static uint32_t pktid = 0;
memset(plog, 0, sizeof(*plog));
plog->length = PFLOG_HDRLEN;
plog->length = PFLOG_REAL_HDRLEN;
plog->af = family;
plog->action = PF_NAT;
plog->dir = PF_IN;
@ -204,8 +204,8 @@ int
ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
ipfw_insn *cmd, int *done)
{
ipfw_insn *icmd;
struct nat64stl_cfg *cfg;
ipfw_insn *icmd;
in_addr_t dst4;
uint32_t tablearg;
int ret;
@ -213,9 +213,9 @@ ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
IPFW_RLOCK_ASSERT(chain);
*done = 0; /* try next rule if not matched */
icmd = cmd + 1;
icmd = cmd + F_LEN(cmd);
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nat64stl_eid ||
insntod(cmd, kidx)->kidx != V_nat64stl_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NAT64_LOOKUP(chain, icmd)) == NULL)
return (0);

View file

@ -35,8 +35,8 @@
struct nat64stl_cfg {
struct named_object no;
uint16_t map64; /* table with 6to4 mapping */
uint16_t map46; /* table with 4to6 mapping */
uint32_t map64; /* table with 6to4 mapping */
uint32_t map46; /* table with 4to6 mapping */
struct nat64_config base;
#define NAT64STL_KIDX 0x0100
@ -47,7 +47,7 @@ struct nat64stl_cfg {
char name[64];
};
VNET_DECLARE(uint16_t, nat64stl_eid);
VNET_DECLARE(uint32_t, nat64stl_eid);
#define V_nat64stl_eid VNET(nat64stl_eid)
#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid)

View file

@ -61,7 +61,7 @@
#include "nat64stl.h"
VNET_DEFINE(uint16_t, nat64stl_eid) = 0;
VNET_DEFINE(uint32_t, nat64stl_eid) = 0;
static struct nat64stl_cfg *nat64stl_alloc_config(const char *name,
uint8_t set);
@ -489,81 +489,23 @@ nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
}
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create },
{ IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy },
{ IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config },
{ IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list },
{ IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats },
{ IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats },
{ IP_FW_NAT64STL_CREATE, IP_FW3_OPVER, HDIR_SET, nat64stl_create },
{ IP_FW_NAT64STL_DESTROY, IP_FW3_OPVER, HDIR_SET, nat64stl_destroy },
{ IP_FW_NAT64STL_CONFIG, IP_FW3_OPVER, HDIR_BOTH,nat64stl_config },
{ IP_FW_NAT64STL_LIST, IP_FW3_OPVER, HDIR_GET, nat64stl_list },
{ IP_FW_NAT64STL_STATS, IP_FW3_OPVER, HDIR_GET, nat64stl_stats },
{ IP_FW_NAT64STL_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nat64stl_reset_stats },
};
static int
nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
{
ipfw_insn *icmd;
icmd = cmd - 1;
if (icmd->opcode != O_EXTERNAL_ACTION ||
icmd->arg1 != V_nat64stl_eid)
return (1);
*puidx = cmd->arg1;
*ptype = 0;
return (0);
}
static void
nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx)
{
cmd->arg1 = idx;
}
static int
nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
struct named_object **pno)
{
int err;
err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
IPFW_TLV_NAT64STL_NAME, pno);
return (err);
}
static struct named_object *
nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
{
struct namedobj_instance *ni;
struct named_object *no;
IPFW_UH_WLOCK_ASSERT(ch);
ni = CHAIN_TO_SRV(ch);
no = ipfw_objhash_lookup_kidx(ni, idx);
KASSERT(no != NULL, ("NAT with index %d not found", idx));
return (no);
}
static int
nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
nat64stl_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
enum ipfw_sets_cmd cmd)
{
return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME,
set, new_set, cmd));
}
static struct opcode_obj_rewrite opcodes[] = {
{
.opcode = O_EXTERNAL_INSTANCE,
.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
.classifier = nat64stl_classify,
.update = nat64stl_update_arg1,
.find_byname = nat64stl_findbyname,
.find_bykidx = nat64stl_findbykidx,
.manage_sets = nat64stl_manage_sets,
},
};
NAT64_DEFINE_OPCODE_REWRITER(nat64stl, NAT64STL, opcodes);
static int
destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,

View file

@ -60,7 +60,7 @@
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/nptv6/nptv6.h>
VNET_DEFINE_STATIC(uint16_t, nptv6_eid) = 0;
VNET_DEFINE_STATIC(uint32_t, nptv6_eid) = 0;
#define V_nptv6_eid VNET(nptv6_eid)
#define IPFW_TLV_NPTV6_NAME IPFW_TLV_EACTION_NAME(V_nptv6_eid)
@ -76,7 +76,7 @@ static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp,
int offset);
#define NPTV6_LOOKUP(chain, cmd) \
(struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
(struct nptv6_cfg *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
#ifndef IN6_MASK_ADDR
#define IN6_MASK_ADDR(a, m) do { \
@ -354,9 +354,9 @@ ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,
*done = 0; /* try next rule if not matched */
ret = IP_FW_DENY;
icmd = cmd + 1;
icmd = cmd + F_LEN(cmd);
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nptv6_eid ||
insntod(cmd, kidx)->kidx != V_nptv6_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NPTV6_LOOKUP(chain, icmd)) == NULL ||
(cfg->flags & NPTV6_READY) == 0)
@ -374,7 +374,7 @@ ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,
*/
ip6 = mtod(args->m, struct ip6_hdr *);
NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d",
cmd->arg1, icmd->arg1,
insntod(cmd, kidx)->kidx, insntod(icmd, kidx)->kidx,
inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
ip6->ip6_nxt);
@ -904,37 +904,38 @@ nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
}
static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NPTV6_CREATE, 0, HDIR_SET, nptv6_create },
{ IP_FW_NPTV6_DESTROY,0, HDIR_SET, nptv6_destroy },
{ IP_FW_NPTV6_CONFIG, 0, HDIR_BOTH, nptv6_config },
{ IP_FW_NPTV6_LIST, 0, HDIR_GET, nptv6_list },
{ IP_FW_NPTV6_STATS, 0, HDIR_GET, nptv6_stats },
{ IP_FW_NPTV6_RESET_STATS,0, HDIR_SET, nptv6_reset_stats },
{ IP_FW_NPTV6_CREATE, IP_FW3_OPVER, HDIR_SET, nptv6_create },
{ IP_FW_NPTV6_DESTROY, IP_FW3_OPVER, HDIR_SET, nptv6_destroy },
{ IP_FW_NPTV6_CONFIG, IP_FW3_OPVER, HDIR_BOTH,nptv6_config },
{ IP_FW_NPTV6_LIST, IP_FW3_OPVER, HDIR_GET, nptv6_list },
{ IP_FW_NPTV6_STATS, IP_FW3_OPVER, HDIR_GET, nptv6_stats },
{ IP_FW_NPTV6_RESET_STATS, IP_FW3_OPVER, HDIR_SET, nptv6_reset_stats },
};
static int
nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
nptv6_classify(ipfw_insn *cmd0, uint32_t *puidx, uint8_t *ptype)
{
ipfw_insn *icmd;
icmd = cmd - 1;
NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d",
cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1);
icmd = cmd0 - F_LEN(cmd0);
NPTV6_DEBUG("opcode %u, kidx %u, opcode0 %u, kidx %u",
cmd->opcode, insntod(cmd, kidx)->kidx,
icmd->opcode, insntod(icmd, kidx)->kidx);
if (icmd->opcode != O_EXTERNAL_ACTION ||
icmd->arg1 != V_nptv6_eid)
insntod(icmd, kidx)->kidx != V_nptv6_eid)
return (1);
*puidx = cmd->arg1;
*puidx = insntod(cmd0, kidx)->kidx;
*ptype = 0;
return (0);
}
static void
nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx)
nptv6_update_kidx(ipfw_insn *cmd0, uint32_t idx)
{
cmd->arg1 = idx;
NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
insntod(cmd0, kidx)->kidx = idx;
NPTV6_DEBUG("opcode %u, kidx -> %u", cmd->opcode, idx);
}
static int
@ -950,7 +951,7 @@ nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
}
static struct named_object *
nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
nptv6_findbykidx(struct ip_fw_chain *ch, uint32_t idx)
{
struct namedobj_instance *ni;
struct named_object *no;
@ -958,14 +959,14 @@ nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
IPFW_UH_WLOCK_ASSERT(ch);
ni = CHAIN_TO_SRV(ch);
no = ipfw_objhash_lookup_kidx(ni, idx);
KASSERT(no != NULL, ("NPT with index %d not found", idx));
KASSERT(no != NULL, ("NPT with index %u not found", idx));
NPTV6_DEBUG("kidx %u -> %s", idx, no->name);
return (no);
}
static int
nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
nptv6_manage_sets(struct ip_fw_chain *ch, uint32_t set, uint8_t new_set,
enum ipfw_sets_cmd cmd)
{
@ -978,7 +979,7 @@ static struct opcode_obj_rewrite opcodes[] = {
.opcode = O_EXTERNAL_INSTANCE,
.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
.classifier = nptv6_classify,
.update = nptv6_update_arg1,
.update = nptv6_update_kidx,
.find_byname = nptv6_findbyname,
.find_bykidx = nptv6_findbykidx,
.manage_sets = nptv6_manage_sets,

View file

@ -53,7 +53,7 @@
#include <machine/in_cksum.h>
VNET_DEFINE_STATIC(uint16_t, tcpmod_setmss_eid) = 0;
VNET_DEFINE_STATIC(uint32_t, tcpmod_setmss_eid) = 0;
#define V_tcpmod_setmss_eid VNET(tcpmod_setmss_eid)
static int
@ -178,9 +178,9 @@ ipfw_tcpmod(struct ip_fw_chain *chain, struct ip_fw_args *args,
*done = 0; /* try next rule if not matched */
ret = IP_FW_DENY;
icmd = cmd + 1;
icmd = cmd + F_LEN(cmd);
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_tcpmod_setmss_eid ||
insntod(cmd, kidx)->kidx != V_tcpmod_setmss_eid ||
icmd->opcode != O_EXTERNAL_DATA ||
icmd->len != F_INSN_SIZE(ipfw_insn))
return (ret);

View file

@ -269,7 +269,8 @@ struct accept_filter_arg {
#define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */
#define AF_HYPERV 43 /* HyperV sockets */
#define AF_DIVERT 44 /* divert(4) */
#define AF_MAX 44
#define AF_IPFWLOG 46
#define AF_MAX 46
/*
* When allocating a new AF_ constant, please only allocate
* even numbered constants for FreeBSD until 134 as odd numbered AF_
@ -395,6 +396,7 @@ struct sockproto {
#define PF_INET_SDP AF_INET_SDP
#define PF_INET6_SDP AF_INET6_SDP
#define PF_DIVERT AF_DIVERT
#define PF_IPFWLOG AF_IPFWLOG
#define PF_MAX AF_MAX