aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sbin/ipfw/ipfw.830
-rw-r--r--sbin/ipfw/ipfw2.h1
-rw-r--r--sbin/ipfw/nat64lsn.c124
-rw-r--r--sys/conf/files4
-rw-r--r--sys/modules/ipfw_nat64/Makefile2
-rw-r--r--sys/netinet6/ip_fw_nat64.h45
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.c2520
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.h425
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn_control.c434
9 files changed, 1766 insertions, 1819 deletions
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 31448aff92bb..f02ec3e148cd 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -1,7 +1,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 18, 2019
+.Dd March 19, 2019
.Dt IPFW 8
.Os
.Sh NAME
@@ -3300,6 +3300,7 @@ See
.Sx SYSCTL VARIABLES
for more info.
.Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+.Ss Stateful translation
.Nm
supports in-kernel IPv6/IPv4 network address and protocol translation.
Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers
@@ -3317,7 +3318,8 @@ to be able use stateful NAT64 translator.
Stateful NAT64 uses a bunch of memory for several types of objects.
When IPv6 client initiates connection, NAT64 translator creates a host entry
in the states table.
-Each host entry has a number of ports group entries allocated on demand.
+Each host entry uses preallocated IPv4 alias entry.
+Each alias entry has a number of ports group entries allocated on demand.
Ports group entries contains connection state entries.
There are several options to control limits and lifetime for these objects.
.Pp
@@ -3337,6 +3339,11 @@ First time an original packet is handled and consumed by translator,
and then it is handled again as translated packet.
This behavior can be changed by sysctl variable
.Va net.inet.ip.fw.nat64_direct_output .
+Also translated packet can be tagged using
+.Cm tag
+rule action, and then matched by
+.Cm tagged
+opcode to avoid loops and extra overhead.
.Pp
The stateful NAT64 configuration command is the following:
.Bd -ragged -offset indent
@@ -3364,15 +3371,16 @@ to represent IPv4 addresses. This IPv6 prefix should be configured in DNS64.
The translator implementation follows RFC6052, that restricts the length of
prefixes to one of following: 32, 40, 48, 56, 64, or 96.
The Well-Known IPv6 Prefix 64:ff9b:: must be 96 bits long.
-.It Cm max_ports Ar number
-Maximum number of ports reserved for upper level protocols to one IPv6 client.
-All reserved ports are divided into chunks between supported protocols.
-The number of connections from one IPv6 client is limited by this option.
-Note that closed TCP connections still remain in the list of connections until
-.Cm tcp_close_age
-interval will not expire.
-Default value is
-.Ar 2048 .
+The special
+.Ar ::/length
+prefix can be used to handle several IPv6 prefixes with one NAT64 instance.
+The NAT64 instance will determine a destination IPv4 address from prefix
+.Ar length .
+.It Cm states_chunks Ar number
+The number of states chunks in single ports group.
+Each ports group by default can keep 64 state entries in single chunk.
+The above value affects the maximum number of states that can be associated with a single IPv4 alias address and port.
+The value must be a power of 2, and up to 128.
.It Cm host_del_age Ar seconds
The number of seconds until the host entry for a IPv6 client will be deleted
and all its resources will be released due to inactivity.
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index ff6990ae1c06..2b562734d15f 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -278,6 +278,7 @@ enum tokens {
TOK_AGG_LEN,
TOK_AGG_COUNT,
TOK_MAX_PORTS,
+ TOK_STATES_CHUNKS,
TOK_JMAXLEN,
TOK_PORT_RANGE,
TOK_HOST_DEL_AGE,
diff --git a/sbin/ipfw/nat64lsn.c b/sbin/ipfw/nat64lsn.c
index c6a892572818..4a6d7a7914c3 100644
--- a/sbin/ipfw/nat64lsn.c
+++ b/sbin/ipfw/nat64lsn.c
@@ -87,68 +87,70 @@ nat64lsn_print_states(void *buf)
char sflags[4], *sf, *proto;
ipfw_obj_header *oh;
ipfw_obj_data *od;
- ipfw_nat64lsn_stg *stg;
- ipfw_nat64lsn_state *ste;
+ ipfw_nat64lsn_stg_v1 *stg;
+ ipfw_nat64lsn_state_v1 *ste;
uint64_t next_idx;
int i, sz;
oh = (ipfw_obj_header *)buf;
od = (ipfw_obj_data *)(oh + 1);
- stg = (ipfw_nat64lsn_stg *)(od + 1);
+ stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
sz = od->head.length - sizeof(*od);
next_idx = 0;
while (sz > 0 && next_idx != 0xFF) {
- next_idx = stg->next_idx;
+ next_idx = stg->next.index;
sz -= sizeof(*stg);
if (stg->count == 0) {
stg++;
continue;
}
- switch (stg->proto) {
- case IPPROTO_TCP:
- proto = "TCP";
- break;
- case IPPROTO_UDP:
- proto = "UDP";
- break;
- case IPPROTO_ICMPV6:
- proto = "ICMPv6";
- break;
- }
- inet_ntop(AF_INET6, &stg->host6, s, sizeof(s));
+ /*
+ * NOTE: addresses are in network byte order,
+ * ports are in host byte order.
+ */
inet_ntop(AF_INET, &stg->alias4, a, sizeof(a));
- ste = (ipfw_nat64lsn_state *)(stg + 1);
+ ste = (ipfw_nat64lsn_state_v1 *)(stg + 1);
for (i = 0; i < stg->count && sz > 0; i++) {
sf = sflags;
+ inet_ntop(AF_INET6, &ste->host6, s, sizeof(s));
inet_ntop(AF_INET, &ste->daddr, f, sizeof(f));
- if (stg->proto == IPPROTO_TCP) {
+ switch (ste->proto) {
+ case IPPROTO_TCP:
+ proto = "TCP";
if (ste->flags & 0x02)
*sf++ = 'S';
if (ste->flags & 0x04)
*sf++ = 'E';
if (ste->flags & 0x01)
*sf++ = 'F';
+ break;
+ case IPPROTO_UDP:
+ proto = "UDP";
+ break;
+ case IPPROTO_ICMP:
+ proto = "ICMPv6";
+ break;
}
*sf = '\0';
- switch (stg->proto) {
+ switch (ste->proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n",
s, ste->sport, a, ste->aport, proto,
sflags, ste->idle, f, ste->dport);
break;
- case IPPROTO_ICMPV6:
+ case IPPROTO_ICMP:
printf("%s\t%s\t%s\t\t%d\t%s\n",
s, a, proto, ste->idle, f);
break;
default:
printf("%s\t%s\t%d\t\t%d\t%s\n",
- s, a, stg->proto, ste->idle, f);
+ s, a, ste->proto, ste->idle, f);
}
ste++;
sz -= sizeof(*ste);
}
- stg = (ipfw_nat64lsn_stg *)ste;
+ stg = (ipfw_nat64lsn_stg_v1 *)ste;
}
return (next_idx);
}
@@ -174,6 +176,7 @@ nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
err(EX_OSERR, NULL);
do {
oh = (ipfw_obj_header *)buf;
+	oh->opheader.version = 1; /* Force using of new API */
od = (ipfw_obj_data *)(oh + 1);
nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set);
od->head.type = IPFW_TLV_OBJDATA;
@@ -363,12 +366,8 @@ nat64lsn_parse_int(const char *arg, const char *desc)
static struct _s_x nat64newcmds[] = {
{ "prefix6", TOK_PREFIX6 },
- { "agg_len", TOK_AGG_LEN }, /* not yet */
- { "agg_count", TOK_AGG_COUNT }, /* not yet */
- { "port_range", TOK_PORT_RANGE }, /* not yet */
{ "jmaxlen", TOK_JMAXLEN },
{ "prefix4", TOK_PREFIX4 },
- { "max_ports", TOK_MAX_PORTS },
{ "host_del_age", TOK_HOST_DEL_AGE },
{ "pg_del_age", TOK_PG_DEL_AGE },
{ "tcp_syn_age", TOK_TCP_SYN_AGE },
@@ -376,10 +375,13 @@ static struct _s_x nat64newcmds[] = {
{ "tcp_est_age", TOK_TCP_EST_AGE },
{ "udp_age", TOK_UDP_AGE },
{ "icmp_age", TOK_ICMP_AGE },
+ { "states_chunks",TOK_STATES_CHUNKS },
{ "log", TOK_LOG },
{ "-log", TOK_LOGOFF },
{ "allow_private", TOK_PRIVATE },
{ "-allow_private", TOK_PRIVATEOFF },
+ /* for compatibility with old configurations */
+ { "max_ports", TOK_MAX_PORTS }, /* unused */
{ NULL, 0 }
};
@@ -436,42 +438,17 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6,
&cfg->plen6);
if (ipfw_check_nat64prefix(&cfg->prefix6,
- cfg->plen6) != 0)
+ cfg->plen6) != 0 &&
+ !IN6_IS_ADDR_UNSPECIFIED(&cfg->prefix6))
errx(EX_USAGE, "Bad prefix6 %s", *av);
ac--; av++;
break;
-#if 0
- case TOK_AGG_LEN:
- NEED1("Aggregation prefix len required");
- cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt);
- ac--; av++;
- break;
- case TOK_AGG_COUNT:
- NEED1("Max per-prefix count required");
- cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt);
- ac--; av++;
- break;
- case TOK_PORT_RANGE:
- NEED1("port range x[:y] required");
- if ((p = strchr(*av, ':')) == NULL)
- cfg->min_port = (uint16_t)nat64lsn_parse_int(
- *av, opt);
- else {
- *p++ = '\0';
- cfg->min_port = (uint16_t)nat64lsn_parse_int(
- *av, opt);
- cfg->max_port = (uint16_t)nat64lsn_parse_int(
- p, opt);
- }
- ac--; av++;
- break;
case TOK_JMAXLEN:
NEED1("job queue length required");
cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
ac--; av++;
break;
-#endif
case TOK_MAX_PORTS:
NEED1("Max per-user ports required");
cfg->max_ports = nat64lsn_parse_int(*av, opt);
@@ -519,6 +496,12 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
*av, opt);
ac--; av++;
break;
+ case TOK_STATES_CHUNKS:
+ NEED1("number of chunks required");
+ cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
case TOK_LOG:
cfg->flags |= NAT64_LOG;
break;
@@ -630,6 +613,12 @@ nat64lsn_config(const char *name, uint8_t set, int ac, char **av)
*av, opt);
ac--; av++;
break;
+ case TOK_STATES_CHUNKS:
+ NEED1("number of chunks required");
+ cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
case TOK_LOG:
cfg->flags |= NAT64_LOG;
break;
@@ -789,31 +778,24 @@ nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
printf("nat64lsn %s prefix4 %s/%u", cfg->name, abuf, cfg->plen4);
inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf));
printf(" prefix6 %s/%u", abuf, cfg->plen6);
-#if 0
- printf("agg_len %u agg_count %u ", cfg->agg_prefix_len,
- cfg->agg_prefix_max);
- if (cfg->min_port != NAT64LSN_PORT_MIN ||
- cfg->max_port != NAT64LSN_PORT_MAX)
- printf(" port_range %u:%u", cfg->min_port, cfg->max_port);
- if (cfg->jmaxlen != NAT64LSN_JMAXLEN)
- printf(" jmaxlen %u ", cfg->jmaxlen);
-#endif
- if (cfg->max_ports != NAT64LSN_MAX_PORTS)
- printf(" max_ports %u", cfg->max_ports);
- if (cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
+ if (co.verbose || cfg->states_chunks > 1)
+ printf(" states_chunks %u", cfg->states_chunks);
+ if (co.verbose || cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
printf(" host_del_age %u", cfg->nh_delete_delay);
- if (cfg->pg_delete_delay != NAT64LSN_PG_AGE)
+ if (co.verbose || cfg->pg_delete_delay != NAT64LSN_PG_AGE)
printf(" pg_del_age %u ", cfg->pg_delete_delay);
- if (cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
+ if (co.verbose || cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
printf(" tcp_syn_age %u", cfg->st_syn_ttl);
- if (cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
+ if (co.verbose || cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
printf(" tcp_close_age %u", cfg->st_close_ttl);
- if (cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
+ if (co.verbose || cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
printf(" tcp_est_age %u", cfg->st_estab_ttl);
- if (cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
+ if (co.verbose || cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
printf(" udp_age %u", cfg->st_udp_ttl);
- if (cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
+ if (co.verbose || cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
printf(" icmp_age %u", cfg->st_icmp_ttl);
+ if (co.verbose || cfg->jmaxlen != NAT64LSN_JMAXLEN)
+ printf(" jmaxlen %u ", cfg->jmaxlen);
if (cfg->flags & NAT64_LOG)
printf(" log");
if (cfg->flags & NAT64_ALLOW_PRIVATE)
diff --git a/sys/conf/files b/sys/conf/files
index 45968c43852c..ed982409534f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4398,9 +4398,9 @@ netpfil/ipfw/nat64/nat64clat.c optional inet inet6 ipfirewall \
netpfil/ipfw/nat64/nat64clat_control.c optional inet inet6 ipfirewall \
ipfirewall_nat64
netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \
- ipfirewall_nat64
+ ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \
- ipfirewall_nat64
+ ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \
ipfirewall_nat64
netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \
diff --git a/sys/modules/ipfw_nat64/Makefile b/sys/modules/ipfw_nat64/Makefile
index ee2ad7da15af..037215a71481 100644
--- a/sys/modules/ipfw_nat64/Makefile
+++ b/sys/modules/ipfw_nat64/Makefile
@@ -8,4 +8,6 @@ SRCS+= nat64clat.c nat64clat_control.c
SRCS+= nat64lsn.c nat64lsn_control.c
SRCS+= nat64stl.c nat64stl_control.c
+CFLAGS+= -I${SRCTOP}/sys/contrib/ck/include
+
.include <bsd.kmod.mk>
diff --git a/sys/netinet6/ip_fw_nat64.h b/sys/netinet6/ip_fw_nat64.h
index 47c0a70d167f..40e3441132e1 100644
--- a/sys/netinet6/ip_fw_nat64.h
+++ b/sys/netinet6/ip_fw_nat64.h
@@ -122,7 +122,7 @@ typedef struct _ipfw_nat64clat_cfg {
/*
* NAT64LSN default configuration values
*/
-#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */
+#define NAT64LSN_MAX_PORTS 2048 /* Unused */
#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
@@ -135,16 +135,20 @@ typedef struct _ipfw_nat64clat_cfg {
typedef struct _ipfw_nat64lsn_cfg {
char name[64]; /* NAT name */
uint32_t flags;
- uint32_t max_ports; /* Max ports per client */
- uint32_t agg_prefix_len; /* Prefix length to count */
- uint32_t agg_prefix_max; /* Max hosts per agg prefix */
+
+ uint32_t max_ports; /* Unused */
+ uint32_t agg_prefix_len; /* Unused */
+ uint32_t agg_prefix_max; /* Unused */
+
struct in_addr prefix4;
uint16_t plen4; /* Prefix length */
uint16_t plen6; /* Prefix length */
struct in6_addr prefix6; /* NAT64 prefix */
uint32_t jmaxlen; /* Max jobqueue length */
- uint16_t min_port; /* Min port group # to use */
- uint16_t max_port; /* Max port group # to use */
+
+ uint16_t min_port; /* Unused */
+ uint16_t max_port; /* Unused */
+
uint16_t nh_delete_delay;/* Stale host delete delay */
uint16_t pg_delete_delay;/* Stale portgroup delete delay */
uint16_t st_syn_ttl; /* TCP syn expire */
@@ -153,7 +157,7 @@ typedef struct _ipfw_nat64lsn_cfg {
uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */
uint8_t set; /* Named instance set [0..31] */
- uint8_t spare;
+ uint8_t states_chunks; /* Number of states chunks per PG */
} ipfw_nat64lsn_cfg;
typedef struct _ipfw_nat64lsn_state {
@@ -177,5 +181,30 @@ typedef struct _ipfw_nat64lsn_stg {
uint32_t spare2;
} ipfw_nat64lsn_stg;
-#endif /* _NETINET6_IP_FW_NAT64_H_ */
+typedef struct _ipfw_nat64lsn_state_v1 {
+ struct in6_addr host6; /* Bound IPv6 host */
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint16_t spare;
+ uint16_t idle; /* Last used time */
+ uint8_t flags; /* State flags */
+ uint8_t proto; /* protocol */
+} ipfw_nat64lsn_state_v1;
+typedef struct _ipfw_nat64lsn_stg_v1 {
+ union nat64lsn_pgidx {
+ uint64_t index;
+ struct {
+ uint8_t chunk; /* states chunk */
+ uint8_t proto; /* protocol */
+ uint16_t port; /* base port */
+ in_addr_t addr; /* alias address */
+ };
+ } next; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint32_t count; /* Number of states */
+} ipfw_nat64lsn_stg_v1;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.c b/sys/netpfil/ipfw/nat64/nat64lsn.c
index 1ddeaafc7dce..af88fd1622c5 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn.c
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -33,16 +33,17 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
#include <sys/errno.h>
+#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
-#include <sys/rwlock.h>
#include <sys/socket.h>
-#include <sys/queue.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
@@ -71,17 +72,22 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
-static void nat64lsn_periodic(void *data);
-#define PERIODIC_DELAY 4
-static uint8_t nat64lsn_proto_map[256];
-uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+static epoch_t nat64lsn_epoch;
+#define NAT64LSN_EPOCH_ENTER(et) epoch_enter_preempt(nat64lsn_epoch, &(et))
+#define NAT64LSN_EPOCH_EXIT(et) epoch_exit_preempt(nat64lsn_epoch, &(et))
+#define NAT64LSN_EPOCH_WAIT() epoch_wait_preempt(nat64lsn_epoch)
+#define NAT64LSN_EPOCH_ASSERT() MPASS(in_epoch(nat64lsn_epoch))
+#define NAT64LSN_EPOCH_CALL(c, f) epoch_call(nat64lsn_epoch, (c), (f))
-#define NAT64_FLAG_FIN 0x01 /* FIN was seen */
-#define NAT64_FLAG_SYN 0x02 /* First syn in->out */
-#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */
-#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_pgchunk_zone;
+static uma_zone_t nat64lsn_pg_zone;
+static uma_zone_t nat64lsn_aliaslink_zone;
+static uma_zone_t nat64lsn_state_zone;
+static uma_zone_t nat64lsn_job_zone;
-#define NAT64_FLAG_RDR 0x80 /* Port redirect */
+static void nat64lsn_periodic(void *data);
+#define PERIODIC_DELAY 4
#define NAT64_LOOKUP(chain, cmd) \
(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
@@ -91,25 +97,33 @@ uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
enum nat64lsn_jtype {
JTYPE_NEWHOST = 1,
JTYPE_NEWPORTGROUP,
- JTYPE_DELPORTGROUP,
+ JTYPE_DESTROY,
};
struct nat64lsn_job_item {
- TAILQ_ENTRY(nat64lsn_job_item) next;
+ STAILQ_ENTRY(nat64lsn_job_item) entries;
enum nat64lsn_jtype jtype;
- struct nat64lsn_host *nh;
- struct nat64lsn_portgroup *pg;
- void *spare_idx;
- struct in6_addr haddr;
- uint8_t nat_proto;
- uint8_t done;
- int needs_idx;
- int delcount;
- unsigned int fhash; /* Flow hash */
- uint32_t aaddr; /* Last used address (net) */
- struct mbuf *m;
- struct ipfw_flow_id f_id;
- uint64_t delmask[NAT64LSN_PGPTRNMASK];
+
+ union {
+ struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
+ struct mbuf *m;
+ struct nat64lsn_host *host;
+ struct nat64lsn_state *state;
+ uint32_t src6_hval;
+ uint32_t state_hval;
+ struct ipfw_flow_id f_id;
+ in_addr_t faddr;
+ uint16_t port;
+ uint8_t proto;
+ uint8_t done;
+ };
+ struct { /* used by JTYPE_DESTROY */
+ struct nat64lsn_hosts_slist hosts;
+ struct nat64lsn_pg_slist portgroups;
+ struct nat64lsn_pgchunk *pgchunk;
+ struct epoch_context epoch_ctx;
+ };
+ };
};
static struct mtx jmtx;
@@ -118,143 +132,311 @@ static struct mtx jmtx;
#define JQUEUE_LOCK() mtx_lock(&jmtx)
#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static struct nat64lsn_job_item *nat64lsn_create_job(
+ struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
struct nat64lsn_job_item *ji);
-static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
- struct nat64lsn_job_head *jhead, int jlen);
-
-static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, int jtype);
-static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
- int needs_idx);
-static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static void nat64lsn_job_destroy(epoch_context_t ctx);
+static void nat64lsn_destroy_host(struct nat64lsn_host *host);
+static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
+
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm);
+ const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
- struct ipfw_flow_id *f_id, struct mbuf **pm);
-
-static int alloc_portgroup(struct nat64lsn_job_item *ji);
-static void destroy_portgroup(struct nat64lsn_portgroup *pg);
-static void destroy_host6(struct nat64lsn_host *nh);
-static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+ struct ipfw_flow_id *f_id, struct mbuf **mp);
+static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
+ struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
+
+#define NAT64_BIT_TCP_FIN 0 /* FIN was seen */
+#define NAT64_BIT_TCP_SYN 1 /* First syn in->out */
+#define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */
+#define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */
+#define NAT64_BIT_STALE 7 /* state is going to be expired */
+
+#define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
+#define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
+#define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
-static int attach_portgroup(struct nat64lsn_cfg *cfg,
- struct nat64lsn_job_item *ji);
-static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+#define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
+#define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+ uint8_t result;
-/* XXX tmp */
-static uma_zone_t nat64lsn_host_zone;
-static uma_zone_t nat64lsn_pg_zone;
-static uma_zone_t nat64lsn_pgidx_zone;
+ result = flags & (TH_FIN|TH_SYN);
+ result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
+ result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
-static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
- struct nat64lsn_host *nh);
+ return (result);
+}
-#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))
-#define I6_first(_ph, h) (_ph)[h]
-#define I6_next(x) (x)->next
-#define I6_val(x) (&(x)->addr)
-#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
-#define I6_lock(a, b)
-#define I6_unlock(a, b)
+static void
+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+ struct nat64lsn_state *state)
+{
-#define I6HASH_FIND(_cfg, _res, _a) \
- CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
-#define I6HASH_INSERT(_cfg, _i) \
- CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
-#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
- CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+ memset(plog, 0, sizeof(*plog));
+ plog->length = PFLOG_REAL_HDRLEN;
+ plog->af = family;
+ plog->action = PF_NAT;
+ plog->dir = PF_IN;
+ plog->rulenr = htonl(state->ip_src);
+ plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
+ (state->proto << 8) | (state->ip_dst & 0xff));
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
-#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
- CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+#define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
+#define HOST_HVAL(c, a) HVAL((a),\
+ sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
+#define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
+
+#define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
+ sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
+#define ALIAS_BYHASH(c, v) \
+ ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
+static struct nat64lsn_aliaslink*
+nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
+ struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
+{
-#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)
+ /*
+ * We can implement some different algorithms how
+ * select an alias address.
+ * XXX: for now we use first available.
+ */
+ return (CK_SLIST_FIRST(&host->aliases));
+}
-static unsigned
-djb_hash(const unsigned char *h, const int len)
+#define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
+#define STATE_HASH(h, v) \
+ ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
+#define STATES_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? (p)->states : \
+ ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
+
+#ifdef __LP64__
+#define FREEMASK_FFSLL(pg, faddr) \
+ ffsll(*FREEMASK_CHUNK((pg), (faddr)))
+#define FREEMASK_BTR(pg, faddr, bit) \
+ ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_BTS(pg, faddr, bit) \
+ ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_ISSET(pg, faddr, bit) \
+ ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_COPY(pg, n, out) \
+ (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
+#else
+static inline int
+freemask_ffsll(uint32_t *freemask)
{
- unsigned int result = 0;
int i;
- for (i = 0; i < len; i++)
- result = 33 * result ^ h[i];
-
- return (result);
+ if ((i = ffsl(freemask[0])) != 0)
+ return (i);
+ if ((i = ffsl(freemask[1])) != 0)
+ return (i + 32);
+ return (0);
}
-
-/*
-static size_t
-bitmask_size(size_t num, int *level)
+#define FREEMASK_FFSLL(pg, faddr) \
+ freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
+#define FREEMASK_BTR(pg, faddr, bit) \
+ ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
+#define FREEMASK_BTS(pg, faddr, bit) \
+ ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
+#define FREEMASK_ISSET(pg, faddr, bit) \
+ ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
+#define FREEMASK_COPY(pg, n, out) \
+ (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
+ ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
+#endif /* !__LP64__ */
+
+
+#define NAT64LSN_TRY_PGCNT 32
+static struct nat64lsn_pg*
+nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
+ struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
+ uint32_t *pgidx, in_addr_t faddr)
{
- size_t x;
- int c;
+ struct nat64lsn_pg *pg, *oldpg;
+ uint32_t idx, oldidx;
+ int cnt;
+
+ cnt = 0;
+ /* First try last used PG */
+ oldpg = pg = ck_pr_load_ptr(pgptr);
+ idx = oldidx = ck_pr_load_32(pgidx);
+ /* If pgidx is out of range, reset it to the first pgchunk */
+ if (!ISSET32(*chunkmask, idx / 32))
+ idx = 0;
+ do {
+ ck_pr_fence_load();
+ if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
+ /*
+ * If last used PG has not free states,
+ * try to update pointer.
+ * NOTE: it can be already updated by jobs handler,
+ * thus we use CAS operation.
+ */
+ if (cnt > 0)
+ ck_pr_cas_ptr(pgptr, oldpg, pg);
+ return (pg);
+ }
+ /* Stop if idx is out of range */
+ if (!ISSET32(*chunkmask, idx / 32))
+ break;
+
+ if (ISSET32(pgmask[idx / 32], idx % 32))
+ pg = ck_pr_load_ptr(
+ &chunks[idx / 32]->pgptr[idx % 32]);
+ else
+ pg = NULL;
- for (c = 0, x = num; num > 1; num /= 64, c++)
- ;
+ idx++;
+ } while (++cnt < NAT64LSN_TRY_PGCNT);
- return (x);
+ /* If pgidx is out of range, reset it to the first pgchunk */
+ if (!ISSET32(*chunkmask, idx / 32))
+ idx = 0;
+ ck_pr_cas_32(pgidx, oldidx, idx);
+ return (NULL);
}
-static void
-bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+static struct nat64lsn_state*
+nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
+ const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
+ uint16_t port, uint8_t proto)
{
- size_t x, z;
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_state *state;
+ struct nat64lsn_pg *pg;
+ int i, offset;
+
+ NAT64LSN_EPOCH_ASSERT();
+
+ /* Check that we already have state for given arguments */
+ CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
+ if (state->proto == proto && state->ip_dst == faddr &&
+ state->sport == port && state->dport == f_id->dst_port)
+ return (state);
+ }
- memset(pmask, 0xFF, bufsize);
- for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
- ;
- pmask[x] ~= 0x01;
-}
-*/
+ link = nat64lsn_get_aliaslink(cfg, host, f_id);
+ if (link == NULL)
+ return (NULL);
-static void
-nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
- uint32_t n, uint32_t sn)
-{
+ switch (proto) {
+ case IPPROTO_TCP:
+ pg = nat64lsn_get_pg(
+ &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
+ link->alias->tcp, &link->alias->tcp_pg,
+ &link->alias->tcp_pgidx, faddr);
+ break;
+ case IPPROTO_UDP:
+ pg = nat64lsn_get_pg(
+ &link->alias->udp_chunkmask, link->alias->udp_pgmask,
+ link->alias->udp, &link->alias->udp_pg,
+ &link->alias->udp_pgidx, faddr);
+ break;
+ case IPPROTO_ICMP:
+ pg = nat64lsn_get_pg(
+ &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
+ link->alias->icmp, &link->alias->icmp_pg,
+ &link->alias->icmp_pgidx, faddr);
+ break;
+ default:
+ panic("%s: wrong proto %d", __func__, proto);
+ }
+ if (pg == NULL)
+ return (NULL);
- memset(plog, 0, sizeof(*plog));
- plog->length = PFLOG_REAL_HDRLEN;
- plog->af = family;
- plog->action = PF_NAT;
- plog->dir = PF_IN;
- plog->rulenr = htonl(n);
- plog->subrulenr = htonl(sn);
- plog->ruleset[0] = '\0';
- strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
- ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+ /* Check that PG has some free states */
+ state = NULL;
+ i = FREEMASK_BITCOUNT(pg, faddr);
+ while (i-- > 0) {
+ offset = FREEMASK_FFSLL(pg, faddr);
+ if (offset == 0) {
+ /*
+ * We lost the race.
+ * No more free states in this PG.
+ */
+ break;
+ }
+
+ /* Lets try to atomically grab the state */
+ if (FREEMASK_BTR(pg, faddr, offset - 1)) {
+ state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
+ /* Initialize */
+ state->flags = proto != IPPROTO_TCP ? 0 :
+ convert_tcp_flags(f_id->_flags);
+ state->proto = proto;
+ state->aport = pg->base_port + offset - 1;
+ state->dport = f_id->dst_port;
+ state->sport = port;
+ state->ip6_dst = f_id->dst_ip6;
+ state->ip_dst = faddr;
+ state->ip_src = link->alias->addr;
+ state->hval = hval;
+ state->host = host;
+ SET_AGE(state->timestamp);
+
+ /* Insert new state into host's hash table */
+ HOST_LOCK(host);
+ CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
+ state, entries);
+ host->states_count++;
+ /*
+ * XXX: In case if host is going to be expired,
+ * reset NAT64LSN_DEADHOST flag.
+ */
+ host->flags &= ~NAT64LSN_DEADHOST;
+ HOST_UNLOCK(host);
+ NAT64STAT_INC(&cfg->base.stats, screated);
+ /* Mark the state as ready for translate4 */
+ ck_pr_fence_store();
+ ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
+ break;
+ }
+ }
+ return (state);
}
+
/*
* Inspects icmp packets to see if the message contains different
* packet header so we need to alter @addr and @port.
*/
static int
-inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
+inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
uint16_t *port)
{
+ struct icmp *icmp;
struct ip *ip;
- struct tcphdr *tcp;
- struct udphdr *udp;
- struct icmphdr *icmp;
int off;
- uint8_t proto;
+ uint8_t inner_proto;
- ip = mtod(*m, struct ip *); /* Outer IP header */
+ ip = mtod(*mp, struct ip *); /* Outer IP header */
off = (ip->ip_hl << 2) + ICMP_MINLEN;
- if ((*m)->m_len < off)
- *m = m_pullup(*m, off);
- if (*m == NULL)
+ if ((*mp)->m_len < off)
+ *mp = m_pullup(*mp, off);
+ if (*mp == NULL)
return (ENOMEM);
- ip = mtod(*m, struct ip *); /* Outer IP header */
- icmp = L3HDR(ip, struct icmphdr *);
+ ip = mtod(*mp, struct ip *); /* Outer IP header */
+ icmp = L3HDR(ip, struct icmp *);
switch (icmp->icmp_type) {
case ICMP_ECHO:
case ICMP_ECHOREPLY:
/* Use icmp ID as distinguisher */
- *port = ntohs(*((uint16_t *)(icmp + 1)));
+ *port = ntohs(icmp->icmp_id);
return (0);
case ICMP_UNREACH:
case ICMP_TIMXCEED:
@@ -266,90 +448,133 @@ inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
* ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
* of ULP header.
*/
- if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
+ if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
return (EINVAL);
- if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
- *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
- if (*m == NULL)
+ if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
+ *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
+ if (*mp == NULL)
return (ENOMEM);
- ip = mtodo(*m, off); /* Inner IP header */
- proto = ip->ip_p;
+ ip = mtodo(*mp, off); /* Inner IP header */
+ inner_proto = ip->ip_p;
off += ip->ip_hl << 2; /* Skip inner IP header */
*addr = ntohl(ip->ip_src.s_addr);
- if ((*m)->m_len < off + ICMP_MINLEN)
- *m = m_pullup(*m, off + ICMP_MINLEN);
- if (*m == NULL)
+ if ((*mp)->m_len < off + ICMP_MINLEN)
+ *mp = m_pullup(*mp, off + ICMP_MINLEN);
+ if (*mp == NULL)
return (ENOMEM);
- switch (proto) {
+ switch (inner_proto) {
case IPPROTO_TCP:
- tcp = mtodo(*m, off);
- *nat_proto = NAT_PROTO_TCP;
- *port = ntohs(tcp->th_sport);
- return (0);
case IPPROTO_UDP:
- udp = mtodo(*m, off);
- *nat_proto = NAT_PROTO_UDP;
- *port = ntohs(udp->uh_sport);
+ /* Copy source port from the header */
+ *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
+ *proto = inner_proto;
return (0);
case IPPROTO_ICMP:
/*
* We will translate only ICMP errors for our ICMP
* echo requests.
*/
- icmp = mtodo(*m, off);
+ icmp = mtodo(*mp, off);
if (icmp->icmp_type != ICMP_ECHO)
return (EOPNOTSUPP);
- *port = ntohs(*((uint16_t *)(icmp + 1)));
+ *port = ntohs(icmp->icmp_id);
return (0);
};
return (EOPNOTSUPP);
}
-static inline uint8_t
-convert_tcp_flags(uint8_t flags)
+static struct nat64lsn_state*
+nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{
- uint8_t result;
+ struct nat64lsn_state *state;
+ struct nat64lsn_pg *pg;
+ int chunk_idx, pg_idx, state_idx;
- result = flags & (TH_FIN|TH_SYN);
- result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
- result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
+ NAT64LSN_EPOCH_ASSERT();
- return (result);
+ if (port < NAT64_MIN_PORT)
+ return (NULL);
+ /*
+ * Alias keeps 32 pgchunks for each protocol.
+ * Each pgchunk has 32 pointers to portgroup.
+ * Each portgroup has 64 states for ports.
+ */
+ port -= NAT64_MIN_PORT;
+ chunk_idx = port / 2048;
+
+ port -= chunk_idx * 2048;
+ pg_idx = port / 64;
+ state_idx = port % 64;
+
+ /*
+ * First check in proto_chunkmask that we have allocated PG chunk.
+ * Then check in proto_pgmask that we have valid PG pointer.
+ */
+ pg = NULL;
+ switch (proto) {
+ case IPPROTO_TCP:
+ if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
+ ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ case IPPROTO_UDP:
+ if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
+ ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->udp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ case IPPROTO_ICMP:
+ if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
+ ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ default:
+ panic("%s: wrong proto %d", __func__, proto);
+ }
+ if (pg == NULL)
+ return (NULL);
+
+ if (FREEMASK_ISSET(pg, faddr, state_idx))
+ return (NULL);
+
+ state = &STATES_CHUNK(pg, faddr)->state[state_idx];
+ ck_pr_fence_load();
+ if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
+ return (state);
+ return (NULL);
}
-static NAT64NOINLINE int
-nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
- struct mbuf **pm)
+static int
+nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
struct pfloghdr loghdr, *logdata;
struct in6_addr src6;
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_host *nh;
- struct nat64lsn_state *st;
- struct ip *ip;
- uint32_t addr;
- uint16_t state_flags, state_ts;
- uint16_t port, lport;
- uint8_t nat_proto;
+ struct nat64lsn_state *state;
+ struct nat64lsn_alias *alias;
+ uint32_t addr, flags;
+ uint16_t port, ts;
int ret;
+ uint8_t proto;
addr = f_id->dst_ip;
port = f_id->dst_port;
+ proto = f_id->proto;
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
}
- /* Check if protocol is supported and get its short id */
- nat_proto = nat64lsn_proto_map[f_id->proto];
- if (nat_proto == 0) {
- NAT64STAT_INC(&cfg->base.stats, noproto);
- return (cfg->nomatch_verdict);
- }
-
- /* We might need to handle icmp differently */
- if (nat_proto == NAT_PROTO_ICMP) {
- ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
+ /* Check if protocol is supported */
+ switch (proto) {
+ case IPPROTO_ICMP:
+ ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
if (ret != 0) {
if (ret == ENOMEM) {
NAT64STAT_INC(&cfg->base.stats, nomem);
@@ -358,804 +583,640 @@ nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
NAT64STAT_INC(&cfg->base.stats, noproto);
return (cfg->nomatch_verdict);
}
- /* XXX: Check addr for validity */
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
}
+ /* FALLTHROUGH */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ NAT64STAT_INC(&cfg->base.stats, noproto);
+ return (cfg->nomatch_verdict);
}
- /* Calc portgroup offset w.r.t protocol */
- pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
+ alias = &ALIAS_BYHASH(cfg, addr);
+ MPASS(addr == alias->addr);
- /* Check if this port is occupied by any portgroup */
- if (pg == NULL) {
+ /* Check that we have state for this port */
+ state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
+ port, proto);
+ if (state == NULL) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
-#if 0
- DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
- _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
-#endif
return (cfg->nomatch_verdict);
}
/* TODO: Check flags to see if we need to do some static mapping */
- nh = pg->host;
-
- /* Prepare some fields we might need to update */
- SET_AGE(state_ts);
- ip = mtod(*pm, struct ip *);
- if (ip->ip_p == IPPROTO_TCP)
- state_flags = convert_tcp_flags(
- L3HDR(ip, struct tcphdr *)->th_flags);
- else
- state_flags = 0;
-
- /* Lock host and get port mapping */
- NAT64_LOCK(nh);
- st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
- if (st->timestamp != state_ts)
- st->timestamp = state_ts;
- if ((st->flags & state_flags) != state_flags)
- st->flags |= state_flags;
- lport = htons(st->u.s.lport);
+ /* Update some state fields if needed */
+ SET_AGE(ts);
+ if (f_id->proto == IPPROTO_TCP)
+ flags = convert_tcp_flags(f_id->_flags);
+ else
+ flags = 0;
+ if (state->timestamp != ts)
+ state->timestamp = ts;
+ if ((state->flags & flags) != flags)
+ state->flags |= flags;
- NAT64_UNLOCK(nh);
+ port = htons(state->sport);
+ src6 = state->ip6_dst;
if (cfg->base.flags & NAT64_LOG) {
logdata = &loghdr;
- nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
+ nat64lsn_log(logdata, *mp, AF_INET, state);
} else
logdata = NULL;
+ /*
+ * We already have src6 with embedded address, but it is possible,
+ * that src_ip is different from state->ip_dst, which is why we
+ * do embedding again.
+ */
nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
- ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
+ ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
&cfg->base, logdata);
-
if (ret == NAT64SKIP)
return (cfg->nomatch_verdict);
- if (ret == NAT64MFREE)
- m_freem(*pm);
- *pm = NULL;
-
+ if (ret == NAT64RETURN)
+ *mp = NULL;
return (IP_FW_DENY);
}
-void
-nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
- const char *px, int off)
-{
- char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
-
- if ((V_nat64_debug & DP_STATE) == 0)
- return;
- inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
- inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
- inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
-
- DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
- "%s:%d AGE %d", px, pg->idx, st, off,
- s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
- d, st->u.s.fport, GET_AGE(st->timestamp));
-}
-
/*
- * Check if particular TCP state is stale and should be deleted.
+ * Check if particular state is stale and should be deleted.
* Return 1 if true, 0 otherwise.
*/
static int
-nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_state *st, int age)
+nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
- int ttl;
-
- if (st->flags & NAT64_FLAG_FIN)
- ttl = cfg->st_close_ttl;
- else if (st->flags & NAT64_FLAG_ESTAB)
- ttl = cfg->st_estab_ttl;
- else if (st->flags & NAT64_FLAG_SYN)
- ttl = cfg->st_syn_ttl;
- else
- ttl = cfg->st_syn_ttl;
+ int age, ttl;
- if (age > ttl)
+ /* State was marked as stale in previous pass. */
+ if (ISSET32(state->flags, NAT64_BIT_STALE))
return (1);
- return (0);
-}
-
-/*
- * Check if nat state @st is stale and should be deleted.
- * Return 1 if true, 0 otherwise.
- */
-static NAT64NOINLINE int
-nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
-{
- int age, delete;
-
- age = GET_AGE(st->timestamp);
- delete = 0;
- /* Skip immutable records */
- if (st->flags & NAT64_FLAG_RDR)
+ /* State is not yet initialized, it is going to be READY */
+ if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
return (0);
- switch (pg->nat_proto) {
- case NAT_PROTO_TCP:
- delete = nat64lsn_periodic_check_tcp(cfg, st, age);
- break;
- case NAT_PROTO_UDP:
- if (age > cfg->st_udp_ttl)
- delete = 1;
- break;
- case NAT_PROTO_ICMP:
- if (age > cfg->st_icmp_ttl)
- delete = 1;
- break;
+ age = GET_AGE(state->timestamp);
+ switch (state->proto) {
+ case IPPROTO_TCP:
+ if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
+ ttl = cfg->st_close_ttl;
+ else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
+ ttl = cfg->st_estab_ttl;
+ else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
+ ttl = cfg->st_syn_ttl;
+ else
+ ttl = cfg->st_syn_ttl;
+ if (age > ttl)
+ return (1);
+ break;
+ case IPPROTO_UDP:
+ if (age > cfg->st_udp_ttl)
+ return (1);
+ break;
+ case IPPROTO_ICMP:
+ if (age > cfg->st_icmp_ttl)
+ return (1);
+ break;
}
-
- return (delete);
+ return (0);
}
-
-/*
- * The following structures and functions
- * are used to perform SLIST_FOREACH_SAFE()
- * analog for states identified by struct st_ptr.
- */
-
-struct st_idx {
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
- struct st_ptr sidx_next;
-};
-
-static struct st_idx *
-st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
- struct st_ptr *sidx, struct st_idx *si)
+static int
+nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
-
- if (sidx->idx == 0) {
- memset(si, 0, sizeof(*si));
- return (si);
+ struct nat64lsn_state *state;
+ struct nat64lsn_host *host;
+ uint64_t freemask;
+ int c, i, update_age;
+
+ update_age = 0;
+ for (c = 0; c < pg->chunks_count; c++) {
+ FREEMASK_COPY(pg, c, freemask);
+ for (i = 0; i < 64; i++) {
+ if (ISSET64(freemask, i))
+ continue;
+ state = &STATES_CHUNK(pg, c)->state[i];
+ if (nat64lsn_check_state(cfg, state) == 0) {
+ update_age = 1;
+ continue;
+ }
+ /*
+ * Expire state:
+ * 1. Mark as STALE and unlink from host's hash.
+ * 2. Set bit in freemask.
+ */
+ if (ISSET32(state->flags, NAT64_BIT_STALE)) {
+ /*
+ * State was marked as STALE in previous
+ * pass. Now it is safe to release it.
+ */
+ state->flags = 0;
+ ck_pr_fence_store();
+ FREEMASK_BTS(pg, c, i);
+ NAT64STAT_INC(&cfg->base.stats, sdeleted);
+ continue;
+ }
+ MPASS(state->flags & NAT64_FLAG_READY);
+
+ host = state->host;
+ HOST_LOCK(host);
+ CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
+ state, nat64lsn_state, entries);
+ host->states_count--;
+ HOST_UNLOCK(host);
+
+ /* Reset READY flag */
+ ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
+ /* And set STALE flag */
+ ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
+ ck_pr_fence_store();
+ /*
+ * Now translate6 will not use this state, wait
+ * until it becomes safe for translate4, then mark
+ * state as free.
+ */
+ }
}
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
- st = &pg->states[sidx->off];
+ /*
+ * We have some alive states, update timestamp.
+ */
+ if (update_age)
+ SET_AGE(pg->timestamp);
- si->pg = pg;
- si->st = st;
- si->sidx_next = st->next;
+ if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
+ return (0);
- return (si);
+ return (1);
}
-static struct st_idx *
-st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
- struct st_idx *si)
+static void
+nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_pg_slist *portgroups)
{
- struct st_ptr sidx;
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
-
- sidx = si->sidx_next;
- if (sidx.idx == 0) {
- memset(si, 0, sizeof(*si));
- si->st = NULL;
- si->pg = NULL;
- return (si);
+ struct nat64lsn_alias *alias;
+ struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
+ uint32_t *pgmask, *pgidx;
+ int i, idx;
+
+ for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
+ alias = &cfg->aliases[i];
+ CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
+ if (nat64lsn_maintain_pg(cfg, pg) == 0)
+ continue;
+ /* Always keep first PG */
+ if (pg->base_port == NAT64_MIN_PORT)
+ continue;
+ /*
+ * PG is expired, unlink it and schedule for
+ * deferred destroying.
+ */
+ idx = (pg->base_port - NAT64_MIN_PORT) / 64;
+ switch (pg->proto) {
+ case IPPROTO_TCP:
+ pgmask = alias->tcp_pgmask;
+ pgptr = &alias->tcp_pg;
+ pgidx = &alias->tcp_pgidx;
+ firstpg = alias->tcp[0]->pgptr[0];
+ break;
+ case IPPROTO_UDP:
+ pgmask = alias->udp_pgmask;
+ pgptr = &alias->udp_pg;
+ pgidx = &alias->udp_pgidx;
+ firstpg = alias->udp[0]->pgptr[0];
+ break;
+ case IPPROTO_ICMP:
+ pgmask = alias->icmp_pgmask;
+ pgptr = &alias->icmp_pg;
+ pgidx = &alias->icmp_pgidx;
+ firstpg = alias->icmp[0]->pgptr[0];
+ break;
+ }
+ /* Reset the corresponding bit in pgmask array. */
+ ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
+ ck_pr_fence_store();
+ /* If last used PG points to this PG, reset it. */
+ ck_pr_cas_ptr(pgptr, pg, firstpg);
+ ck_pr_cas_32(pgidx, idx, 0);
+ /* Unlink PG from alias's chain */
+ ALIAS_LOCK(alias);
+ CK_SLIST_REMOVE(&alias->portgroups, pg,
+ nat64lsn_pg, entries);
+ alias->portgroups_count--;
+ ALIAS_UNLOCK(alias);
+ /* And link to job's chain for deferred destroying */
+ NAT64STAT_INC(&cfg->base.stats, spgdeleted);
+ CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
+ }
}
-
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- st = &pg->states[sidx.off];
-
- si->pg = pg;
- si->st = st;
- si->sidx_next = st->next;
-
- return (si);
-}
-
-static struct st_idx *
-st_save_cond(struct st_idx *si_dst, struct st_idx *si)
-{
- if (si->st != NULL)
- *si_dst = *si;
-
- return (si_dst);
}
-unsigned int
-nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
+static void
+nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_hosts_slist *hosts)
{
- struct st_idx si, si_prev;
+ struct nat64lsn_host *host, *tmp;
int i;
- unsigned int delcount;
-
- delcount = 0;
- for (i = 0; i < nh->hsize; i++) {
- memset(&si_prev, 0, sizeof(si_prev));
- for (st_first(cfg, nh, &nh->phash[i], &si);
- si.st != NULL;
- st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
- if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
+ entries, tmp) {
+ /* Was host marked in previous call? */
+ if (host->flags & NAT64LSN_DEADHOST) {
+ if (host->states_count > 0) {
+ host->flags &= ~NAT64LSN_DEADHOST;
+ continue;
+ }
+ /*
+ * Unlink host from hash table and schedule
+ * it for deferred destroying.
+ */
+ CFG_LOCK(cfg);
+ CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
+ nat64lsn_host, entries);
+ cfg->hosts_count--;
+ CFG_UNLOCK(cfg);
+ CK_SLIST_INSERT_HEAD(hosts, host, entries);
+ continue;
+ }
+ if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
continue;
- nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
- si.st->cur.off);
- /* Unlink from hash */
- if (si_prev.st != NULL)
- si_prev.st->next = si.st->next;
- else
- nh->phash[i] = si.st->next;
- /* Delete state and free its data */
- PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
- memset(si.st, 0, sizeof(struct nat64lsn_state));
- si.st = NULL;
- delcount++;
-
- /* Update portgroup timestamp */
- SET_AGE(si.pg->timestamp);
+ if (host->states_count > 0)
+ continue;
+ /* Mark host as going to be expired in next pass */
+ host->flags |= NAT64LSN_DEADHOST;
+ ck_pr_fence_store();
}
}
- NAT64STAT_ADD(&cfg->base.stats, sdeleted, delcount);
- return (delcount);
-}
-
-/*
- * Checks if portgroup is not used and can be deleted,
- * Returns 1 if stale, 0 otherwise
- */
-static int
-stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
-{
-
- if (!PG_IS_EMPTY(pg))
- return (0);
- if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
- return (0);
- return (1);
}
-/*
- * Checks if host record is not used and can be deleted,
- * Returns 1 if stale, 0 otherwise
- */
-static int
-stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
+static struct nat64lsn_pgchunk*
+nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
-
- if (nh->pg_used != 0)
- return (0);
- if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay)
- return (0);
- return (1);
-}
-
-struct nat64lsn_periodic_data {
- struct nat64lsn_cfg *cfg;
- struct nat64lsn_job_head jhead;
- int jlen;
-};
-
-static NAT64NOINLINE int
-nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
- struct nat64lsn_periodic_data *d)
-{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_job_item *ji;
- uint64_t delmask[NAT64LSN_PGPTRNMASK];
- int delcount, i;
-
- delcount = 0;
- memset(delmask, 0, sizeof(delmask));
-
- if (V_nat64_debug & DP_JQUEUE) {
- char a[INET6_ADDRSTRLEN];
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
- stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
- }
- if (!stale_nh(d->cfg, nh)) {
- /* Non-stale host. Inspect internals */
- NAT64_LOCK(nh);
-
- /* Stage 1: Check&expire states */
- if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
- SET_AGE(nh->timestamp);
-
- /* Stage 2: Check if we need to expire */
- for (i = 0; i < nh->pg_used; i++) {
- pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
- if (pg == NULL)
+#if 0
+ struct nat64lsn_alias *alias;
+ struct nat64lsn_pgchunk *chunk;
+ uint32_t pgmask;
+ int i, c;
+
+ for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
+ alias = &cfg->aliases[i];
+ if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
+ continue;
+ /* Always keep single chunk allocated */
+ for (c = 1; c < 32; c++) {
+ if ((alias->tcp_chunkmask & (1 << c)) == 0)
+ break;
+ chunk = ck_pr_load_ptr(&alias->tcp[c]);
+ if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue;
-
- /* Check if we can delete portgroup */
- if (stale_pg(d->cfg, pg) == 0)
+ ck_pr_btr_32(&alias->tcp_chunkmask, c);
+ ck_pr_fence_load();
+ if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue;
-
- DPRINTF(DP_JQUEUE, "Check PG %d", i);
- delmask[i / 64] |= ((uint64_t)1 << (i % 64));
- delcount++;
}
-
- NAT64_UNLOCK(nh);
- if (delcount == 0)
- return (0);
}
+#endif
+ return (NULL);
+}
- DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
- /* We have something to delete - add it to queue */
- ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
- if (ji == NULL)
- return (0);
-
- ji->haddr = nh->addr;
- ji->delcount = delcount;
- memcpy(ji->delmask, delmask, sizeof(ji->delmask));
-
- TAILQ_INSERT_TAIL(&d->jhead, ji, next);
- d->jlen++;
- return (0);
+#if 0
+static void
+nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
+{
+ struct nat64lsn_host *h;
+ struct nat64lsn_states_slist *hash;
+ int i, j, hsize;
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
+ if (h->states_count / 2 < h->states_hashsize ||
+ h->states_hashsize >= NAT64LSN_MAX_HSIZE)
+ continue;
+ hsize = h->states_hashsize * 2;
+ hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
+ if (hash == NULL)
+ continue;
+ for (j = 0; j < hsize; j++)
+ CK_SLIST_INIT(&hash[i]);
+
+ ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
+ }
+ }
}
+#endif
/*
* This procedure is used to perform various maintance
- * on dynamic hash list. Currently it is called every second.
+ * on dynamic hash list. Currently it is called every 4 seconds.
*/
static void
nat64lsn_periodic(void *data)
{
- struct ip_fw_chain *ch;
- IPFW_RLOCK_TRACKER;
+ struct nat64lsn_job_item *ji;
struct nat64lsn_cfg *cfg;
- struct nat64lsn_periodic_data d;
- struct nat64lsn_host *nh, *tmp;
cfg = (struct nat64lsn_cfg *) data;
- ch = cfg->ch;
CURVNET_SET(cfg->vp);
-
- memset(&d, 0, sizeof(d));
- d.cfg = cfg;
- TAILQ_INIT(&d.jhead);
-
- IPFW_RLOCK(ch);
-
- /* Stage 1: foreach host, check all its portgroups */
- I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
-
- /* Enqueue everything we have requested */
- nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
-
+ if (cfg->hosts_count > 0) {
+ ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
+ if (ji != NULL) {
+ ji->jtype = JTYPE_DESTROY;
+ CK_SLIST_INIT(&ji->hosts);
+ CK_SLIST_INIT(&ji->portgroups);
+ nat64lsn_expire_hosts(cfg, &ji->hosts);
+ nat64lsn_expire_portgroups(cfg, &ji->portgroups);
+ ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
+ NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
+ nat64lsn_job_destroy);
+ } else
+ NAT64STAT_INC(&cfg->base.stats, jnomem);
+ }
callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
-
- IPFW_RUNLOCK(ch);
-
CURVNET_RESTORE();
}
-static NAT64NOINLINE void
-reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
-
- if (ji->m == NULL)
- return;
-
- /* Request has failed or packet type is wrong */
- if (ji->f_id.addr_type != 6 || ji->done == 0) {
- m_freem(ji->m);
- ji->m = NULL;
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
- ji->jtype, ji->done);
- return;
- }
-
- /*
- * XXX: Limit recursion level
- */
-
- NAT64STAT_INC(&cfg->base.stats, jreinjected);
- DPRINTF(DP_JQUEUE, "Reinject mbuf");
- nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
-}
-
-static void
-destroy_portgroup(struct nat64lsn_portgroup *pg)
-{
-
- DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
- uma_zfree(nat64lsn_pg_zone, pg);
-}
-
-static NAT64NOINLINE int
-alloc_portgroup(struct nat64lsn_job_item *ji)
-{
- struct nat64lsn_portgroup *pg;
-
- pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
- if (pg == NULL)
- return (1);
-
- if (ji->needs_idx != 0) {
- ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
- /* Failed alloc isn't always fatal, so don't check */
- }
- memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
- pg->nat_proto = ji->nat_proto;
- ji->pg = pg;
- return (0);
-
-}
-
-static void
-destroy_host6(struct nat64lsn_host *nh)
+#define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage): 0)
+#define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
+#define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
+static int
+nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_host *host;
+ struct nat64lsn_state *state;
+ uint32_t hval, data[2];
int i;
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
- nh->pg_used);
- NAT64_LOCK_DESTROY(nh);
- for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
- uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
- uma_zfree(nat64lsn_host_zone, nh);
-}
-
-static NAT64NOINLINE int
-alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
- struct nat64lsn_host *nh;
- char a[INET6_ADDRSTRLEN];
-
- nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
- if (nh == NULL)
- return (1);
- PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
- if (PORTGROUP_CHUNK(nh, 0) == NULL) {
- uma_zfree(nat64lsn_host_zone, nh);
- return (2);
- }
- if (alloc_portgroup(ji) != 0) {
- NAT64STAT_INC(&cfg->base.stats, jportfails);
- uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
- uma_zfree(nat64lsn_host_zone, nh);
- return (3);
+ /* Check that host was not yet added. */
+ NAT64LSN_EPOCH_ASSERT();
+ CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
+ if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
+ /* The host was allocated in previous call. */
+ ji->host = host;
+ goto get_state;
+ }
}
- NAT64_LOCK_INIT(nh);
- nh->addr = ji->haddr;
- nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
- nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
- nh->pg_used = 0;
- ji->nh = nh;
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
- return (0);
-}
-
-/*
- * Finds free @pg index inside @nh
- */
-static NAT64NOINLINE int
-find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
-{
- int i;
+ host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
+ if (ji->host == NULL)
+ return (HOST_ERROR(1));
- for (i = 0; i < nh->pg_allocated; i++) {
- if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) {
- *idx = i;
- return (0);
- }
+ host->states_hashsize = NAT64LSN_HSIZE;
+ host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
+ host->states_hashsize, M_NAT64LSN, M_NOWAIT);
+ if (host->states_hash == NULL) {
+ uma_zfree(nat64lsn_host_zone, host);
+ return (HOST_ERROR(2));
}
- return (1);
-}
-static NAT64NOINLINE int
-attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
- char a[INET6_ADDRSTRLEN];
- struct nat64lsn_host *nh;
-
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL) {
- /* Add new host to list */
- nh = ji->nh;
- I6HASH_INSERT(cfg, nh);
- cfg->ihcount++;
- ji->nh = NULL;
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
- /*
- * Try to add portgroup.
- * Note it will automatically set
- * 'done' on ji if successful.
- */
- if (attach_portgroup(cfg, ji) != 0) {
- DPRINTF(DP_DROPS, "%s %p failed to attach PG",
- a, nh);
- NAT64STAT_INC(&cfg->base.stats, jportfails);
- return (1);
- }
- return (0);
+ link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
+ if (link == NULL) {
+ free(host->states_hash, M_NAT64LSN);
+ uma_zfree(nat64lsn_host_zone, host);
+ return (HOST_ERROR(3));
}
+ /* Initialize */
+ HOST_LOCK_INIT(host);
+ SET_AGE(host->timestamp);
+ host->addr = ji->f_id.src_ip6;
+ host->hval = ji->src6_hval;
+ host->flags = 0;
+ host->states_count = 0;
+ host->states_hashsize = NAT64LSN_HSIZE;
+ CK_SLIST_INIT(&host->aliases);
+ for (i = 0; i < host->states_hashsize; i++)
+ CK_SLIST_INIT(&host->states_hash[i]);
+
+ /* Determine alias from flow hash. */
+ hval = ALIASLINK_HVAL(cfg, &ji->f_id);
+ link->alias = &ALIAS_BYHASH(cfg, hval);
+ CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
+
+ ALIAS_LOCK(link->alias);
+ CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
+ link->alias->hosts_count++;
+ ALIAS_UNLOCK(link->alias);
+
+ CFG_LOCK(cfg);
+ CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
+ cfg->hosts_count++;
+ CFG_UNLOCK(cfg);
+
+get_state:
+ data[0] = ji->faddr;
+ data[1] = (ji->f_id.dst_port << 16) | ji->port;
+ ji->state_hval = hval = STATE_HVAL(cfg, data);
+ state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
+ ji->faddr, ji->port, ji->proto);
/*
- * nh isn't NULL. This probably means we had several simultaneous
- * host requests. The previous one request has already attached
- * this host. Requeue attached mbuf and mark job as done, but
- * leave nh and pg pointers not changed, so nat64lsn_do_request()
- * will release all allocated resources.
+ * We failed to obtain new state, used alias needs new PG.
+ * XXX: or another alias should be used.
*/
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "%s %p is already attached as %p",
- a, ji->nh, nh);
+ if (state == NULL) {
+ /* Try to allocate new PG */
+ if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
+ return (HOST_ERROR(4));
+ /* We assume that nat64lsn_alloc_pg() got state */
+ } else
+ ji->state = state;
+
ji->done = 1;
- return (0);
+ DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
+ inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
+ return (HOST_ERROR(0));
}
-static NAT64NOINLINE int
-find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
- int nat_proto, uint16_t *aport, int *ppg_idx)
+static int
+nat64lsn_find_pg_place(uint32_t *data)
{
- int j, pg_idx;
-
- pg_idx = addr_off * _ADDR_PG_COUNT +
- (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
+ int i;
- for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) {
- if (cfg->pg[pg_idx + j] != NULL)
+ for (i = 0; i < 32; i++) {
+ if (~data[i] == 0)
continue;
-
- *aport = j * NAT64_CHUNK_SIZE;
- *ppg_idx = pg_idx + j;
- return (1);
+ return (i * 32 + ffs(~data[i]) - 1);
}
-
- return (0);
+ return (-1);
}
-/*
- * XXX: This function needs to be rewritten to
- * use free bitmask for faster pg finding,
- * additionally, it should take into consideration
- * a) randomization and
- * b) previous addresses allocated to given nat instance
- *
- */
-static NAT64NOINLINE int
-find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
- uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
+static int
+nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_alias *alias, uint32_t *chunkmask,
+ uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
+ struct nat64lsn_pg **pgptr, uint8_t proto)
{
- int i, nat_proto;
-
- /*
- * XXX: Use bitmask index to be able to find/check if IP address
- * has some spare pg's
- */
- nat_proto = ji->nat_proto;
-
- /* First, try to use same address */
- if (ji->aaddr != 0) {
- i = ntohl(ji->aaddr) - cfg->prefix4;
- if (find_pg_place_addr(cfg, i, nat_proto, aport,
- ppg_idx) != 0){
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
- }
- }
-
- /* Next, try to use random address based on flow hash */
- i = ji->fhash % (1 << (32 - cfg->plen4));
- if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) {
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
+ struct nat64lsn_pg *pg;
+ int i, pg_idx, chunk_idx;
+
+ /* Find place in pgchunk where PG can be added */
+ pg_idx = nat64lsn_find_pg_place(pgmask);
+ if (pg_idx < 0) /* no more PGs */
+ return (PG_ERROR(1));
+ /* Check that we have allocated pgchunk for given PG index */
+ chunk_idx = pg_idx / 32;
+ if (!ISSET32(*chunkmask, chunk_idx)) {
+ chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
+ M_NOWAIT);
+ if (chunks[chunk_idx] == NULL)
+ return (PG_ERROR(2));
+ ck_pr_bts_32(chunkmask, chunk_idx);
+ ck_pr_fence_store();
}
-
-
- /* Last one: simply find ANY available */
- for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
- if (find_pg_place_addr(cfg, i, nat_proto, aport,
- ppg_idx) != 0){
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
+ /* Allocate PG and states chunks */
+ pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
+ if (pg == NULL)
+ return (PG_ERROR(3));
+ pg->chunks_count = cfg->states_chunks;
+ if (pg->chunks_count > 1) {
+ pg->freemask_chunk = malloc(pg->chunks_count *
+ sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
+ if (pg->freemask_chunk == NULL) {
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(4));
+ }
+ pg->states_chunk = malloc(pg->chunks_count *
+ sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
+ M_NOWAIT | M_ZERO);
+ if (pg->states_chunk == NULL) {
+ free(pg->freemask_chunk, M_NAT64LSN);
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(5));
}
+ for (i = 0; i < pg->chunks_count; i++) {
+ pg->states_chunk[i] = uma_zalloc(
+ nat64lsn_state_zone, M_NOWAIT);
+ if (pg->states_chunk[i] == NULL)
+ goto states_failed;
+ }
+ memset(pg->freemask_chunk, 0xff,
+ sizeof(uint64_t) * pg->chunks_count);
+ } else {
+ pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
+ if (pg->states == NULL) {
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(6));
+ }
+ memset(&pg->freemask64, 0xff, sizeof(uint64_t));
}
- return (1);
+ /* Initialize PG and hook it to pgchunk */
+ SET_AGE(pg->timestamp);
+ pg->proto = proto;
+ pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
+ ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
+ ck_pr_fence_store();
+ ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
+ ck_pr_store_ptr(pgptr, pg);
+
+ ALIAS_LOCK(alias);
+ CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
+ SET_AGE(alias->timestamp);
+ alias->portgroups_count++;
+ ALIAS_UNLOCK(alias);
+ NAT64STAT_INC(&cfg->base.stats, spgcreated);
+ return (PG_ERROR(0));
+
+states_failed:
+ for (i = 0; i < pg->chunks_count; i++)
+ uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
+ free(pg->freemask_chunk, M_NAT64LSN);
+ free(pg->states_chunk, M_NAT64LSN);
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(7));
}
-static NAT64NOINLINE int
-attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+static int
+nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
- char a[INET6_ADDRSTRLEN];
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_host *nh;
- uint32_t aaddr;
- uint16_t aport;
- int nh_pg_idx, pg_idx;
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_alias *alias;
+ int ret;
- pg = ji->pg;
+ link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
+ if (link == NULL)
+ return (PG_ERROR(1));
/*
- * Find source host and bind: we can't rely on
- * pg->host
+	 * TODO: check that we did not already allocate a PG in
+ * previous call.
*/
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL)
- return (1);
- /* Find spare port chunk */
- if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
- return (2);
+ ret = 0;
+ alias = link->alias;
+ /* Find place in pgchunk where PG can be added */
+ switch (ji->proto) {
+ case IPPROTO_TCP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->tcp_chunkmask, alias->tcp_pgmask,
+ alias->tcp, &alias->tcp_pg, ji->proto);
+ break;
+ case IPPROTO_UDP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->udp_chunkmask, alias->udp_pgmask,
+ alias->udp, &alias->udp_pg, ji->proto);
+ break;
+ case IPPROTO_ICMP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->icmp_chunkmask, alias->icmp_pgmask,
+ alias->icmp, &alias->icmp_pg, ji->proto);
+ break;
+ default:
+ panic("%s: wrong proto %d", __func__, ji->proto);
}
-
- /* Expand PG indexes if needed */
- if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
- PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
- ji->spare_idx;
- nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
- ji->spare_idx = NULL;
+ if (ret == PG_ERROR(1)) {
+ /*
+ * PG_ERROR(1) means that alias lacks free PGs
+ * XXX: try next alias.
+ */
+ printf("NAT64LSN: %s: failed to obtain PG\n",
+ __func__);
+ return (ret);
}
-
- /* Find empty index to store PG in the @nh */
- if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
- a);
- return (3);
+ if (ret == PG_ERROR(0)) {
+ ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
+ ji->state_hval, ji->faddr, ji->port, ji->proto);
+ if (ji->state == NULL)
+ ret = PG_ERROR(8);
+ else
+ ji->done = 1;
}
-
- cfg->pg[pg_idx] = pg;
- cfg->protochunks[pg->nat_proto]++;
- NAT64STAT_INC(&cfg->base.stats, spgcreated);
-
- pg->aaddr = aaddr;
- pg->aport = aport;
- pg->host = nh;
- pg->idx = pg_idx;
- SET_AGE(pg->timestamp);
-
- PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
- if (nh->pg_used == nh_pg_idx)
- nh->pg_used++;
- SET_AGE(nh->timestamp);
-
- ji->pg = NULL;
- ji->done = 1;
-
- return (0);
+ return (ret);
}
-static NAT64NOINLINE void
-consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+static void
+nat64lsn_do_request(void *data)
{
- struct nat64lsn_host *nh, *nh_tmp;
- struct nat64lsn_portgroup *pg, *pg_list[256];
- int i, pg_lidx, idx;
+ struct epoch_tracker et;
+ struct nat64lsn_job_head jhead;
+ struct nat64lsn_job_item *ji, *ji2;
+ struct nat64lsn_cfg *cfg;
+ int jcount;
+ uint8_t flags;
- /* Find source host */
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL || nh->pg_used == 0)
+ cfg = (struct nat64lsn_cfg *)data;
+ if (cfg->jlen == 0)
return;
- memset(pg_list, 0, sizeof(pg_list));
- pg_lidx = 0;
-
- NAT64_LOCK(nh);
-
- for (i = nh->pg_used - 1; i >= 0; i--) {
- if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
- continue;
- pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
-
- /* Check that PG isn't busy. */
- if (stale_pg(cfg, pg) == 0)
- continue;
-
- /* DO delete */
- pg_list[pg_lidx++] = pg;
- PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
-
- idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
- pg->aport);
- KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
- cfg->pg[idx] = NULL;
- cfg->protochunks[pg->nat_proto]--;
- NAT64STAT_INC(&cfg->base.stats, spgdeleted);
-
- /* Decrease pg_used */
- while (nh->pg_used > 0 &&
- PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
- nh->pg_used--;
-
- /* Check if on-stack buffer has ended */
- if (pg_lidx == nitems(pg_list))
- break;
- }
-
- NAT64_UNLOCK(nh);
-
- if (stale_nh(cfg, nh)) {
- I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
- KASSERT(nh != NULL, ("Unable to find address"));
- cfg->ihcount--;
- ji->nh = nh;
- I6HASH_FIND(cfg, nh, &ji->haddr);
- KASSERT(nh == NULL, ("Failed to delete address"));
- }
-
- /* TODO: Delay freeing portgroups */
- while (pg_lidx > 0) {
- pg_lidx--;
- NAT64STAT_INC(&cfg->base.stats, spgdeleted);
- destroy_portgroup(pg_list[pg_lidx]);
- }
-}
-
-/*
- * Main request handler.
- * Responsible for handling jqueue, e.g.
- * creating new hosts, addind/deleting portgroups.
- */
-static NAT64NOINLINE void
-nat64lsn_do_request(void *data)
-{
- IPFW_RLOCK_TRACKER;
- struct nat64lsn_job_head jhead;
- struct nat64lsn_job_item *ji;
- int jcount, nhsize;
- struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
- struct ip_fw_chain *ch;
- int delcount;
-
CURVNET_SET(cfg->vp);
-
- TAILQ_INIT(&jhead);
-
- /* XXX: We're running unlocked here */
-
- ch = cfg->ch;
- delcount = 0;
- IPFW_RLOCK(ch);
+ STAILQ_INIT(&jhead);
/* Grab queue */
JQUEUE_LOCK();
- TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+ STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
jcount = cfg->jlen;
cfg->jlen = 0;
JQUEUE_UNLOCK();
- /* check if we need to resize hash */
- nhsize = 0;
- if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
- nhsize = cfg->ihsize;
- for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
- ;
- } else if (cfg->ihcount < cfg->ihsize * 4) {
- nhsize = cfg->ihsize;
- for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
- ;
- }
-
- IPFW_RUNLOCK(ch);
-
- if (TAILQ_EMPTY(&jhead)) {
- CURVNET_RESTORE();
- return;
- }
+ /* TODO: check if we need to resize hash */
NAT64STAT_INC(&cfg->base.stats, jcalls);
DPRINTF(DP_JQUEUE, "count=%d", jcount);
@@ -1169,442 +1230,283 @@ nat64lsn_do_request(void *data)
* TODO: Limit per-call number of items
*/
- /* Pre-allocate everything for entire chain */
- TAILQ_FOREACH(ji, &jhead, next) {
+ NAT64LSN_EPOCH_ENTER(et);
+ STAILQ_FOREACH(ji, &jhead, entries) {
switch (ji->jtype) {
- case JTYPE_NEWHOST:
- if (alloc_host6(cfg, ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jhostfails);
- break;
- case JTYPE_NEWPORTGROUP:
- if (alloc_portgroup(ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jportfails);
- break;
- case JTYPE_DELPORTGROUP:
- delcount += ji->delcount;
- break;
- default:
- break;
+ case JTYPE_NEWHOST:
+ if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
+ NAT64STAT_INC(&cfg->base.stats, jhostfails);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
+ NAT64STAT_INC(&cfg->base.stats, jportfails);
+ break;
+ default:
+ continue;
}
- }
-
- /*
- * TODO: Alloc hew hash
- */
- nhsize = 0;
- if (nhsize > 0) {
- /* XXX: */
- }
-
- /* Apply all changes in batch */
- IPFW_UH_WLOCK(ch);
- IPFW_WLOCK(ch);
-
- TAILQ_FOREACH(ji, &jhead, next) {
- switch (ji->jtype) {
- case JTYPE_NEWHOST:
- if (ji->nh != NULL)
- attach_host6(cfg, ji);
- break;
- case JTYPE_NEWPORTGROUP:
- if (ji->pg != NULL &&
- attach_portgroup(cfg, ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jportfails);
- break;
- case JTYPE_DELPORTGROUP:
- consider_del_portgroup(cfg, ji);
- break;
+ if (ji->done != 0) {
+ flags = ji->proto != IPPROTO_TCP ? 0 :
+ convert_tcp_flags(ji->f_id._flags);
+ nat64lsn_translate6_internal(cfg, &ji->m,
+ ji->state, flags);
+ NAT64STAT_INC(&cfg->base.stats, jreinjected);
}
}
+ NAT64LSN_EPOCH_EXIT(et);
- if (nhsize > 0) {
- /* XXX: Move everything to new hash */
- }
-
- IPFW_WUNLOCK(ch);
- IPFW_UH_WUNLOCK(ch);
-
- /* Flush unused entries */
- while (!TAILQ_EMPTY(&jhead)) {
- ji = TAILQ_FIRST(&jhead);
- TAILQ_REMOVE(&jhead, ji, next);
- if (ji->nh != NULL)
- destroy_host6(ji->nh);
- if (ji->pg != NULL)
- destroy_portgroup(ji->pg);
- if (ji->m != NULL)
- reinject_mbuf(cfg, ji);
- if (ji->spare_idx != NULL)
- uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
- free(ji, M_IPFW);
+ ji = STAILQ_FIRST(&jhead);
+ while (ji != NULL) {
+ ji2 = STAILQ_NEXT(ji, entries);
+ /*
+ * In any case we must free mbuf if
+		 * translator did not consume it.
+ */
+ m_freem(ji->m);
+ uma_zfree(nat64lsn_job_zone, ji);
+ ji = ji2;
}
CURVNET_RESTORE();
}
-static NAT64NOINLINE struct nat64lsn_job_item *
-nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
- int jtype)
+static struct nat64lsn_job_item *
+nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
struct nat64lsn_job_item *ji;
- struct in6_addr haddr;
- uint8_t nat_proto;
/*
- * Do not try to lock possibly contested mutex if we're near the limit.
- * Drop packet instead.
+ * Do not try to lock possibly contested mutex if we're near the
+ * limit. Drop packet instead.
*/
- if (cfg->jlen >= cfg->jmaxlen) {
+ ji = NULL;
+ if (cfg->jlen >= cfg->jmaxlen)
NAT64STAT_INC(&cfg->base.stats, jmaxlen);
- return (NULL);
- }
-
- memset(&haddr, 0, sizeof(haddr));
- nat_proto = 0;
- if (f_id != NULL) {
- haddr = f_id->src_ip6;
- nat_proto = nat64lsn_proto_map[f_id->proto];
-
- DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
- nat_proto, f_id->proto);
-
- if (nat_proto == 0)
- return (NULL);
+ else {
+ ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
+ if (ji == NULL)
+ NAT64STAT_INC(&cfg->base.stats, jnomem);
}
-
- ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
- M_NOWAIT | M_ZERO);
-
if (ji == NULL) {
- NAT64STAT_INC(&cfg->base.stats, jnomem);
- return (NULL);
- }
-
- ji->jtype = jtype;
-
- if (f_id != NULL) {
- ji->f_id = *f_id;
- ji->haddr = haddr;
- ji->nat_proto = nat_proto;
+ NAT64STAT_INC(&cfg->base.stats, dropped);
+ DPRINTF(DP_DROPS, "failed to create job");
+ } else {
+ ji->jtype = jtype;
+ ji->done = 0;
}
-
return (ji);
}
-static NAT64NOINLINE void
+static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
- if (ji == NULL)
- return;
-
JQUEUE_LOCK();
- TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
- cfg->jlen++;
+ STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
NAT64STAT_INC(&cfg->base.stats, jrequests);
+ cfg->jlen++;
if (callout_pending(&cfg->jcallout) == 0)
callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
JQUEUE_UNLOCK();
}
-static NAT64NOINLINE void
-nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
- struct nat64lsn_job_head *jhead, int jlen)
-{
-
- if (TAILQ_EMPTY(jhead))
- return;
-
- /* Attach current queue to execution one */
- JQUEUE_LOCK();
- TAILQ_CONCAT(&cfg->jhead, jhead, next);
- cfg->jlen += jlen;
- NAT64STAT_ADD(&cfg->base.stats, jrequests, jlen);
-
- if (callout_pending(&cfg->jcallout) == 0)
- callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
- JQUEUE_UNLOCK();
-}
-
-static unsigned int
-flow6_hash(const struct ipfw_flow_id *f_id)
+static void
+nat64lsn_job_destroy(epoch_context_t ctx)
{
- unsigned char hbuf[36];
-
- memcpy(hbuf, &f_id->dst_ip6, 16);
- memcpy(&hbuf[16], &f_id->src_ip6, 16);
- memcpy(&hbuf[32], &f_id->dst_port, 2);
- memcpy(&hbuf[32], &f_id->src_port, 2);
+ struct nat64lsn_job_item *ji;
+ struct nat64lsn_host *host;
+ struct nat64lsn_pg *pg;
+ int i;
- return (djb_hash(hbuf, sizeof(hbuf)));
+ ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
+ MPASS(ji->jtype == JTYPE_DESTROY);
+ while (!CK_SLIST_EMPTY(&ji->hosts)) {
+ host = CK_SLIST_FIRST(&ji->hosts);
+ CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
+ if (host->states_count > 0) {
+ /*
+ * XXX: The state has been created
+ * during host deletion.
+ */
+ printf("NAT64LSN: %s: destroying host with %d "
+ "states\n", __func__, host->states_count);
+ }
+ nat64lsn_destroy_host(host);
+ }
+ while (!CK_SLIST_EMPTY(&ji->portgroups)) {
+ pg = CK_SLIST_FIRST(&ji->portgroups);
+ CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
+ for (i = 0; i < pg->chunks_count; i++) {
+ if (FREEMASK_BITCOUNT(pg, i) != 64) {
+ /*
+ * XXX: The state has been created during
+ * PG deletion.
+ */
+ printf("NAT64LSN: %s: destroying PG %p "
+ "with non-empty chunk %d\n", __func__,
+ pg, i);
+ }
+ }
+ nat64lsn_destroy_pg(pg);
+ }
+ uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
+ uma_zfree(nat64lsn_job_zone, ji);
}
-static NAT64NOINLINE int
+static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm)
+ const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_job_item *ji;
- struct mbuf *m;
- m = *pm;
- *pm = NULL;
+ ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
+ if (ji != NULL) {
+ ji->m = *mp;
+ ji->f_id = *f_id;
+ ji->faddr = faddr;
+ ji->port = port;
+ ji->proto = proto;
+ ji->src6_hval = hval;
- ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
- if (ji == NULL) {
- m_freem(m);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "failed to create job");
- } else {
- ji->m = m;
- /* Provide pseudo-random value based on flow */
- ji->fhash = flow6_hash(f_id);
nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jhostsreq);
+ *mp = NULL;
}
-
return (IP_FW_DENY);
}
-static NAT64NOINLINE int
-nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
- int needs_idx)
+static int
+nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
+ const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_job_item *ji;
- struct mbuf *m;
- m = *pm;
- *pm = NULL;
+ ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
+ if (ji != NULL) {
+ ji->m = *mp;
+ ji->f_id = *f_id;
+ ji->faddr = faddr;
+ ji->port = port;
+ ji->proto = proto;
+ ji->state_hval = hval;
+ ji->host = host;
- ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
- if (ji == NULL) {
- m_freem(m);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "failed to create job");
- } else {
- ji->m = m;
- /* Provide pseudo-random value based on flow */
- ji->fhash = flow6_hash(f_id);
- ji->aaddr = aaddr;
- ji->needs_idx = needs_idx;
nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jportreq);
+ *mp = NULL;
}
-
return (IP_FW_DENY);
}
-static NAT64NOINLINE struct nat64lsn_state *
-nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
- int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+static int
+nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
+ struct nat64lsn_state *state, uint8_t flags)
{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
- int i, hval, off;
-
- /* XXX: create additional bitmask for selecting proper portgroup */
- for (i = 0; i < nh->pg_used; i++) {
- pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
- if (pg == NULL)
- continue;
- if (*aaddr == 0)
- *aaddr = pg->aaddr;
- if (pg->nat_proto != nat_proto)
- continue;
-
- off = PG_GET_FREE_IDX(pg);
- if (off != 0) {
- /* We have found spare state. Use it */
- off--;
- PG_MARK_BUSY_IDX(pg, off);
- st = &pg->states[off];
-
- /*
- * Fill in new info. Assume state was zeroed.
- * Timestamp and flags will be filled by caller.
- */
- st->u.s = kst->u.s;
- st->cur.idx = i + 1;
- st->cur.off = off;
-
- /* Insert into host hash table */
- hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
- st->next = nh->phash[hval];
- nh->phash[hval] = st->cur;
-
- nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
+ struct pfloghdr loghdr, *logdata;
+ int ret;
+ uint16_t ts;
- NAT64STAT_INC(&cfg->base.stats, screated);
+ /* Update timestamp and flags if needed */
+ SET_AGE(ts);
+ if (state->timestamp != ts)
+ state->timestamp = ts;
+ if ((state->flags & flags) != 0)
+ state->flags |= flags;
- return (st);
- }
- /* Saev last used alias affress */
- *aaddr = pg->aaddr;
- }
+ if (cfg->base.flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64lsn_log(logdata, *mp, AF_INET6, state);
+ } else
+ logdata = NULL;
- return (NULL);
+ ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
+ htons(state->aport), &cfg->base, logdata);
+ if (ret == NAT64SKIP)
+ return (cfg->nomatch_verdict);
+ if (ret == NAT64RETURN)
+ *mp = NULL;
+ return (IP_FW_DENY);
}
-static NAT64NOINLINE int
+static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
- struct mbuf **pm)
+ struct mbuf **mp)
{
- struct pfloghdr loghdr, *logdata;
- char a[INET6_ADDRSTRLEN];
- struct nat64lsn_host *nh;
- struct st_ptr sidx;
- struct nat64lsn_state *st, kst;
- struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *state;
+ struct nat64lsn_host *host;
struct icmp6_hdr *icmp6;
- uint32_t aaddr;
- int action, hval, nat_proto, proto;
- uint16_t aport, state_ts, state_flags;
-
- /* Check if af/protocol is supported and get it short id */
- nat_proto = nat64lsn_proto_map[f_id->proto];
- if (nat_proto == 0) {
+ uint32_t addr, hval, data[2];
+ int offset, proto;
+ uint16_t port;
+ uint8_t flags;
+
+ /* Check if protocol is supported */
+ port = f_id->src_port;
+ proto = f_id->proto;
+ switch (f_id->proto) {
+ case IPPROTO_ICMPV6:
/*
- * Since we can be called from jobs handler, we need
- * to free mbuf by self, do not leave this task to
- * ipfw_check_packet().
+ * For ICMPv6 echo reply/request we use icmp6_id as
+ * local port.
*/
+ offset = 0;
+ proto = nat64_getlasthdr(*mp, &offset);
+ if (proto < 0) {
+ NAT64STAT_INC(&cfg->base.stats, dropped);
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ return (IP_FW_DENY);
+ }
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(*mp, offset);
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+ icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+ port = ntohs(icmp6->icmp6_id);
+ }
+ proto = IPPROTO_ICMP;
+ /* FALLTHROUGH */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
NAT64STAT_INC(&cfg->base.stats, noproto);
- goto drop;
+ return (cfg->nomatch_verdict);
}
- /* Try to find host first */
- I6HASH_FIND(cfg, nh, &f_id->src_ip6);
-
- if (nh == NULL)
- return (nat64lsn_request_host(cfg, f_id, pm));
-
- /* Fill-in on-stack state structure */
- kst.u.s.faddr = nat64_extract_ip4(&f_id->dst_ip6,
- cfg->base.plat_plen);
- if (kst.u.s.faddr == 0 ||
- nat64_check_private_ip4(&cfg->base, kst.u.s.faddr) != 0) {
- NAT64STAT_INC(&cfg->base.stats, dropped);
- goto drop;
- }
- kst.u.s.fport = f_id->dst_port;
- kst.u.s.lport = f_id->src_port;
+ /* Extract IPv4 from destination IPv6 address */
+ addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
+ if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
+ char a[INET_ADDRSTRLEN];
- /* Prepare some fields we might need to update */
- hval = 0;
- proto = nat64_getlasthdr(*pm, &hval);
- if (proto < 0) {
NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
- goto drop;
+ DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
+ inet_ntop(AF_INET, &addr, a, sizeof(a)));
+ return (IP_FW_DENY); /* XXX: add extra stats? */
}
- SET_AGE(state_ts);
- if (proto == IPPROTO_TCP)
- state_flags = convert_tcp_flags(
- TCP(mtodo(*pm, hval))->th_flags);
- else
- state_flags = 0;
- if (proto == IPPROTO_ICMPV6) {
- /* Alter local port data */
- icmp6 = mtodo(*pm, hval);
- if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
- icmp6->icmp6_type == ICMP6_ECHO_REPLY)
- kst.u.s.lport = ntohs(icmp6->icmp6_id);
- }
-
- hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
- pg = NULL;
- st = NULL;
-
- /* OK, let's find state in host hash */
- NAT64_LOCK(nh);
- sidx = nh->phash[hval];
- int k = 0;
- while (sidx.idx != 0) {
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- st = &pg->states[sidx.off];
- //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off,
- //st->next.idx, st->next.off);
- if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
+ /* Try to find host */
+ hval = HOST_HVAL(cfg, &f_id->src_ip6);
+ CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
+ if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
break;
- if (k++ > 1000) {
- DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
- sidx.idx, sidx.off, st->next.idx, st->next.off);
- DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)),
- nh, curcpu);
- k = 0;
- }
- sidx = st->next;
}
-
- if (sidx.idx == 0) {
- aaddr = 0;
- st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
- if (st == NULL) {
- /* No free states. Request more if we can */
- if (nh->pg_used >= cfg->max_chunks) {
- /* Limit reached */
- DPRINTF(DP_DROPS, "PG limit reached "
- " for host %s (used %u, allocated %u, "
- "limit %u)", inet_ntop(AF_INET6,
- &nh->addr, a, sizeof(a)),
- nh->pg_used * NAT64_CHUNK_SIZE,
- nh->pg_allocated * NAT64_CHUNK_SIZE,
- cfg->max_chunks * NAT64_CHUNK_SIZE);
- NAT64_UNLOCK(nh);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- goto drop;
- }
- if ((nh->pg_allocated <=
- nh->pg_used + NAT64LSN_REMAININGPG) &&
- nh->pg_allocated < cfg->max_chunks)
- action = 1; /* Request new indexes */
- else
- action = 0;
- NAT64_UNLOCK(nh);
- //DPRINTF("No state, unlock for %p", nh);
- return (nat64lsn_request_portgroup(cfg, f_id,
- pm, aaddr, action));
- }
-
- /* We've got new state. */
- sidx = st->cur;
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- }
-
- /* Okay, state found */
-
- /* Update necessary fileds */
- if (st->timestamp != state_ts)
- st->timestamp = state_ts;
- if ((st->flags & state_flags) != 0)
- st->flags |= state_flags;
-
- /* Copy needed state data */
- aaddr = pg->aaddr;
- aport = htons(pg->aport + sidx.off);
-
- NAT64_UNLOCK(nh);
-
- if (cfg->base.flags & NAT64_LOG) {
- logdata = &loghdr;
- nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
- } else
- logdata = NULL;
-
- action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->base, logdata);
- if (action == NAT64SKIP)
- return (cfg->nomatch_verdict);
- if (action == NAT64MFREE) {
-drop:
- m_freem(*pm);
- }
- *pm = NULL; /* mark mbuf as consumed */
- return (IP_FW_DENY);
+ /* We use IPv4 address in host byte order */
+ addr = ntohl(addr);
+ if (host == NULL)
+ return (nat64lsn_request_host(cfg, f_id, mp,
+ hval, addr, port, proto));
+
+ flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
+
+ data[0] = addr;
+ data[1] = (f_id->dst_port << 16) | port;
+ hval = STATE_HVAL(cfg, data);
+ state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
+ port, proto);
+ if (state == NULL)
+ return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
+ port, proto));
+ return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}
/*
@@ -1614,49 +1516,61 @@ int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done)
{
- ipfw_insn *icmd;
+ struct epoch_tracker et;
struct nat64lsn_cfg *cfg;
+ ipfw_insn *icmd;
int ret;
IPFW_RLOCK_ASSERT(ch);
- *done = 1; /* terminate the search */
+ *done = 0; /* continue the search in case of failure */
icmd = cmd + 1;
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nat64lsn_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
- return (0);
+ return (IP_FW_DENY);
+
+ *done = 1; /* terminate the search */
+ NAT64LSN_EPOCH_ENTER(et);
switch (args->f_id.addr_type) {
case 4:
ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
break;
case 6:
+ /*
+ * Check that destination IPv6 address matches our prefix6.
+ */
+ if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
+ memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
+ cfg->base.plat_plen / 8) != 0) {
+ ret = cfg->nomatch_verdict;
+ break;
+ }
ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
break;
default:
- return (cfg->nomatch_verdict);
+ ret = cfg->nomatch_verdict;
}
- return (ret);
-}
-
-static int
-nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
-{
- struct nat64lsn_host *nh;
+ NAT64LSN_EPOCH_EXIT(et);
- nh = (struct nat64lsn_host *)mem;
- memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
- memset(nh->phash, 0, sizeof(nh->phash));
- return (0);
+ if (ret != IP_FW_PASS && args->m != NULL) {
+ m_freem(args->m);
+ args->m = NULL;
+ }
+ return (ret);
}
static int
-nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
+ struct nat64lsn_states_chunk *chunk;
+ int i;
- memset(mem, 0, size);
+ chunk = (struct nat64lsn_states_chunk *)mem;
+ for (i = 0; i < 64; i++)
+ chunk->state[i].flags = 0;
return (0);
}
@@ -1664,109 +1578,185 @@ void
nat64lsn_init_internal(void)
{
- memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
- /* Set up supported protocol map */
- nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
- nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
- nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
- nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
- /* Fill in reverse proto map */
- memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
- nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
- nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
- nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+ nat64lsn_epoch = epoch_alloc(EPOCH_PREEMPT);
- JQUEUE_LOCK_INIT();
- nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
- sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
- nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
- sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+ nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
+ sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
+ sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
+ sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
+ sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
- nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
- sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
- nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
+ sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
+ NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
+ sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ JQUEUE_LOCK_INIT();
}
void
nat64lsn_uninit_internal(void)
{
+ /* XXX: epoch_task drain */
+ epoch_free(nat64lsn_epoch);
+
JQUEUE_LOCK_DESTROY();
uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_pgchunk_zone);
uma_zdestroy(nat64lsn_pg_zone);
- uma_zdestroy(nat64lsn_pgidx_zone);
+ uma_zdestroy(nat64lsn_aliaslink_zone);
+ uma_zdestroy(nat64lsn_state_zone);
+ uma_zdestroy(nat64lsn_job_zone);
}
void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{
+ CALLOUT_LOCK(cfg);
callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
nat64lsn_periodic, cfg);
+ CALLOUT_UNLOCK(cfg);
}
struct nat64lsn_cfg *
-nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
+nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
struct nat64lsn_cfg *cfg;
+ struct nat64lsn_alias *alias;
+ int i, naddr;
+
+ cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
+ M_WAITOK | M_ZERO);
- cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
- TAILQ_INIT(&cfg->jhead);
+ CFG_LOCK_INIT(cfg);
+ CALLOUT_LOCK_INIT(cfg);
+ STAILQ_INIT(&cfg->jhead);
cfg->vp = curvnet;
- cfg->ch = ch;
COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
- cfg->ihsize = NAT64LSN_HSIZE;
- cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
- M_WAITOK | M_ZERO);
-
- cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
- M_WAITOK | M_ZERO);
+ cfg->hash_seed = arc4random();
+ cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
+ cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
+ cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
+ for (i = 0; i < cfg->hosts_hashsize; i++)
+ CK_SLIST_INIT(&cfg->hosts_hash[i]);
+
+ naddr = 1 << (32 - plen);
+ cfg->prefix4 = prefix;
+ cfg->pmask4 = prefix | (naddr - 1);
+ cfg->plen4 = plen;
+ cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
+ M_NAT64LSN, M_WAITOK | M_ZERO);
+ for (i = 0; i < naddr; i++) {
+ alias = &cfg->aliases[i];
+ alias->addr = prefix + i; /* host byte order */
+ CK_SLIST_INIT(&alias->hosts);
+ ALIAS_LOCK_INIT(alias);
+ }
- callout_init(&cfg->periodic, CALLOUT_MPSAFE);
+ callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
return (cfg);
}
-/*
- * Destroy all hosts callback.
- * Called on module unload when all activity already finished, so
- * can work without any locks.
- */
-static NAT64NOINLINE int
-nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
+static void
+nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
- struct nat64lsn_portgroup *pg;
int i;
- for (i = nh->pg_used; i > 0; i--) {
- pg = PORTGROUP_BYSIDX(cfg, nh, i);
- if (pg == NULL)
- continue;
- cfg->pg[pg->idx] = NULL;
- destroy_portgroup(pg);
- nh->pg_used--;
+ if (pg->chunks_count == 1) {
+ uma_zfree(nat64lsn_state_zone, pg->states);
+ } else {
+ for (i = 0; i < pg->chunks_count; i++)
+ uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
+ free(pg->states_chunk, M_NAT64LSN);
+ free(pg->freemask_chunk, M_NAT64LSN);
}
- destroy_host6(nh);
- cfg->ihcount--;
- return (0);
+ uma_zfree(nat64lsn_pg_zone, pg);
+}
+
+static void
+nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_alias *alias)
+{
+ struct nat64lsn_pg *pg;
+ int i;
+
+ while (!CK_SLIST_EMPTY(&alias->portgroups)) {
+ pg = CK_SLIST_FIRST(&alias->portgroups);
+ CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
+ nat64lsn_destroy_pg(pg);
+ }
+ for (i = 0; i < 32; i++) {
+ if (ISSET32(alias->tcp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
+ if (ISSET32(alias->udp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
+ if (ISSET32(alias->icmp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
+ }
+ ALIAS_LOCK_DESTROY(alias);
+}
+
+static void
+nat64lsn_destroy_host(struct nat64lsn_host *host)
+{
+ struct nat64lsn_aliaslink *link;
+
+ while (!CK_SLIST_EMPTY(&host->aliases)) {
+ link = CK_SLIST_FIRST(&host->aliases);
+ CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
+
+ ALIAS_LOCK(link->alias);
+ CK_SLIST_REMOVE(&link->alias->hosts, link,
+ nat64lsn_aliaslink, alias_entries);
+ link->alias->hosts_count--;
+ ALIAS_UNLOCK(link->alias);
+
+ uma_zfree(nat64lsn_aliaslink_zone, link);
+ }
+ HOST_LOCK_DESTROY(host);
+ free(host->states_hash, M_NAT64LSN);
+ uma_zfree(nat64lsn_host_zone, host);
}
void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
- struct nat64lsn_host *nh, *tmp;
+ struct nat64lsn_host *host;
+ int i;
- callout_drain(&cfg->jcallout);
+ CALLOUT_LOCK(cfg);
callout_drain(&cfg->periodic);
- I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
- DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);
+ CALLOUT_UNLOCK(cfg);
+ callout_drain(&cfg->jcallout);
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
+ host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
+ CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
+ nat64lsn_destroy_host(host);
+ }
+ }
+
+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
+ nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
+ CALLOUT_LOCK_DESTROY(cfg);
+ CFG_LOCK_DESTROY(cfg);
COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
- free(cfg->ih, M_IPFW);
- free(cfg->pg, M_IPFW);
- free(cfg, M_IPFW);
+ free(cfg->hosts_hash, M_NAT64LSN);
+ free(cfg->aliases, M_NAT64LSN);
+ free(cfg, M_NAT64LSN);
}
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.h b/sys/netpfil/ipfw/nat64/nat64lsn.h
index 44036cb3efcb..797876b229c2 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn.h
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -35,75 +35,149 @@
#include "ip_fw_nat64.h"
#include "nat64_translate.h"
-#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
-#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
-
#define NAT64_MIN_PORT 1024
-#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
+struct nat64lsn_host;
+struct nat64lsn_alias;
-struct st_ptr {
- uint8_t idx; /* index in nh->pg_ptr array.
- * NOTE: it starts from 1.
- */
- uint8_t off;
+struct nat64lsn_state {
+	/* IPv6 host entry keeps hash table to speed up state lookup */
+ CK_SLIST_ENTRY(nat64lsn_state) entries;
+ struct nat64lsn_host *host;
+
+ struct in6_addr ip6_dst; /* Destination IPv6 address */
+
+ in_addr_t ip_src; /* Alias IPv4 address */
+ in_addr_t ip_dst; /* Destination IPv4 address */
+ uint16_t dport; /* Destination port */
+ uint16_t sport; /* Source port */
+
+ uint32_t hval;
+ uint32_t flags; /* Internal flags */
+ uint16_t aport;
+ uint16_t timestamp; /* last used */
+ uint8_t proto;
+ uint8_t _spare[7];
};
-#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1)
-#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY)
-#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
- NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)
-struct nat64lsn_portgroup;
-/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
-struct nat64lsn_host {
- struct rwlock h_lock; /* Host states lock */
-
- struct in6_addr addr;
- struct nat64lsn_host *next;
- uint16_t timestamp; /* Last altered */
- uint16_t hsize; /* ports hash size */
- uint16_t pg_used; /* Number of portgroups used */
-#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
- * requesting of new chunk of indexes.
- */
- uint16_t pg_allocated; /* Number of portgroups indexes
- * allocated.
- */
-#define NAT64LSN_HSIZE 64
- struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
- /*
- * PG indexes are stored in chunks with 32 elements.
- * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
- */
-#define NAT64LSN_PGIDX_CHUNK 32
-#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
- NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
- struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
+struct nat64lsn_states_chunk {
+ struct nat64lsn_state state[64];
+};
+
+#define ISSET64(mask, bit) ((mask) & ((uint64_t)1 << (bit)))
+#define ISSET32(mask, bit) ((mask) & ((uint32_t)1 << (bit)))
+struct nat64lsn_pg {
+ CK_SLIST_ENTRY(nat64lsn_pg) entries;
+
+ uint16_t base_port;
+ uint16_t timestamp;
+ uint8_t proto;
+ uint8_t chunks_count;
+ uint8_t spare[2];
+
+ union {
+ uint64_t freemask64;
+ uint32_t freemask32[2];
+ uint64_t *freemask64_chunk;
+ uint32_t *freemask32_chunk;
+ void *freemask_chunk;
+ };
+ union {
+ struct nat64lsn_states_chunk *states;
+ struct nat64lsn_states_chunk **states_chunk;
+ };
+};
+
+#define CHUNK_BY_FADDR(p, a) ((a) & ((p)->chunks_count - 1))
+
+#ifdef __LP64__
+#define FREEMASK_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? &(p)->freemask64 : \
+ &(p)->freemask64_chunk[CHUNK_BY_FADDR(p, v)])
+#define FREEMASK_BITCOUNT(pg, faddr) \
+ bitcount64(*FREEMASK_CHUNK((pg), (faddr)))
+#else
+#define FREEMASK_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? &(p)->freemask32[0] : \
+ &(p)->freemask32_chunk[CHUNK_BY_FADDR(p, v) * 2])
+#define FREEMASK_BITCOUNT(pg, faddr) \
+ bitcount64(*(uint64_t *)FREEMASK_CHUNK((pg), (faddr)))
+#endif /* !__LP64__ */
+
+struct nat64lsn_pgchunk {
+ struct nat64lsn_pg *pgptr[32];
};
-#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)
-#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)
+struct nat64lsn_aliaslink {
+ CK_SLIST_ENTRY(nat64lsn_aliaslink) alias_entries;
+ CK_SLIST_ENTRY(nat64lsn_aliaslink) host_entries;
+ struct nat64lsn_alias *alias;
+};
-#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)
-#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)
-#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)
-#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)
-#define NAT64_LOCK(h) NAT64_WLOCK(h)
-#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)
-#define NAT64_LOCK_INIT(h) do { \
- rw_init(&(h)->h_lock, "NAT64 host lock"); \
- } while (0)
+CK_SLIST_HEAD(nat64lsn_aliaslink_slist, nat64lsn_aliaslink);
+CK_SLIST_HEAD(nat64lsn_states_slist, nat64lsn_state);
+CK_SLIST_HEAD(nat64lsn_hosts_slist, nat64lsn_host);
+CK_SLIST_HEAD(nat64lsn_pg_slist, nat64lsn_pg);
+
+struct nat64lsn_alias {
+ struct nat64lsn_aliaslink_slist hosts;
+ struct nat64lsn_pg_slist portgroups;
+
+ struct mtx lock;
+ in_addr_t addr; /* host byte order */
+ uint32_t hosts_count;
+ uint32_t portgroups_count;
+ uint32_t tcp_chunkmask;
+ uint32_t udp_chunkmask;
+ uint32_t icmp_chunkmask;
+
+ uint32_t tcp_pgidx;
+ uint32_t udp_pgidx;
+ uint32_t icmp_pgidx;
+ uint16_t timestamp;
+ uint16_t spare;
+
+ uint32_t tcp_pgmask[32];
+ uint32_t udp_pgmask[32];
+ uint32_t icmp_pgmask[32];
+ struct nat64lsn_pgchunk *tcp[32];
+ struct nat64lsn_pgchunk *udp[32];
+ struct nat64lsn_pgchunk *icmp[32];
+
+ /* pointer to PG that can be used for faster state allocation */
+ struct nat64lsn_pg *tcp_pg;
+ struct nat64lsn_pg *udp_pg;
+ struct nat64lsn_pg *icmp_pg;
+};
+#define ALIAS_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "alias_lock", NULL, MTX_DEF)
+#define ALIAS_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define ALIAS_LOCK(p) mtx_lock(&(p)->lock)
+#define ALIAS_UNLOCK(p) mtx_unlock(&(p)->lock)
-#define NAT64_LOCK_DESTROY(h) do { \
- rw_destroy(&(h)->h_lock); \
- } while (0)
+#define NAT64LSN_HSIZE 256
+#define NAT64LSN_MAX_HSIZE 4096
+#define NAT64LSN_HOSTS_HSIZE 1024
-/* Internal proto index */
-#define NAT_PROTO_TCP 1
-#define NAT_PROTO_UDP 2
-#define NAT_PROTO_ICMP 3
+struct nat64lsn_host {
+ struct in6_addr addr;
+ struct nat64lsn_aliaslink_slist aliases;
+ struct nat64lsn_states_slist *states_hash;
+ CK_SLIST_ENTRY(nat64lsn_host) entries;
+ uint32_t states_count;
+ uint32_t hval;
+ uint32_t flags;
+#define NAT64LSN_DEADHOST 1
+#define NAT64LSN_GROWHASH 2
+ uint16_t states_hashsize;
+ uint16_t timestamp;
+ struct mtx lock;
+};
-#define NAT_MAX_PROTO 4
-extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+#define HOST_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "host_lock", NULL, MTX_DEF|MTX_NEW)
+#define HOST_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define HOST_LOCK(p) mtx_lock(&(p)->lock)
+#define HOST_UNLOCK(p) mtx_unlock(&(p)->lock)
VNET_DECLARE(uint16_t, nat64lsn_eid);
#define V_nat64lsn_eid VNET(nat64lsn_eid)
@@ -112,124 +186,65 @@ VNET_DECLARE(uint16_t, nat64lsn_eid);
/* Timestamp macro */
#define _CT ((int)time_uptime % 65536)
#define SET_AGE(x) (x) = _CT
-#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x) : \
- (int)65536 + _CT - (x))
+#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x): (int)65536 + _CT - (x))
-#ifdef __LP64__
-/* ffsl() is capable of checking 64-bit ints */
-#define _FFS64
-#endif
-
-/* 16 bytes */
-struct nat64lsn_state {
- union {
- struct {
- in_addr_t faddr; /* Remote IPv4 address */
- uint16_t fport; /* Remote IPv4 port */
- uint16_t lport; /* Local IPv6 port */
- }s;
- uint64_t hkey;
- } u;
- uint8_t nat_proto;
- uint8_t flags;
- uint16_t timestamp;
- struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
- struct st_ptr next; /* Next entry index */
-};
-
-/*
- * 1024+32 bytes per 64 states, used to store state
- * AND for outside-in state lookup
- */
-struct nat64lsn_portgroup {
- struct nat64lsn_host *host; /* IPv6 source host info */
- in_addr_t aaddr; /* Alias addr, network format */
- uint16_t aport; /* Base port */
- uint16_t timestamp;
- uint8_t nat_proto;
- uint8_t spare[3];
- uint32_t idx;
-#ifdef _FFS64
- uint64_t freemask; /* Mask of free entries */
-#else
- uint32_t freemask[2]; /* Mask of free entries */
-#endif
- struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
-};
-#ifdef _FFS64
-#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
-#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
-#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
-#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
-#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
-#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
-#else
-#define PG_MARK_BUSY_IDX(_pg, _idx) \
- (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
-#define PG_MARK_FREE_IDX(_pg, _idx) \
- (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
-#define PG_IS_FREE_IDX(_pg, _idx) \
- ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
-#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
-#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
-#define PG_IS_EMPTY(_pg) \
- ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
-
-static inline int
-_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
-{
- int i;
-
- if ((i = ffsl(pg->freemask[0])) != 0)
- return (i);
- if ((i = ffsl(pg->freemask[1])) != 0)
- return (i + 32);
- return (0);
-}
-
-#endif
-
-TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
+STAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
struct nat64lsn_cfg {
struct named_object no;
- struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
- struct nat64lsn_host **ih; /* Host hash */
+
+ struct nat64lsn_hosts_slist *hosts_hash;
+ struct nat64lsn_alias *aliases; /* array of aliases */
+
+ struct mtx lock;
+ uint32_t hosts_hashsize;
+ uint32_t hash_seed;
+
uint32_t prefix4; /* IPv4 prefix */
uint32_t pmask4; /* IPv4 prefix mask */
- uint32_t ihsize; /* IPv6 host hash size */
uint8_t plen4;
- uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
+ uint8_t nomatch_verdict;/* Return value on no-match */
- uint32_t ihcount; /* Number of items in host hash */
- int max_chunks; /* Max chunks per client */
- int agg_prefix_len; /* Prefix length to count */
- int agg_prefix_max; /* Max hosts per agg prefix */
+ uint32_t hosts_count; /* Number of items in host hash */
+ uint32_t states_chunks; /* Number of states chunks per PG */
uint32_t jmaxlen; /* Max jobqueue length */
- uint16_t min_chunk; /* Min port group # to use */
- uint16_t max_chunk; /* Max port group # to use */
- uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t host_delete_delay; /* Stale host delete delay */
+ uint16_t pgchunk_delete_delay;
uint16_t pg_delete_delay; /* Stale portgroup del delay */
uint16_t st_syn_ttl; /* TCP syn expire */
uint16_t st_close_ttl; /* TCP fin expire */
uint16_t st_estab_ttl; /* TCP established expire */
uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */
- uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
+
struct nat64_config base;
#define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE)
+#define NAT64LSN_ANYPREFIX 0x00000100
+ struct mtx periodic_lock;
struct callout periodic;
struct callout jcallout;
- struct ip_fw_chain *ch;
struct vnet *vp;
struct nat64lsn_job_head jhead;
int jlen;
char name[64]; /* Nat instance name */
};
+/* CFG_LOCK protects cfg->hosts_hash from modification */
+#define CFG_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "cfg_lock", NULL, MTX_DEF)
+#define CFG_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define CFG_LOCK(p) mtx_lock(&(p)->lock)
+#define CFG_UNLOCK(p) mtx_unlock(&(p)->lock)
+
+#define CALLOUT_LOCK_INIT(p) \
+ mtx_init(&(p)->periodic_lock, "periodic_lock", NULL, MTX_DEF)
+#define CALLOUT_LOCK_DESTROY(p) mtx_destroy(&(p)->periodic_lock)
+#define CALLOUT_LOCK(p) mtx_lock(&(p)->periodic_lock)
+#define CALLOUT_UNLOCK(p) mtx_unlock(&(p)->periodic_lock)
+
struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
- size_t numaddr);
+ in_addr_t prefix, int plen);
void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_init_internal(void);
@@ -237,114 +252,4 @@ void nat64lsn_uninit_internal(void);
int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done);
-void
-nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
- const char *px, int off);
-/*
- * Portgroup layout
- * addr x nat_proto x port_off
- *
- */
-
-#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS)
-#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
-
-#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4))
-#define __GET_PORTGROUP_IDX(_proto, _port) \
- ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \
- ((_port) >> NAT64_CHUNK_SIZE_BITS))
-
-#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \
- GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \
- __GET_PORTGROUP_IDX(_proto, _port)
-#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \
- ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
-
-#define PORTGROUP_CHUNK(_nh, _idx) \
- ((_nh)->pg_ptr[(_idx)])
-#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \
- (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \
- [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
-
-
-/* Chained hash table */
-#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _x = _PX##first(_ph, _buck); \
- for ( ; _x != NULL; _x = _PX##next(_x)) { \
- if (_PX##cmp(_key, _PX##val(_x))) \
- break; \
- } \
- if (_x == NULL) \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
- _PX##unlock(_ph, _buck);
-
-#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
- unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _PX##next(_i) = _PX##first(_ph, _buck); \
- _PX##first(_ph, _buck) = _i; \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _x = _PX##first(_ph, _buck); \
- _tmp = NULL; \
- for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
- if (_PX##cmp(_key, _PX##val(_x))) \
- break; \
- } \
- if (_x != NULL) { \
- if (_tmp == NULL) \
- _PX##first(_ph, _buck) = _PX##next(_x); \
- else \
- _PX##next(_tmp) = _PX##next(_x); \
- } \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \
- for (unsigned int _i = 0; _i < _hsize; _i++) { \
- _PX##lock(_ph, _i); \
- _x = _PX##first(_ph, _i); \
- _tmp = NULL; \
- for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
- if (_cb(_x, _arg) == 0) \
- continue; \
- if (_tmp == NULL) \
- _PX##first(_ph, _i) = _PX##next(_x); \
- else \
- _tmp = _PX##next(_x); \
- } \
- _PX##unlock(_ph, _i); \
- } \
-} while(0)
-
-#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \
- unsigned int _buck; \
- for (unsigned int _i = 0; _i < _hsize; _i++) { \
- _x = _PX##first(_ph, _i); \
- _y = _x; \
- while (_y != NULL) { \
- _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
- _y = _PX##next(_x); \
- _PX##next(_x) = _PX##first(_nph, _buck); \
- _PX##first(_nph, _buck) = _x; \
- } \
- } \
-} while(0)
-
#endif /* _IP_FW_NAT64LSN_H_ */
-
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
index 6bb48d29e382..65481a88d64e 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn_control.c
+++ b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@@ -43,10 +45,8 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sockopt.h>
-#include <sys/queue.h>
#include <net/if.h>
-#include <net/pfil.h>
#include <netinet/in.h>
#include <netinet/ip.h>
@@ -75,12 +75,6 @@ static void
nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
{
- if (uc->max_ports == 0)
- uc->max_ports = NAT64LSN_MAX_PORTS;
- else
- uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
- if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
- uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
if (uc->jmaxlen == 0)
uc->jmaxlen = NAT64LSN_JMAXLEN;
if (uc->jmaxlen > 65536)
@@ -99,6 +93,13 @@ nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
uc->st_udp_ttl = NAT64LSN_UDP_AGE;
if (uc->st_icmp_ttl == 0)
uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+
+ if (uc->states_chunks == 0)
+ uc->states_chunks = 1;
+ else if (uc->states_chunks >= 128)
+ uc->states_chunks = 128;
+ else if (!powerof2(uc->states_chunks))
+ uc->states_chunks = 1 << fls(uc->states_chunks);
}
/*
@@ -127,12 +128,20 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
if (ipfw_check_object_name_generic(uc->name) != 0)
return (EINVAL);
- if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
+ if (uc->set >= IPFW_MAX_SETS)
return (EINVAL);
if (uc->plen4 > 32)
return (EINVAL);
- if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0)
+
+ /*
+ * Unspecified address has special meaning. But it must
+ * have valid prefix length. This length will be used to
+	 * correctly extract and embed IPv4 address into IPv6.
+ */
+ if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
+ IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
+ nat64_check_prefixlen(uc->plen6) != 0)
return (EINVAL);
/* XXX: Check prefix4 to be global */
@@ -140,14 +149,6 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
mask4 = ~((1 << (32 - uc->plen4)) - 1);
if ((addr4 & mask4) != addr4)
return (EINVAL);
- if (uc->min_port == 0)
- uc->min_port = NAT64_MIN_PORT;
- if (uc->max_port == 0)
- uc->max_port = 65535;
- if (uc->min_port > uc->max_port)
- return (EINVAL);
- uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
- uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
nat64lsn_default_config(uc);
@@ -159,7 +160,7 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
IPFW_UH_RUNLOCK(ch);
- cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4));
+ cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
strlcpy(cfg->name, uc->name, sizeof(cfg->name));
cfg->no.name = cfg->name;
cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
@@ -170,20 +171,12 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
cfg->base.flags |= NAT64_WKPFX;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
+ cfg->base.flags |= NAT64LSN_ANYPREFIX;
- cfg->prefix4 = addr4;
- cfg->pmask4 = addr4 | ~mask4;
- cfg->plen4 = uc->plen4;
-
- cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
- cfg->agg_prefix_len = uc->agg_prefix_len;
- cfg->agg_prefix_max = uc->agg_prefix_max;
-
- cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
- cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
-
+ cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
- cfg->nh_delete_delay = uc->nh_delete_delay;
+ cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl;
@@ -249,7 +242,7 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
if (cfg->no.refcnt > 0) {
@@ -272,6 +265,8 @@ static void
export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
struct ipfw_nat64lsn_stats *stats)
{
+ struct nat64lsn_alias *alias;
+ int i, j;
__COPY_STAT_FIELD(cfg, stats, opcnt64);
__COPY_STAT_FIELD(cfg, stats, opcnt46);
@@ -299,10 +294,16 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
__COPY_STAT_FIELD(cfg, stats, spgcreated);
__COPY_STAT_FIELD(cfg, stats, spgdeleted);
- stats->hostcount = cfg->ihcount;
- stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
- stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
- stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
+ stats->hostcount = cfg->hosts_count;
+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
+ alias = &cfg->aliases[i];
+ for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
+ stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
+ for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
+ stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
+ for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
+ stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
+ }
}
#undef __COPY_STAT_FIELD
@@ -312,12 +313,9 @@ nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
{
uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
- uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
- uc->agg_prefix_len = cfg->agg_prefix_len;
- uc->agg_prefix_max = cfg->agg_prefix_max;
-
+ uc->states_chunks = cfg->states_chunks;
uc->jmaxlen = cfg->jmaxlen;
- uc->nh_delete_delay = cfg->nh_delete_delay;
+ uc->nh_delete_delay = cfg->host_delete_delay;
uc->pg_delete_delay = cfg->pg_delete_delay;
uc->st_syn_ttl = cfg->st_syn_ttl;
uc->st_close_ttl = cfg->st_close_ttl;
@@ -425,7 +423,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
- return (EEXIST);
+ return (ENOENT);
}
nat64lsn_export_config(ch, cfg, uc);
IPFW_UH_RUNLOCK(ch);
@@ -438,18 +436,18 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
- return (EEXIST);
+ return (ENOENT);
}
/*
* For now allow to change only following values:
* jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
- * tcp_est_age, udp_age, icmp_age, flags, max_ports.
+ * tcp_est_age, udp_age, icmp_age, flags, states_chunks.
*/
- cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+ cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
- cfg->nh_delete_delay = uc->nh_delete_delay;
+ cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl;
@@ -496,7 +494,7 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
export_stats(ch, cfg, &stats);
@@ -538,163 +536,176 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
IPFW_UH_WUNLOCK(ch);
return (0);
}
+#ifdef __LP64__
+#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n))
+#else
+#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \
+ ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
+#endif
/*
* Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
* ipfw_nat64lsn_state x count, ... ] ]
*/
static int
-export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
- ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
+nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
+ struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
{
- ipfw_nat64lsn_state *ste;
- struct nat64lsn_state *st;
- int i, count;
+ ipfw_nat64lsn_state_v1 *s;
+ struct nat64lsn_state *state;
+ uint64_t freemask;
+ uint32_t i, count;
- NAT64_LOCK(pg->host);
- count = 0;
- for (i = 0; i < 64; i++) {
- if (PG_IS_BUSY_IDX(pg, i))
- count++;
- }
- DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count);
+ /* validate user input */
+ if (idx->chunk > pg->chunks_count - 1)
+ return (EINVAL);
- if (count == 0) {
- stg->count = 0;
- NAT64_UNLOCK(pg->host);
- return (0);
- }
- ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
- count * sizeof(ipfw_nat64lsn_state));
- if (ste == NULL) {
- NAT64_UNLOCK(pg->host);
- return (1);
- }
+ FREEMASK_COPY(pg, idx->chunk, freemask);
+ count = 64 - bitcount64(freemask);
+ if (count == 0)
+ return (0); /* Try next PG/chunk */
+
+ DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
+ (uintmax_t)idx->index, count);
+
+ s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
+ count * sizeof(ipfw_nat64lsn_state_v1));
+ if (s == NULL)
+ return (ENOMEM);
- stg->alias4.s_addr = pg->aaddr;
- stg->proto = nat64lsn_rproto_map[pg->nat_proto];
- stg->flags = 0;
- stg->host6 = pg->host->addr;
- stg->count = count;
for (i = 0; i < 64; i++) {
- if (PG_IS_FREE_IDX(pg, i))
+ if (ISSET64(freemask, i))
continue;
- st = &pg->states[i];
- ste->daddr.s_addr = st->u.s.faddr;
- ste->dport = st->u.s.fport;
- ste->aport = pg->aport + i;
- ste->sport = st->u.s.lport;
- ste->flags = st->flags; /* XXX filter flags */
- ste->idle = GET_AGE(st->timestamp);
- ste++;
+ state = pg->chunks_count == 1 ? &pg->states->state[i] :
+ &pg->states_chunk[idx->chunk]->state[i];
+
+ s->host6 = state->host->addr;
+ s->daddr.s_addr = htonl(state->ip_dst);
+ s->dport = state->dport;
+ s->sport = state->sport;
+ s->aport = state->aport;
+ s->flags = (uint8_t)(state->flags & 7);
+ s->proto = state->proto;
+ s->idle = GET_AGE(state->timestamp);
+ s++;
}
- NAT64_UNLOCK(pg->host);
-
+ *ret_count = count;
return (0);
}
+#define LAST_IDX 0xFF
static int
-get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
+nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
+ union nat64lsn_pgidx *idx)
{
- if (*port < 65536 - NAT64_CHUNK_SIZE) {
- *port += NAT64_CHUNK_SIZE;
- return (0);
+ /* First iterate over chunks */
+ if (pg != NULL) {
+ if (idx->chunk < pg->chunks_count - 1) {
+ idx->chunk++;
+ return (0);
+ }
}
- *port = 0;
-
- if (*nat_proto < NAT_MAX_PROTO - 1) {
- *nat_proto += 1;
+ idx->chunk = 0;
+ /* Then over PGs */
+ if (idx->port < UINT16_MAX - 64) {
+ idx->port += 64;
return (0);
}
- *nat_proto = 1;
-
- if (*addr < cfg->pmask4) {
- *addr += 1;
+ idx->port = NAT64_MIN_PORT;
+ /* Then over supported protocols */
+ switch (idx->proto) {
+ case IPPROTO_ICMP:
+ idx->proto = IPPROTO_TCP;
return (0);
+ case IPPROTO_TCP:
+ idx->proto = IPPROTO_UDP;
+ return (0);
+ default:
+ idx->proto = IPPROTO_ICMP;
}
-
- /* End of space. */
- return (1);
+ /* And then over IPv4 alias addresses */
+ if (idx->addr < cfg->pmask4) {
+ idx->addr++;
+ return (1); /* New states group is needed */
+ }
+ idx->index = LAST_IDX;
+ return (-1); /* No more states */
}
-#define PACK_IDX(addr, proto, port) \
- ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
-#define UNPACK_IDX(idx, addr, proto, port) \
- (addr) = (uint32_t)((idx) >> 32); \
- (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
- (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
-
-static struct nat64lsn_portgroup *
-get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
+static struct nat64lsn_pg*
+nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
{
- struct nat64lsn_portgroup *pg;
- uint64_t pre_pack, post_pack;
-
- pg = NULL;
- pre_pack = PACK_IDX(*addr, *nat_proto, *port);
- for (;;) {
- if (get_next_idx(cfg, addr, nat_proto, port) != 0) {
- /* End of states */
- return (pg);
- }
-
- pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
- if (pg != NULL)
- break;
+ struct nat64lsn_alias *alias;
+ int pg_idx;
+
+ alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
+ MPASS(alias->addr == idx->addr);
+
+ pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
+ switch (idx->proto) {
+ case IPPROTO_ICMP:
+ if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
+ case IPPROTO_TCP:
+ if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
+ case IPPROTO_UDP:
+ if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
}
-
- post_pack = PACK_IDX(*addr, *nat_proto, *port);
- if (pre_pack == post_pack)
- DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
- *addr, *nat_proto, *port);
- return (pg);
+ return (NULL);
}
-static NAT64NOINLINE struct nat64lsn_portgroup *
-get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
+/*
+ * Lists nat64lsn states.
+ * Data layout (v0):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- struct nat64lsn_portgroup *pg;
- pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
- if (pg == NULL)
- pg = get_next_pg(cfg, addr, nat_proto, port);
-
- return (pg);
+ /* TODO: implement states listing for old ipfw(8) binaries */
+ return (EOPNOTSUPP);
}
/*
* Lists nat64lsn states.
- * Data layout (v0)(current):
+ * Data layout (v1)(current):
* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
* Reply: [ ipfw_obj_header ipfw_obj_data [
- * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ * ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
*
* Returns 0 on success
*/
static int
-nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
ipfw_obj_header *oh;
ipfw_obj_data *od;
- ipfw_nat64lsn_stg *stg;
+ ipfw_nat64lsn_stg_v1 *stg;
struct nat64lsn_cfg *cfg;
- struct nat64lsn_portgroup *pg, *pg_next;
- uint64_t next_idx;
+ struct nat64lsn_pg *pg;
+ union nat64lsn_pgidx idx;
size_t sz;
- uint32_t addr, states;
- uint16_t port;
- uint8_t nat_proto;
+ uint32_t count, total;
+ int ret;
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
sizeof(uint64_t);
@@ -708,78 +719,96 @@ nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
od->head.length != sz - sizeof(ipfw_obj_header))
return (EINVAL);
- next_idx = *(uint64_t *)(od + 1);
- /* Translate index to the request position to start from */
- UNPACK_IDX(next_idx, addr, nat_proto, port);
- if (nat_proto >= NAT_MAX_PROTO)
+ idx.index = *(uint64_t *)(od + 1);
+ if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
+ idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
return (EINVAL);
- if (nat_proto == 0 && addr != 0)
+ if (idx.index == LAST_IDX)
return (EINVAL);
IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
- /* Fill in starting point */
- if (addr == 0) {
- addr = cfg->prefix4;
- nat_proto = 1;
- port = 0;
+ if (idx.index == 0) { /* Fill in starting point */
+ idx.addr = cfg->prefix4;
+ idx.proto = IPPROTO_ICMP;
+ idx.port = NAT64_MIN_PORT;
}
- if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
+ idx.port < NAT64_MIN_PORT) {
IPFW_UH_RUNLOCK(ch);
- DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
- (uintmax_t)next_idx, addr, cfg->pmask4);
return (EINVAL);
}
-
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
- sizeof(ipfw_nat64lsn_stg);
- if (sd->valsize < sz)
+ sizeof(ipfw_nat64lsn_stg_v1);
+ if (sd->valsize < sz) {
+ IPFW_UH_RUNLOCK(ch);
return (ENOMEM);
+ }
oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
od = (ipfw_obj_data *)(oh + 1);
od->head.type = IPFW_TLV_OBJDATA;
od->head.length = sz - sizeof(ipfw_obj_header);
- stg = (ipfw_nat64lsn_stg *)(od + 1);
-
- pg = get_first_pg(cfg, &addr, &nat_proto, &port);
- if (pg == NULL) {
- /* No states */
- stg->next_idx = 0xFF;
- stg->count = 0;
- IPFW_UH_RUNLOCK(ch);
- return (0);
- }
- states = 0;
- pg_next = NULL;
- while (pg != NULL) {
- pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
- if (pg_next == NULL)
- stg->next_idx = 0xFF;
- else
- stg->next_idx = PACK_IDX(addr, nat_proto, port);
-
- if (export_pg_states(cfg, pg, stg, sd) != 0) {
- IPFW_UH_RUNLOCK(ch);
- return (states == 0 ? ENOMEM: 0);
+ stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
+ stg->count = total = 0;
+ stg->next.index = idx.index;
+ /*
+ * Acquire CALLOUT_LOCK to avoid races with expiration code.
+ * Thus states, hosts and PGs will not expire while we hold it.
+ */
+ CALLOUT_LOCK(cfg);
+ ret = 0;
+ do {
+ pg = nat64lsn_get_pg_byidx(cfg, &idx);
+ if (pg != NULL) {
+ count = 0;
+ ret = nat64lsn_export_states_v1(cfg, &idx, pg,
+ sd, &count);
+ if (ret != 0)
+ break;
+ if (count > 0) {
+ stg->count += count;
+ total += count;
+ /* Update total size of reply */
+ od->head.length +=
+ count * sizeof(ipfw_nat64lsn_state_v1);
+ sz += count * sizeof(ipfw_nat64lsn_state_v1);
+ }
+ stg->alias4.s_addr = htonl(idx.addr);
}
- states += stg->count;
- od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
- sz += stg->count * sizeof(ipfw_nat64lsn_state);
- if (pg_next != NULL) {
- sz += sizeof(ipfw_nat64lsn_stg);
- if (sd->valsize < sz)
+ /* Determine new index */
+ switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
+ case -1:
+ ret = ENOENT; /* End of search */
+ break;
+ case 1: /*
+ * Next alias address, new group may be needed.
+ * If states count is zero, use this group.
+ */
+ if (stg->count == 0)
+ continue;
+ /* Otherwise try to create new group */
+ sz += sizeof(ipfw_nat64lsn_stg_v1);
+ if (sd->valsize < sz) {
+ ret = ENOMEM;
break;
- stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
- sizeof(ipfw_nat64lsn_stg));
+ }
+ /* Save next index in current group */
+ stg->next.index = idx.index;
+ stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
+ sizeof(ipfw_nat64lsn_stg_v1));
+ od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
+ stg->count = 0;
+ break;
}
- pg = pg_next;
- }
+ stg->next.index = idx.index;
+ } while (ret == 0);
+ CALLOUT_UNLOCK(cfg);
IPFW_UH_RUNLOCK(ch);
- return (0);
+ return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
}
static struct ipfw_sopt_handler scodes[] = {
@@ -789,7 +818,8 @@ static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
{ IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
{ IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
- { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states_v0 },
+ { IP_FW_NAT64LSN_LIST_STATES,1, HDIR_GET, nat64lsn_states_v1 },
};
static int