author | Max Laier <mlaier@FreeBSD.org> | 2004-02-26 02:04:28 +0000
committer | Max Laier <mlaier@FreeBSD.org> | 2004-02-26 02:04:28 +0000
commit | 5c45a928b95416a77855a8da21bad4df8f60e7a0 (patch)
tree | 0b6b682f610aa724f175d04a78bd61a7ab6e127f
-rw-r--r-- | sys/contrib/pf/net/if_pflog.c | 231
-rw-r--r-- | sys/contrib/pf/net/if_pflog.h | 75
-rw-r--r-- | sys/contrib/pf/net/if_pfsync.c | 359
-rw-r--r-- | sys/contrib/pf/net/if_pfsync.h | 84
-rw-r--r-- | sys/contrib/pf/net/pf.c | 5263
-rw-r--r-- | sys/contrib/pf/net/pf_ioctl.c | 2158
-rw-r--r-- | sys/contrib/pf/net/pf_norm.c | 1528
-rw-r--r-- | sys/contrib/pf/net/pf_osfp.c | 524
-rw-r--r-- | sys/contrib/pf/net/pf_table.c | 2018
-rw-r--r-- | sys/contrib/pf/net/pfvar.h | 1264
-rw-r--r-- | sys/contrib/pf/netinet/in4_cksum.c | 219
11 files changed, 13723 insertions(+), 0 deletions(-)
diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c
new file mode 100644
index 000000000000..e856f2cc965f
--- /dev/null
+++ b/sys/contrib/pf/net/if_pflog.c
@@ -0,0 +1,231 @@
+/*	$OpenBSD: if_pflog.c,v 1.9 2003/05/14 08:42:00 canacar Exp $	*/
+/*
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr) and
+ * Niels Provos (provos@physnet.uni-hamburg.de).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis
+ * and Niels Provos.
+ * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos.
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include "bpfilter.h"
+#include "pflog.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/bpf.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#ifndef INET
+#include <netinet/in.h>
+#endif
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+#include <net/pfvar.h>
+#include <net/if_pflog.h>
+
+#define PFLOGMTU	(32768 + MHLEN + MLEN)
+
+#ifdef PFLOGDEBUG
+#define DPRINTF(x)	do { if (pflogdebug) printf x ; } while (0)
+#else
+#define DPRINTF(x)
+#endif
+
+struct pflog_softc pflogif[NPFLOG];
+
+void	pflogattach(int);
+int	pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+	    struct rtentry *);
+int	pflogioctl(struct ifnet *, u_long, caddr_t);
+void	pflogrtrequest(int, struct rtentry *, struct sockaddr *);
+void	pflogstart(struct ifnet *);
+
+extern int ifqmaxlen;
+
+void
+pflogattach(int npflog)
+{
+	struct ifnet *ifp;
+	int i;
+
+	bzero(pflogif, sizeof(pflogif));
+
+	for (i = 0; i < NPFLOG; i++) {
+		ifp = &pflogif[i].sc_if;
+		snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", i);
+		ifp->if_softc = &pflogif[i];
+		ifp->if_mtu = PFLOGMTU;
+		ifp->if_ioctl = pflogioctl;
+		ifp->if_output = pflogoutput;
+		ifp->if_start = pflogstart;
+		ifp->if_type = IFT_PFLOG;
+		ifp->if_snd.ifq_maxlen = ifqmaxlen;
+		ifp->if_hdrlen = PFLOG_HDRLEN;
+		if_attach(ifp);
+		if_alloc_sadl(ifp);
+
+#if NBPFILTER > 0
+		bpfattach(&pflogif[i].sc_if.if_bpf, ifp, DLT_PFLOG,
+		    PFLOG_HDRLEN);
+#endif
+	}
+}
+
+/*
+ * Start output on the pflog interface.
+ */
+void
+pflogstart(struct ifnet *ifp)
+{
+	struct mbuf *m;
+	int s;
+
+	for (;;) {
+		s = splimp();
+		IF_DROP(&ifp->if_snd);
+		IF_DEQUEUE(&ifp->if_snd, m);
+		splx(s);
+
+		if (m == NULL)
+			return;
+		else
+			m_freem(m);
+	}
+}
+
+int
+pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+	struct rtentry *rt)
+{
+	m_freem(m);
+	return (0);
+}
+
+/* ARGSUSED */
+void
+pflogrtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa)
+{
+	if (rt)
+		rt->rt_rmx.rmx_mtu = PFLOGMTU;
+}
+
+/* ARGSUSED */
+int
+pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	switch (cmd) {
+	case SIOCSIFADDR:
+	case SIOCAIFADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCSIFFLAGS:
+		if (ifp->if_flags & IFF_UP)
+			ifp->if_flags |= IFF_RUNNING;
+		else
+			ifp->if_flags &= ~IFF_RUNNING;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+int
+pflog_packet(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int8_t dir,
+    u_int8_t reason, struct pf_rule *rm, struct pf_rule *am,
+    struct pf_ruleset *ruleset)
+{
+#if NBPFILTER > 0
+	struct ifnet *ifn;
+	struct pfloghdr hdr;
+	struct mbuf m1;
+
+	if (ifp == NULL || m == NULL || rm == NULL)
+		return (-1);
+
+	hdr.length = PFLOG_REAL_HDRLEN;
+	hdr.af = af;
+	hdr.action = rm->action;
+	hdr.reason = reason;
+	memcpy(hdr.ifname, ifp->if_xname, sizeof(hdr.ifname));
+
+	if (am == NULL) {
+		hdr.rulenr = htonl(rm->nr);
+		hdr.subrulenr = -1;
+		bzero(hdr.ruleset, sizeof(hdr.ruleset));
+	} else {
+		hdr.rulenr = htonl(am->nr);
+		hdr.subrulenr = htonl(rm->nr);
+		if (ruleset == NULL)
+			bzero(hdr.ruleset, sizeof(hdr.ruleset));
+		else
+			memcpy(hdr.ruleset, ruleset->name,
+			    sizeof(hdr.ruleset));
+
+
+	}
+	hdr.dir = dir;
+
+#ifdef INET
+	if (af == AF_INET && dir == PF_OUT) {
+		struct ip *ip;
+
+		ip = mtod(m, struct ip *);
+		ip->ip_sum = 0;
+		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
+	}
+#endif /* INET */
+
+	m1.m_next = m;
+	m1.m_len = PFLOG_HDRLEN;
+	m1.m_data = (char *) &hdr;
+
+	ifn = &(pflogif[0].sc_if);
+
+	if (ifn->if_bpf)
+		bpf_mtap(ifn->if_bpf, &m1);
+#endif
+
+	return (0);
+}
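
pflog_packet() above prepends its log header without copying the packet: a transient stack mbuf (m1) points its data at the header and chains the untouched packet behind it, and bpf_mtap() only ever reads the chain. Below is a minimal userland sketch of that pattern; struct buf and dump_chain() are invented stand-ins for the mbuf machinery, not kernel API.

#include <stdio.h>

struct buf {
	struct buf	*next;	/* like m_next */
	const char	*data;	/* like m_data */
	size_t		 len;	/* like m_len */
};

/* A consumer (standing in for bpf_mtap) that only walks the chain. */
static void
dump_chain(const struct buf *b)
{
	for (; b != NULL; b = b->next)
		fwrite(b->data, 1, b->len, stdout);
	putchar('\n');
}

int
main(void)
{
	struct buf pkt = { NULL, "packet-payload", 14 };
	char hdr[] = "LOGHDR:";
	struct buf b1;			/* transient head, stack only */

	b1.next = &pkt;			/* original data stays in place */
	b1.data = hdr;
	b1.len = sizeof(hdr) - 1;
	dump_chain(&b1);		/* prints "LOGHDR:packet-payload" */
	return (0);
}

The stack allocation is safe for the same reason it is in the kernel: the chain head never outlives the call that consumes it.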
diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h
new file mode 100644
index 000000000000..9333f48eebf7
--- /dev/null
+++ b/sys/contrib/pf/net/if_pflog.h
@@ -0,0 +1,75 @@
+/*	$OpenBSD: if_pflog.h,v 1.9 2003/07/15 20:27:27 dhartmei Exp $	*/
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFLOG_H_
+#define _NET_IF_PFLOG_H_
+
+struct pflog_softc {
+	struct ifnet	sc_if;		/* the interface */
+};
+
+/* XXX keep in sync with pfvar.h */
+#ifndef PF_RULESET_NAME_SIZE
+#define PF_RULESET_NAME_SIZE	16
+#endif
+
+struct pfloghdr {
+	u_int8_t	length;
+	sa_family_t	af;
+	u_int8_t	action;
+	u_int8_t	reason;
+	char		ifname[IFNAMSIZ];
+	char		ruleset[PF_RULESET_NAME_SIZE];
+	u_int32_t	rulenr;
+	u_int32_t	subrulenr;
+	u_int8_t	dir;
+	u_int8_t	pad[3];
+};
+
+#define PFLOG_HDRLEN		sizeof(struct pfloghdr)
+/* minus pad, also used as a signature */
+#define PFLOG_REAL_HDRLEN	offsetof(struct pfloghdr, pad)
+
+/* XXX remove later when old format logs are no longer needed */
+struct old_pfloghdr {
+	u_int32_t af;
+	char ifname[IFNAMSIZ];
+	short rnr;
+	u_short reason;
+	u_short action;
+	u_short dir;
+};
+#define OLD_PFLOG_HDRLEN	sizeof(struct old_pfloghdr)
+
+#ifdef _KERNEL
+
+#if NPFLOG > 0
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g)
+#else
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0)
+#endif /* NPFLOG > 0 */
+#endif /* _KERNEL */
+#endif /* _NET_IF_PFLOG_H_ */
diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c
new file mode 100644
index 000000000000..e29a06ec8615
--- /dev/null
+++ b/sys/contrib/pf/net/if_pfsync.c
@@ -0,0 +1,359 @@
+/*	$OpenBSD: if_pfsync.c,v 1.6 2003/06/21 09:07:01 djm Exp $	*/
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
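
The pfloghdr layout in if_pflog.h above is exactly what a BPF listener on pflog0 sees in front of each logged packet. A hedged sketch of such a userland reader follows, assuming a BSD with pf/pflog and libpcap; the header is mirrored locally with fixed-width types (IFNAMSIZ taken as 16) instead of being included, purely for illustration. Build with -lpcap; opening the interface needs root.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>		/* ntohl() */
#include <pcap.h>

struct pfloghdr_mirror {	/* mirrors struct pfloghdr above */
	uint8_t		length;
	uint8_t		af;
	uint8_t		action;
	uint8_t		reason;
	char		ifname[16];
	char		ruleset[16];
	uint32_t	rulenr;
	uint32_t	subrulenr;
	uint8_t		dir;
	uint8_t		pad[3];
};

int
main(void)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	struct pcap_pkthdr h;
	const unsigned char *p;
	pcap_t *pc;

	/* frames on pflog0 carry DLT_PFLOG: a pfloghdr, then the packet */
	if ((pc = pcap_open_live("pflog0", 160, 1, 1000, errbuf)) == NULL) {
		fprintf(stderr, "%s\n", errbuf);
		return (1);
	}
	if ((p = pcap_next(pc, &h)) != NULL &&
	    h.caplen >= sizeof(struct pfloghdr_mirror)) {
		const struct pfloghdr_mirror *lh = (const void *)p;

		printf("if=%.16s action=%u rule=%u dir=%u\n", lh->ifname,
		    (unsigned)lh->action, (unsigned)ntohl(lh->rulenr),
		    (unsigned)lh->dir);
	}
	pcap_close(pc);
	return (0);
}

tcpdump on a machine running pf performs the same decoding through its DLT_PFLOG support.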
+ */ + +#include "bpfilter.h" +#include "pfsync.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/timeout.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/route.h> +#include <net/bpf.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/in_var.h> +#endif + +#ifdef INET6 +#ifndef INET +#include <netinet/in.h> +#endif +#include <netinet6/nd6.h> +#endif /* INET6 */ + +#include <net/pfvar.h> +#include <net/if_pfsync.h> + +#define PFSYNC_MINMTU \ + (sizeof(struct pfsync_header) + sizeof(struct pf_state)) + +#ifdef PFSYNCDEBUG +#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) +int pfsyncdebug; +#else +#define DPRINTF(x) +#endif + +struct pfsync_softc pfsyncif; + +void pfsyncattach(int); +void pfsync_setmtu(struct pfsync_softc *sc, int); +int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +int pfsyncioctl(struct ifnet *, u_long, caddr_t); +void pfsyncstart(struct ifnet *); + +struct mbuf *pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action); +int pfsync_sendout(struct pfsync_softc *sc); +void pfsync_timeout(void *v); + +extern int ifqmaxlen; + +void +pfsyncattach(int npfsync) +{ + struct ifnet *ifp; + + pfsyncif.sc_mbuf = NULL; + pfsyncif.sc_ptr = NULL; + pfsyncif.sc_count = 8; + ifp = &pfsyncif.sc_if; + strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname); + ifp->if_softc = &pfsyncif; + ifp->if_ioctl = pfsyncioctl; + ifp->if_output = pfsyncoutput; + ifp->if_start = pfsyncstart; + ifp->if_type = IFT_PFSYNC; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = PFSYNC_HDRLEN; + ifp->if_baudrate = IF_Mbps(100); + pfsync_setmtu(&pfsyncif, MCLBYTES); + timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif); + if_attach(ifp); + if_alloc_sadl(ifp); + +#if NBPFILTER > 0 + bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); +#endif +} + +/* + * Start output on the pfsync interface. 
+ */ +void +pfsyncstart(struct ifnet *ifp) +{ + struct mbuf *m; + int s; + + for (;;) { + s = splimp(); + IF_DROP(&ifp->if_snd); + IF_DEQUEUE(&ifp->if_snd, m); + splx(s); + + if (m == NULL) + return; + else + m_freem(m); + } +} + +int +pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + m_freem(m); + return (0); +} + +/* ARGSUSED */ +int +pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct pfsync_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int s; + + switch (cmd) { + case SIOCSIFADDR: + case SIOCAIFADDR: + case SIOCSIFDSTADDR: + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + ifp->if_flags |= IFF_RUNNING; + else + ifp->if_flags &= ~IFF_RUNNING; + break; + case SIOCSIFMTU: + if (ifr->ifr_mtu < PFSYNC_MINMTU) + return (EINVAL); + if (ifr->ifr_mtu > MCLBYTES) + ifr->ifr_mtu = MCLBYTES; + s = splnet(); + if (ifr->ifr_mtu < ifp->if_mtu) + pfsync_sendout(sc); + pfsync_setmtu(sc, ifr->ifr_mtu); + splx(s); + break; + default: + return (ENOTTY); + } + + return (0); +} + +void +pfsync_setmtu(sc, mtu) + struct pfsync_softc *sc; + int mtu; +{ + sc->sc_count = (mtu - sizeof(struct pfsync_header)) / + sizeof(struct pf_state); + sc->sc_if.if_mtu = sizeof(struct pfsync_header) + + sc->sc_count * sizeof(struct pf_state); +} + +struct mbuf * +pfsync_get_mbuf(sc, action) + struct pfsync_softc *sc; + u_int8_t action; +{ + extern int hz; + struct pfsync_header *h; + struct mbuf *m; + int len; + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == NULL) { + sc->sc_if.if_oerrors++; + return (NULL); + } + + len = sc->sc_if.if_mtu; + if (len > MHLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + sc->sc_if.if_oerrors++; + return (NULL); + } + } + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = m->m_len = len; + + h = mtod(m, struct pfsync_header *); + h->version = PFSYNC_VERSION; + h->af = 0; + h->count = 0; + h->action = action; + + sc->sc_mbuf = m; + sc->sc_ptr = (struct pf_state *)((char *)h + PFSYNC_HDRLEN); + timeout_add(&sc->sc_tmo, hz); + + return (m); +} + +int +pfsync_pack_state(action, st) + u_int8_t action; + struct pf_state *st; +{ + extern struct timeval time; + struct ifnet *ifp = &pfsyncif.sc_if; + struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_header *h; + struct pf_state *sp; + struct pf_rule *r = st->rule.ptr; + struct mbuf *m; + u_long secs; + int s, ret; + + if (action >= PFSYNC_ACT_MAX) + return (EINVAL); + + s = splnet(); + m = sc->sc_mbuf; + if (m == NULL) { + if ((m = pfsync_get_mbuf(sc, action)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(m, struct pfsync_header *); + } else { + h = mtod(m, struct pfsync_header *); + if (h->action != action) { + pfsync_sendout(sc); + if ((m = pfsync_get_mbuf(sc, action)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(m, struct pfsync_header *); + } + } + + sp = sc->sc_ptr++; + h->count++; + bzero(sp, sizeof(*sp)); + + bcopy(&st->lan, &sp->lan, sizeof(sp->lan)); + bcopy(&st->gwy, &sp->gwy, sizeof(sp->gwy)); + bcopy(&st->ext, &sp->ext, sizeof(sp->ext)); + + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + secs = time.tv_sec; + sp->creation = htonl(secs - st->creation); + if (st->expire <= secs) + sp->expire = htonl(0); + else + sp->expire = htonl(st->expire - secs); + sp->packets[0] = htonl(st->packets[0]); + sp->packets[1] = htonl(st->packets[1]); + sp->bytes[0] = htonl(st->bytes[0]); + sp->bytes[1] = 
htonl(st->bytes[1]); + if (r == NULL) + sp->rule.nr = htonl(-1); + else + sp->rule.nr = htonl(r->nr); + sp->af = st->af; + sp->proto = st->proto; + sp->direction = st->direction; + sp->log = st->log; + sp->allow_opts = st->allow_opts; + + ret = 0; + if (h->count == sc->sc_count) + ret = pfsync_sendout(sc); + + splx(s); + return (0); +} + +int +pfsync_clear_state(st) + struct pf_state *st; +{ + struct ifnet *ifp = &pfsyncif.sc_if; + struct pfsync_softc *sc = ifp->if_softc; + struct mbuf *m = sc->sc_mbuf; + int s, ret; + + s = splnet(); + if (m == NULL && (m = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR)) == NULL) { + splx(s); + return (ENOMEM); + } + + ret = (pfsync_sendout(sc)); + splx(s); + return (ret); +} + +void +pfsync_timeout(void *v) +{ + struct pfsync_softc *sc = v; + int s; + + s = splnet(); + pfsync_sendout(sc); + splx(s); +} + +int +pfsync_sendout(sc) + struct pfsync_softc *sc; +{ + struct ifnet *ifp = &sc->sc_if; + struct mbuf *m = sc->sc_mbuf; + + timeout_del(&sc->sc_tmo); + sc->sc_mbuf = NULL; + sc->sc_ptr = NULL; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m); +#endif + + m_freem(m); + + return (0); +} diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h new file mode 100644 index 000000000000..9fff97fea8e1 --- /dev/null +++ b/sys/contrib/pf/net/if_pfsync.h @@ -0,0 +1,84 @@ +/* $OpenBSD: if_pfsync.h,v 1.2 2002/12/11 18:31:26 mickey Exp $ */ + +/* + * Copyright (c) 2001 Michael Shalayeff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
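
pfsync_setmtu() and pfsync_get_mbuf() above size one pfsync frame as a pfsync_header (four bytes: version, af, action, count) plus as many pf_state records as fit, and pfsync_pack_state() flushes the frame via pfsync_sendout() once it is full or the one-second timeout fires. A self-contained sketch of that sizing arithmetic; the record size below is a stand-in, since sizeof(struct pf_state) is a kernel internal.

#include <stdio.h>

int
main(void)
{
	const int hdrlen = 4;		/* sizeof(struct pfsync_header) */
	const int statelen = 256;	/* stand-in for sizeof(struct pf_state) */
	int mtu = 2048;			/* driver caps SIOCSIFMTU at MCLBYTES */
	int count, framelen;

	count = (mtu - hdrlen) / statelen;	/* sc_count in the softc */
	framelen = hdrlen + count * statelen;	/* if_mtu actually used */
	printf("mtu %d -> %d states per frame, %d bytes on the wire\n",
	    mtu, count, framelen);
	return (0);
}

Keeping the frame at or below MCLBYTES lets the whole batch live in a single mbuf cluster, which is why pfsyncioctl() clamps larger MTU requests.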
+ */ + +#ifndef _NET_IF_PFSYNC_H_ +#define _NET_IF_PFSYNC_H_ + +#ifdef _KERNEL +struct pfsync_softc { + struct ifnet sc_if; + + struct timeout sc_tmo; + struct mbuf *sc_mbuf; /* current cummulative mbuf */ + struct pf_state *sc_ptr; /* current ongoing state */ + int sc_count; /* number of states in one mtu */ +}; +#endif + +struct pfsync_header { + u_int8_t version; +#define PFSYNC_VERSION 1 + u_int8_t af; + u_int8_t action; +#define PFSYNC_ACT_CLR 0 +#define PFSYNC_ACT_INS 1 +#define PFSYNC_ACT_UPD 2 +#define PFSYNC_ACT_DEL 3 +#define PFSYNC_ACT_MAX 4 + u_int8_t count; +}; + +#define PFSYNC_HDRLEN sizeof(struct pfsync_header) +#define PFSYNC_ACTIONS \ + "CLR ST", "INS ST", "UPD ST", "DEL ST" + +#define pf_state_peer_hton(s,d) do { \ + (d)->seqlo = htonl((s)->seqlo); \ + (d)->seqhi = htonl((s)->seqhi); \ + (d)->seqdiff = htonl((s)->seqdiff); \ + (d)->max_win = htons((s)->max_win); \ + (d)->state = (s)->state; \ +} while (0) + +#define pf_state_peer_ntoh(s,d) do { \ + (d)->seqlo = ntohl((s)->seqlo); \ + (d)->seqhi = ntohl((s)->seqhi); \ + (d)->seqdiff = ntohl((s)->seqdiff); \ + (d)->max_win = ntohs((s)->max_win); \ + (d)->state = (s)->state; \ +} while (0) + +#ifdef _KERNEL +int pfsync_clear_state(struct pf_state *); +int pfsync_pack_state(u_int8_t, struct pf_state *); +#define pfsync_insert_state(st) pfsync_pack_state(PFSYNC_ACT_INS, (st)) +#define pfsync_update_state(st) pfsync_pack_state(PFSYNC_ACT_UPD, (st)) +#define pfsync_delete_state(st) pfsync_pack_state(PFSYNC_ACT_DEL, (st)) +#endif + +#endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c new file mode 100644 index 000000000000..6d9517184247 --- /dev/null +++ b/sys/contrib/pf/net/pf.c @@ -0,0 +1,5263 @@ +/* $OpenBSD: pf.c,v 1.390 2003/09/24 17:18:03 mcbride Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
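
pfsync_pack_state() converts the TCP sequence-tracking fields with pf_state_peer_hton() above before a state leaves the machine; pf_state_peer_ntoh() is the receive-side inverse. A self-contained round-trip demo follows, using a reduced peer structure (the real struct pf_state_peer lives in pfvar.h); the macro bodies are copied from the header above.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

struct peer {			/* reduced stand-in for pf_state_peer */
	uint32_t seqlo, seqhi, seqdiff;
	uint16_t max_win;
	uint8_t	 state;
};

#define pf_state_peer_hton(s,d) do {		\
	(d)->seqlo = htonl((s)->seqlo);		\
	(d)->seqhi = htonl((s)->seqhi);		\
	(d)->seqdiff = htonl((s)->seqdiff);	\
	(d)->max_win = htons((s)->max_win);	\
	(d)->state = (s)->state;		\
} while (0)

#define pf_state_peer_ntoh(s,d) do {		\
	(d)->seqlo = ntohl((s)->seqlo);		\
	(d)->seqhi = ntohl((s)->seqhi);		\
	(d)->seqdiff = ntohl((s)->seqdiff);	\
	(d)->max_win = ntohs((s)->max_win);	\
	(d)->state = (s)->state;		\
} while (0)

int
main(void)
{
	struct peer local = { 1000, 2000, 0, 65535, 4 }, wire, back;

	pf_state_peer_hton(&local, &wire);	/* host -> wire order */
	pf_state_peer_ntoh(&wire, &back);	/* wire -> host order */
	printf("seqlo %u -> wire -> %u\n", local.seqlo, back.seqlo);
	return (0);
}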
+ * + */ + +#include "bpfilter.h" +#include "pflog.h" +#include "pfsync.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/filio.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/kernel.h> +#include <sys/time.h> +#include <sys/pool.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/bpf.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_seq.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include <netinet/in_pcb.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/udp_var.h> +#include <netinet/icmp_var.h> + +#include <dev/rndvar.h> +#include <net/pfvar.h> +#include <net/if_pflog.h> +#include <net/if_pfsync.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet/in_pcb.h> +#include <netinet/icmp6.h> +#include <netinet6/nd6.h> +#endif /* INET6 */ + +#ifdef ALTQ +#include <altq/if_altq.h> +#endif + + +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +struct pf_state_tree; + +/* + * Global variables + */ + +struct pf_anchorqueue pf_anchors; +struct pf_ruleset pf_main_ruleset; +struct pf_altqqueue pf_altqs[2]; +struct pf_palist pf_pabuf; +struct pf_altqqueue *pf_altqs_active; +struct pf_altqqueue *pf_altqs_inactive; +struct pf_status pf_status; +struct ifnet *status_ifp; + +u_int32_t ticket_altqs_active; +u_int32_t ticket_altqs_inactive; +u_int32_t ticket_pabuf; + +struct timeout pf_expire_to; /* expire timeout */ + +struct pool pf_tree_pl, pf_rule_pl, pf_addr_pl; +struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; + +void pf_dynaddr_update(void *); +void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); +void pf_print_state(struct pf_state *); +void pf_print_flags(u_int8_t); + +u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, + u_int8_t); +void pf_change_ap(struct pf_addr *, u_int16_t *, + u_int16_t *, u_int16_t *, struct pf_addr *, + u_int16_t, u_int8_t, sa_family_t); +#ifdef INET6 +void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +#endif /* INET6 */ +void pf_change_icmp(struct pf_addr *, u_int16_t *, + struct pf_addr *, struct pf_addr *, u_int16_t, + u_int16_t *, u_int16_t *, u_int16_t *, + u_int16_t *, u_int8_t, sa_family_t); +void pf_send_tcp(const struct pf_rule *, sa_family_t, + const struct pf_addr *, const struct pf_addr *, + u_int16_t, u_int16_t, u_int32_t, u_int32_t, + u_int8_t, u_int16_t, u_int16_t, u_int8_t); +void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, + sa_family_t, struct pf_rule *); +struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct ifnet *, + struct pf_addr *, u_int16_t, struct pf_addr *, + u_int16_t, int); +struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct ifnet *, + struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t *); +int pf_test_tcp(struct pf_rule **, struct pf_state **, + int, struct ifnet *, struct mbuf *, int, int, + void *, struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_udp(struct pf_rule **, struct pf_state **, + int, struct ifnet *, struct mbuf *, int, int, + void *, struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_icmp(struct pf_rule **, struct pf_state **, + int, struct ifnet *, struct mbuf *, int, int, + void *, struct pf_pdesc *, struct 
pf_rule **, + struct pf_ruleset **); +int pf_test_other(struct pf_rule **, struct pf_state **, + int, struct ifnet *, struct mbuf *, int, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_fragment(struct pf_rule **, int, + struct ifnet *, struct mbuf *, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_state_tcp(struct pf_state **, int, + struct ifnet *, struct mbuf *, int, int, + void *, struct pf_pdesc *, u_short *); +int pf_test_state_udp(struct pf_state **, int, + struct ifnet *, struct mbuf *, int, int, + void *, struct pf_pdesc *); +int pf_test_state_icmp(struct pf_state **, int, + struct ifnet *, struct mbuf *, int, int, + void *, struct pf_pdesc *); +int pf_test_state_other(struct pf_state **, int, + struct ifnet *, struct pf_pdesc *); +struct pf_tag *pf_get_tag(struct mbuf *); +int pf_match_tag(struct mbuf *, struct pf_rule *, + struct pf_rule *, struct pf_rule *, + struct pf_tag *, int *); +void pf_hash(struct pf_addr *, struct pf_addr *, + struct pf_poolhashkey *, sa_family_t); +int pf_map_addr(u_int8_t, struct pf_pool *, + struct pf_addr *, struct pf_addr *, + struct pf_addr *); +int pf_get_sport(sa_family_t, u_int8_t, struct pf_pool *, + struct pf_addr *, struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t); +void pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *); +void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *); +int pf_socket_lookup(uid_t *, gid_t *, int, sa_family_t, + int, struct pf_pdesc *); +u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, + sa_family_t); +u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, + sa_family_t); +u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, + u_int16_t); +void pf_set_rt_ifp(struct pf_state *, + struct pf_addr *); +int pf_check_proto_cksum(struct mbuf *, int, int, + u_int8_t, sa_family_t); +int pf_addr_wrap_neq(struct pf_addr_wrap *, + struct pf_addr_wrap *); + + +struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = + { { &pf_state_pl, PFSTATE_HIWAT }, { &pf_frent_pl, PFFRAG_FRENT_HIWAT } }; + +#define STATE_LOOKUP() \ + do { \ + if (direction == PF_IN) \ + *state = pf_find_state(&tree_ext_gwy, &key); \ + else \ + *state = pf_find_state(&tree_lan_ext, &key); \ + if (*state == NULL) \ + return (PF_DROP); \ + if (direction == PF_OUT && \ + (((*state)->rule.ptr->rt == PF_ROUTETO && \ + (*state)->rule.ptr->direction == PF_OUT) || \ + ((*state)->rule.ptr->rt == PF_REPLYTO && \ + (*state)->rule.ptr->direction == PF_IN)) && \ + (*state)->rt_ifp != NULL && \ + (*state)->rt_ifp != ifp) \ + return (PF_PASS); \ + } while (0) + +#define STATE_TRANSLATE(s) \ + (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \ + ((s)->af == AF_INET6 && \ + ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \ + (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \ + (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ + (s)->lan.port != (s)->gwy.port + +static __inline int pf_state_compare(struct pf_tree_node *, + struct pf_tree_node *); + +struct pf_state_tree tree_lan_ext, tree_ext_gwy; +RB_GENERATE(pf_state_tree, pf_tree_node, entry, pf_state_compare); + +static __inline int +pf_state_compare(struct pf_tree_node *a, struct pf_tree_node *b) +{ + int diff; + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + 
return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) + return (1); + if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) + return (-1); + if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) + return (1); + if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) + return (-1); + if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) + return (1); + if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) + return (-1); + if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) + return (1); + if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) + return (-1); + if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) + return (1); + if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) + return (-1); + if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) + return (1); + if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) + return (-1); + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET6 */ + } + + if ((diff = a->port[0] - b->port[0]) != 0) + return (diff); + if ((diff = a->port[1] - b->port[1]) != 0) + return (diff); + + return (0); +} + +#ifdef INET6 +void +pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + dst->addr32[0] = src->addr32[0]; + break; +#endif /* INET */ + case AF_INET6: + dst->addr32[0] = src->addr32[0]; + dst->addr32[1] = src->addr32[1]; + dst->addr32[2] = src->addr32[2]; + dst->addr32[3] = src->addr32[3]; + break; + } +} +#endif + +struct pf_state * +pf_find_state(struct pf_state_tree *tree, struct pf_tree_node *key) +{ + struct pf_tree_node *k; + + pf_status.fcounters[FCNT_STATE_SEARCH]++; + k = RB_FIND(pf_state_tree, tree, key); + if (k) + return (k->state); + else + return (NULL); +} + +int +pf_insert_state(struct pf_state *state) +{ + struct pf_tree_node *keya, *keyb; + + keya = pool_get(&pf_tree_pl, PR_NOWAIT); + if (keya == NULL) + return (-1); + keya->state = state; + keya->proto = state->proto; + keya->af = state->af; + PF_ACPY(&keya->addr[0], &state->lan.addr, state->af); + keya->port[0] = state->lan.port; + PF_ACPY(&keya->addr[1], &state->ext.addr, state->af); + keya->port[1] = state->ext.port; + + /* Thou MUST NOT insert multiple duplicate keys */ + if (RB_INSERT(pf_state_tree, &tree_lan_ext, keya) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state insert failed: tree_lan_ext"); + printf(" lan: "); + pf_print_host(&state->lan.addr, state->lan.port, + state->af); + printf(" gwy: "); + pf_print_host(&state->gwy.addr, state->gwy.port, + state->af); + printf(" ext: "); + pf_print_host(&state->ext.addr, state->ext.port, + state->af); + printf("\n"); + } + pool_put(&pf_tree_pl, keya); + return (-1); + } + + keyb = pool_get(&pf_tree_pl, PR_NOWAIT); + if (keyb == NULL) { + /* Need to pull out the other state */ + RB_REMOVE(pf_state_tree, &tree_lan_ext, keya); + pool_put(&pf_tree_pl, keya); + return (-1); + } + keyb->state = state; + keyb->proto = state->proto; + keyb->af = state->af; + PF_ACPY(&keyb->addr[0], &state->ext.addr, state->af); + keyb->port[0] = state->ext.port; + PF_ACPY(&keyb->addr[1], &state->gwy.addr, state->af); + keyb->port[1] = 
state->gwy.port; + + if (RB_INSERT(pf_state_tree, &tree_ext_gwy, keyb) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state insert failed: tree_ext_gwy"); + printf(" lan: "); + pf_print_host(&state->lan.addr, state->lan.port, + state->af); + printf(" gwy: "); + pf_print_host(&state->gwy.addr, state->gwy.port, + state->af); + printf(" ext: "); + pf_print_host(&state->ext.addr, state->ext.port, + state->af); + printf("\n"); + } + RB_REMOVE(pf_state_tree, &tree_lan_ext, keya); + pool_put(&pf_tree_pl, keya); + pool_put(&pf_tree_pl, keyb); + return (-1); + } + + pf_status.fcounters[FCNT_STATE_INSERT]++; + pf_status.states++; +#if NPFSYNC + pfsync_insert_state(state); +#endif + return (0); +} + +void +pf_purge_timeout(void *arg) +{ + struct timeout *to = arg; + int s; + + s = splsoftnet(); + pf_purge_expired_states(); + pf_purge_expired_fragments(); + splx(s); + + timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz); +} + +u_int32_t +pf_state_expires(const struct pf_state *state) +{ + u_int32_t timeout; + u_int32_t start; + u_int32_t end; + u_int32_t states; + + /* handle all PFTM_* > PFTM_MAX here */ + if (state->timeout == PFTM_PURGE) + return (time.tv_sec); + if (state->timeout == PFTM_UNTIL_PACKET) + return (0); + KASSERT(state->timeout < PFTM_MAX); + timeout = state->rule.ptr->timeout[state->timeout]; + if (!timeout) + timeout = pf_default_rule.timeout[state->timeout]; + start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; + if (start) { + end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; + states = state->rule.ptr->states; + } else { + start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = pf_status.states; + } + if (end && states > start && start < end) { + if (states < end) + return (state->expire + timeout * (end - states) / + (end - start)); + else + return (time.tv_sec); + } + return (state->expire + timeout); +} + +void +pf_purge_expired_states(void) +{ + struct pf_tree_node *cur, *peer, *next; + struct pf_tree_node key; + + for (cur = RB_MIN(pf_state_tree, &tree_ext_gwy); cur; cur = next) { + next = RB_NEXT(pf_state_tree, &tree_ext_gwy, cur); + + if (pf_state_expires(cur->state) <= time.tv_sec) { + if (cur->state->src.state == PF_TCPS_PROXY_DST) + pf_send_tcp(cur->state->rule.ptr, + cur->state->af, + &cur->state->ext.addr, + &cur->state->lan.addr, + cur->state->ext.port, + cur->state->lan.port, + cur->state->src.seqhi, + cur->state->src.seqlo + 1, + 0, + TH_RST|TH_ACK, 0, 0); + RB_REMOVE(pf_state_tree, &tree_ext_gwy, cur); + + /* Need this key's peer (in the other tree) */ + key.state = cur->state; + key.proto = cur->state->proto; + key.af = cur->state->af; + PF_ACPY(&key.addr[0], &cur->state->lan.addr, + cur->state->af); + key.port[0] = cur->state->lan.port; + PF_ACPY(&key.addr[1], &cur->state->ext.addr, + cur->state->af); + key.port[1] = cur->state->ext.port; + + peer = RB_FIND(pf_state_tree, &tree_lan_ext, &key); + KASSERT(peer); + KASSERT(peer->state == cur->state); + RB_REMOVE(pf_state_tree, &tree_lan_ext, peer); + +#if NPFSYNC + pfsync_delete_state(cur->state); +#endif + if (--cur->state->rule.ptr->states <= 0) + pf_rm_rule(NULL, cur->state->rule.ptr); + if (cur->state->nat_rule.ptr != NULL) + if (--cur->state->nat_rule.ptr->states <= 0) + pf_rm_rule(NULL, + cur->state->nat_rule.ptr); + if (cur->state->anchor.ptr != NULL) + if (--cur->state->anchor.ptr->states <= 0) + pf_rm_rule(NULL, + cur->state->anchor.ptr); + pf_normalize_tcp_cleanup(cur->state); + pool_put(&pf_state_pl, 
cur->state); + pool_put(&pf_tree_pl, cur); + pool_put(&pf_tree_pl, peer); + pf_status.fcounters[FCNT_STATE_REMOVALS]++; + pf_status.states--; + } + } +} + +int +pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_TABLE) + return (0); + if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) + return (1); + return (0); +} + +void +pf_tbladdr_remove(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) + return; + pfr_detach_table(aw->p.tbl); + aw->p.tbl = NULL; +} + +void +pf_tbladdr_copyout(struct pf_addr_wrap *aw) +{ + struct pfr_ktable *kt = aw->p.tbl; + + if (aw->type != PF_ADDR_TABLE || kt == NULL) + return; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + aw->p.tbl = NULL; + aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? + kt->pfrkt_cnt : -1; +} + +int +pf_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) +{ + if (aw->type != PF_ADDR_DYNIFTL) + return (0); + aw->p.dyn = pool_get(&pf_addr_pl, PR_NOWAIT); + if (aw->p.dyn == NULL) + return (1); + bcopy(aw->v.ifname, aw->p.dyn->ifname, sizeof(aw->p.dyn->ifname)); + aw->p.dyn->ifp = ifunit(aw->p.dyn->ifname); + if (aw->p.dyn->ifp == NULL) { + pool_put(&pf_addr_pl, aw->p.dyn); + aw->p.dyn = NULL; + return (1); + } + aw->p.dyn->addr = &aw->v.a.addr; + aw->p.dyn->af = af; + aw->p.dyn->undefined = 1; + aw->p.dyn->hook_cookie = hook_establish( + aw->p.dyn->ifp->if_addrhooks, 1, + pf_dynaddr_update, aw->p.dyn); + if (aw->p.dyn->hook_cookie == NULL) { + pool_put(&pf_addr_pl, aw->p.dyn); + aw->p.dyn = NULL; + return (1); + } + pf_dynaddr_update(aw->p.dyn); + return (0); +} + +void +pf_dynaddr_update(void *p) +{ + struct pf_addr_dyn *ad = (struct pf_addr_dyn *)p; + struct ifaddr *ia; + int s, changed = 0; + + if (ad == NULL || ad->ifp == NULL) + panic("pf_dynaddr_update"); + s = splsoftnet(); + TAILQ_FOREACH(ia, &ad->ifp->if_addrlist, ifa_list) + if (ia->ifa_addr != NULL && + ia->ifa_addr->sa_family == ad->af) { + if (ad->af == AF_INET) { + struct in_addr *a, *b; + + a = &ad->addr->v4; + b = &((struct sockaddr_in *)ia->ifa_addr) + ->sin_addr; + if (ad->undefined || + memcmp(a, b, sizeof(*a))) { + bcopy(b, a, sizeof(*a)); + changed = 1; + } + } else if (ad->af == AF_INET6) { + struct in6_addr *a, *b; + + a = &ad->addr->v6; + b = &((struct sockaddr_in6 *)ia->ifa_addr) + ->sin6_addr; + if (ad->undefined || + memcmp(a, b, sizeof(*a))) { + bcopy(b, a, sizeof(*a)); + changed = 1; + } + } + if (changed) + ad->undefined = 0; + break; + } + if (ia == NULL) + ad->undefined = 1; + splx(s); +} + +void +pf_dynaddr_remove(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL) + return; + hook_disestablish(aw->p.dyn->ifp->if_addrhooks, + aw->p.dyn->hook_cookie); + pool_put(&pf_addr_pl, aw->p.dyn); + aw->p.dyn = NULL; +} + +void +pf_dynaddr_copyout(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL) + return; + bcopy(aw->p.dyn->ifname, aw->v.ifname, sizeof(aw->v.ifname)); + aw->p.dyn = (struct pf_addr_dyn *)1; +} + +void +pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t a = ntohl(addr->addr32[0]); + printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, + (a>>8)&255, a&255); + if (p) { + p = ntohs(p); + printf(":%u", p); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + u_int16_t b; + u_int8_t i, curstart = 255, curend = 0, + maxstart = 0, maxend = 0; + for (i = 0; i < 8; i++) { + 
if (!addr->addr16[i]) { + if (curstart == 255) + curstart = i; + else + curend = i; + } else { + if (curstart) { + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + curstart = 255; + } + } + } + } + for (i = 0; i < 8; i++) { + if (i >= maxstart && i <= maxend) { + if (maxend != 7) { + if (i == maxstart) + printf(":"); + } else { + if (i == maxend) + printf(":"); + } + } else { + b = ntohs(addr->addr16[i]); + printf("%x", b); + if (i < 7) + printf(":"); + } + } + if (p) { + p = ntohs(p); + printf("[%u]", p); + } + break; + } +#endif /* INET6 */ + } +} + +void +pf_print_state(struct pf_state *s) +{ + switch (s->proto) { + case IPPROTO_TCP: + printf("TCP "); + break; + case IPPROTO_UDP: + printf("UDP "); + break; + case IPPROTO_ICMP: + printf("ICMP "); + break; + case IPPROTO_ICMPV6: + printf("ICMPV6 "); + break; + default: + printf("%u ", s->proto); + break; + } + pf_print_host(&s->lan.addr, s->lan.port, s->af); + printf(" "); + pf_print_host(&s->gwy.addr, s->gwy.port, s->af); + printf(" "); + pf_print_host(&s->ext.addr, s->ext.port, s->af); + printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, + s->src.seqhi, s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, + s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" %u:%u", s->src.state, s->dst.state); +} + +void +pf_print_flags(u_int8_t f) +{ + if (f) + printf(" "); + if (f & TH_FIN) + printf("F"); + if (f & TH_SYN) + printf("S"); + if (f & TH_RST) + printf("R"); + if (f & TH_PUSH) + printf("P"); + if (f & TH_ACK) + printf("A"); + if (f & TH_URG) + printf("U"); + if (f & TH_ECE) + printf("E"); + if (f & TH_CWR) + printf("W"); +} + +#define PF_SET_SKIP_STEPS(i) \ + do { \ + while (head[i] != cur) { \ + head[i]->skip[i].ptr = cur; \ + head[i] = TAILQ_NEXT(head[i], entries); \ + } \ + } while (0) + +void +pf_calc_skip_steps(struct pf_rulequeue *rules) +{ + struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; + int i; + + cur = TAILQ_FIRST(rules); + prev = cur; + for (i = 0; i < PF_SKIP_COUNT; ++i) + head[i] = cur; + while (cur != NULL) { + + if (cur->ifp != prev->ifp || cur->ifnot != prev->ifnot) + PF_SET_SKIP_STEPS(PF_SKIP_IFP); + if (cur->direction != prev->direction) + PF_SET_SKIP_STEPS(PF_SKIP_DIR); + if (cur->af != prev->af) + PF_SET_SKIP_STEPS(PF_SKIP_AF); + if (cur->proto != prev->proto) + PF_SET_SKIP_STEPS(PF_SKIP_PROTO); + if (cur->src.not != prev->src.not || + pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); + if (cur->src.port[0] != prev->src.port[0] || + cur->src.port[1] != prev->src.port[1] || + cur->src.port_op != prev->src.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); + if (cur->dst.not != prev->dst.not || + pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); + if (cur->dst.port[0] != prev->dst.port[0] || + cur->dst.port[1] != prev->dst.port[1] || + cur->dst.port_op != prev->dst.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + + prev = cur; + cur = TAILQ_NEXT(cur, entries); + } + for (i = 0; i < PF_SKIP_COUNT; ++i) + PF_SET_SKIP_STEPS(i); +} + +int +pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) +{ + if (aw1->type != aw2->type) + return (1); + switch (aw1->type) { + case PF_ADDR_ADDRMASK: + if (PF_ANEQ(&aw1->v.a.addr, 
&aw2->v.a.addr, 0)) + return (1); + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + if (aw1->p.dyn->ifp != aw2->p.dyn->ifp) + return (1); + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + return (1); + return (0); + case PF_ADDR_NOROUTE: + return (0); + case PF_ADDR_TABLE: + return (aw1->p.tbl != aw2->p.tbl); + default: + printf("invalid address type: %d\n", aw1->type); + return (1); + } +} + +void +pf_rule_set_qid(struct pf_rulequeue *rules) +{ + struct pf_rule *rule; + + TAILQ_FOREACH(rule, rules, entries) + if (rule->qname[0] != 0) { + rule->qid = pf_qname_to_qid(rule->qname); + if (rule->pqname[0] != 0) + rule->pqid = pf_qname_to_qid(rule->pqname); + else + rule->pqid = rule->qid; + } +} + +u_int32_t +pf_qname_to_qid(char *qname) +{ + struct pf_altq *altq; + + TAILQ_FOREACH(altq, pf_altqs_active, entries) + if (!strcmp(altq->qname, qname)) + return (altq->qid); + + return (0); +} + +void +pf_update_anchor_rules() +{ + struct pf_rule *rule; + int i; + + for (i = 0; i < PF_RULESET_MAX; ++i) + TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, + entries) + if (rule->anchorname[0]) + rule->anchor = pf_find_anchor(rule->anchorname); + else + rule->anchor = NULL; +} + +u_int16_t +pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) +{ + u_int32_t l; + + if (udp && !cksum) + return (0x0000); + l = cksum + old - new; + l = (l >> 16) + (l & 65535); + l = l & 65535; + if (udp && !l) + return (0xFFFF); + return (l); +} + +void +pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, + struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) +{ + struct pf_addr ao; + u_int16_t po = *p; + + PF_ACPY(&ao, a, af); + PF_ACPY(a, an, af); + + *p = pn; + + switch (af) { +#ifdef INET + case AF_INET: + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + ao.addr16[0], an->addr16[0], 0), + ao.addr16[1], an->addr16[1], 0); + *p = pn; + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + po, pn, u); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u), + po, pn, u); + break; +#endif /* INET6 */ + } +} + + +/* Changes a u_int32_t. 
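
pf_change_ap() above, and the pf_change_a()/pf_change_a6()/pf_change_icmp() variants that follow, all lean on pf_cksum_fixup() to patch a 16-bit Internet checksum incrementally whenever one word of the packet is rewritten, rather than summing the whole packet again. A standalone sketch of that arithmetic; the helper names are invented, and the result is cross-checked against a full recompute.

#include <stdio.h>
#include <stdint.h>

/* Same folding arithmetic as pf_cksum_fixup() above (non-UDP case). */
static uint16_t
cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new)
{
	uint32_t l = cksum + old - new;

	l = (l >> 16) + (l & 65535);
	return (l & 65535);
}

/* Full ones-complement checksum over 16-bit words, for comparison. */
static uint16_t
cksum_full(const uint16_t *w, int n)
{
	uint32_t sum = 0;

	while (n-- > 0)
		sum += *w++;
	while (sum >> 16)
		sum = (sum >> 16) + (sum & 65535);
	return (~sum & 65535);
}

int
main(void)
{
	uint16_t pkt[4] = { 0x4500, 0x0054, 0xc0a8, 0x0001 };
	uint16_t ck = cksum_full(pkt, 4);
	uint16_t old = pkt[3], new = 0x0002;	/* e.g. NAT rewrites a word */

	pkt[3] = new;
	ck = cksum_fixup(ck, old, new);
	printf("fixup %#x, full recompute %#x\n", ck, cksum_full(pkt, 4));
	return (0);
}

This is what keeps the rewrite constant-time with respect to packet size: only the words that actually changed enter the update, and the chained calls seen in pf_change_ap() fix several words in a row.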
Uses a void * so there are no align restrictions */ +void +pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) +{ + u_int32_t ao; + + memcpy(&ao, a, sizeof(ao)); + memcpy(a, &an, sizeof(u_int32_t)); + *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), + ao % 65536, an % 65536, u); +} + +#ifdef INET6 +void +pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) +{ + struct pf_addr ao; + + PF_ACPY(&ao, a, AF_INET6); + PF_ACPY(a, an, AF_INET6); + + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); +} +#endif /* INET6 */ + +void +pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, + struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, + u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) +{ + struct pf_addr oia, ooa; + + PF_ACPY(&oia, ia, af); + PF_ACPY(&ooa, oa, af); + + /* Change inner protocol port, fix inner protocol checksum. */ + if (ip != NULL) { + u_int16_t oip = *ip; + u_int32_t opc; + + if (pc != NULL) + opc = *pc; + *ip = np; + if (pc != NULL) + *pc = pf_cksum_fixup(*pc, oip, *ip, u); + *ic = pf_cksum_fixup(*ic, oip, *ip, 0); + if (pc != NULL) + *ic = pf_cksum_fixup(*ic, opc, *pc, 0); + } + /* Change inner ip address, fix inner ip and icmp checksums. */ + PF_ACPY(ia, na, af); + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t oh2c = *h2c; + + *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], u), + oia.addr16[1], ia->addr16[1], u), + oia.addr16[2], ia->addr16[2], u), + oia.addr16[3], ia->addr16[3], u), + oia.addr16[4], ia->addr16[4], u), + oia.addr16[5], ia->addr16[5], u), + oia.addr16[6], ia->addr16[6], u), + oia.addr16[7], ia->addr16[7], u); + break; +#endif /* INET6 */ + } + /* Change outer ip address, fix outer ip or icmpv6 checksum. 
*/ + PF_ACPY(oa, na, af); + switch (af) { +#ifdef INET + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; +#endif /* INET6 */ + } +} + +void +pf_send_tcp(const struct pf_rule *r, sa_family_t af, + const struct pf_addr *saddr, const struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, + u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl) +{ + struct mbuf *m; + struct m_tag *mtag; + int len, tlen; +#ifdef INET + struct ip *h; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr *h6; +#endif /* INET6 */ + struct tcphdr *th; + char *opt; + + /* maximum segment size tcp option */ + tlen = sizeof(struct tcphdr); + if (mss) + tlen += 4; + + switch (af) { +#ifdef INET + case AF_INET: + len = sizeof(struct ip) + tlen; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + len = sizeof(struct ip6_hdr) + tlen; + break; +#endif /* INET6 */ + } + + /* create outgoing mbuf */ + mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); + if (mtag == NULL) + return; + m = m_gethdr(M_DONTWAIT, MT_HEADER); + if (m == NULL) { + m_tag_free(mtag); + return; + } + m_tag_prepend(m, mtag); +#ifdef ALTQ + if (r != NULL && r->qid) { + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = af; + atag->hdr = mtod(m, struct ip *); + m_tag_prepend(m, mtag); + } + } +#endif + m->m_data += max_linkhdr; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + switch (af) { +#ifdef INET + case AF_INET: + h = mtod(m, struct ip *); + + /* IP header fields included in the TCP checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(tlen); + h->ip_src.s_addr = saddr->v4.s_addr; + h->ip_dst.s_addr = daddr->v4.s_addr; + + th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + + /* IP header fields included in the TCP checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(tlen); + memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); + memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); + + th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); + break; +#endif /* INET6 */ + } + + /* TCP header */ + th->th_sport = sport; + th->th_dport = dport; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_off = tlen >> 2; + th->th_flags = flags; + th->th_win = htons(win); + + if (mss) { + opt = (char *)(th + 1); + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + HTONS(mss); + bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); + } + + switch (af) { +#ifdef INET + case AF_INET: + /* TCP checksum */ + th->th_sum = in_cksum(m, len); + + /* Finish the IP header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_len = htons(len); + h->ip_off = htons(ip_mtudisc ? IP_DF : 0); + h->ip_ttl = ttl ? 
ttl : ip_defttl; + h->ip_sum = 0; + ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL, + (void *)NULL); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + /* TCP checksum */ + th->th_sum = in6_cksum(m, IPPROTO_TCP, + sizeof(struct ip6_hdr), tlen); + + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + + ip6_output(m, NULL, NULL, 0, NULL, NULL); + break; +#endif /* INET6 */ + } +} + +void +pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, + struct pf_rule *r) +{ + struct m_tag *mtag; + struct mbuf *m0; + + mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); + if (mtag == NULL) + return; + m0 = m_copy(m, 0, M_COPYALL); + if (m0 == NULL) { + m_tag_free(mtag); + return; + } + m_tag_prepend(m0, mtag); + +#ifdef ALTQ + if (r->qid) { + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = af; + atag->hdr = mtod(m0, struct ip *); + m_tag_prepend(m0, mtag); + } + } +#endif + + switch (af) { +#ifdef INET + case AF_INET: + icmp_error(m0, type, code, 0, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + icmp6_error(m0, type, code, 0); + break; +#endif /* INET6 */ + } +} + +/* + * Return 1 if the addresses a and b match (with mask m), otherwise return 0. + * If n is 0, they match if they are equal. If n is != 0, they match if they + * are different. + */ +int +pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, + struct pf_addr *b, sa_family_t af) +{ + int match = 0; + + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) + match++; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) && + ((a->addr32[1] & m->addr32[1]) == + (b->addr32[1] & m->addr32[1])) && + ((a->addr32[2] & m->addr32[2]) == + (b->addr32[2] & m->addr32[2])) && + ((a->addr32[3] & m->addr32[3]) == + (b->addr32[3] & m->addr32[3]))) + match++; + break; +#endif /* INET6 */ + } + if (match) { + if (n) + return (0); + else + return (1); + } else { + if (n) + return (1); + else + return (0); + } +} + +int +pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) +{ + switch (op) { + case PF_OP_IRG: + return ((p > a1) && (p < a2)); + case PF_OP_XRG: + return ((p < a1) || (p > a2)); + case PF_OP_RRG: + return ((p >= a1) && (p <= a2)); + case PF_OP_EQ: + return (p == a1); + case PF_OP_NE: + return (p != a1); + case PF_OP_LT: + return (p < a1); + case PF_OP_LE: + return (p <= a1); + case PF_OP_GT: + return (p > a1); + case PF_OP_GE: + return (p >= a1); + } + return (0); /* never reached */ +} + +int +pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) +{ + NTOHS(a1); + NTOHS(a2); + NTOHS(p); + return (pf_match(op, a1, a2, p)); +} + +int +pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) +{ + if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, u)); +} + +int +pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) +{ + if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, g)); +} + +struct pf_tag * +pf_get_tag(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL) + return ((struct pf_tag *)(mtag + 1)); + else + return (NULL); +} + +int +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat, 
+ struct pf_rule *rdr, struct pf_tag *pftag, int *tag) +{ + if (*tag == -1) { /* find mbuf tag */ + pftag = pf_get_tag(m); + if (pftag != NULL) + *tag = pftag->tag; + else + *tag = 0; + if (nat != NULL && nat->tag) + *tag = nat->tag; + if (rdr != NULL && rdr->tag) + *tag = rdr->tag; + } + + return ((!r->match_tag_not && r->match_tag == *tag) || + (r->match_tag_not && r->match_tag != *tag)); +} + +int +pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag) +{ + struct m_tag *mtag; + + if (tag <= 0) + return (0); + + if (pftag == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT); + if (mtag == NULL) + return (1); + ((struct pf_tag *)(mtag + 1))->tag = tag; + m_tag_prepend(m, mtag); + } else + pftag->tag = tag; + + return (0); +} + +#define PF_STEP_INTO_ANCHOR(r, a, s, n) \ + do { \ + if ((r) == NULL || (r)->anchor == NULL || \ + (s) != NULL || (a) != NULL) \ + panic("PF_STEP_INTO_ANCHOR"); \ + (a) = (r); \ + (s) = TAILQ_FIRST(&(r)->anchor->rulesets); \ + (r) = NULL; \ + while ((s) != NULL && ((r) = \ + TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ + (s) = TAILQ_NEXT((s), entries); \ + if ((r) == NULL) { \ + (r) = TAILQ_NEXT((a), entries); \ + (a) = NULL; \ + } \ + } while (0) + +#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n) \ + do { \ + if ((r) != NULL || (a) == NULL || (s) == NULL) \ + panic("PF_STEP_OUT_OF_ANCHOR"); \ + (s) = TAILQ_NEXT((s), entries); \ + while ((s) != NULL && ((r) = \ + TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ + (s) = TAILQ_NEXT((s), entries); \ + if ((r) == NULL) { \ + (r) = TAILQ_NEXT((a), entries); \ + (a) = NULL; \ + } \ + } while (0) + +#ifdef INET6 +void +pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, + struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + break; +#endif /* INET */ + case AF_INET6: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | + ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); + naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | + ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); + naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | + ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); + break; + } +} + +void +pf_addr_inc(struct pf_addr *addr, u_int8_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); + break; +#endif /* INET */ + case AF_INET6: + if (addr->addr32[3] == 0xffffffff) { + addr->addr32[3] = 0; + if (addr->addr32[2] == 0xffffffff) { + addr->addr32[2] = 0; + if (addr->addr32[1] == 0xffffffff) { + addr->addr32[1] = 0; + addr->addr32[0] = + htonl(ntohl(addr->addr32[0]) + 1); + } else + addr->addr32[1] = + htonl(ntohl(addr->addr32[1]) + 1); + } else + addr->addr32[2] = + htonl(ntohl(addr->addr32[2]) + 1); + } else + addr->addr32[3] = + htonl(ntohl(addr->addr32[3]) + 1); + break; + } +} +#endif /* INET6 */ + +#define mix(a,b,c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while (0) + +/* + * hash function based on 
bridge_hash in if_bridge.c + */ +void +pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, + struct pf_poolhashkey *key, sa_family_t af) +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; + + switch (af) { +#ifdef INET + case AF_INET: + a += inaddr->addr32[0]; + b += key->key32[1]; + mix(a, b, c); + hash->addr32[0] = c + key->key32[2]; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + a += inaddr->addr32[0]; + b += inaddr->addr32[2]; + mix(a, b, c); + hash->addr32[0] = c; + a += inaddr->addr32[1]; + b += inaddr->addr32[3]; + c += key->key32[1]; + mix(a, b, c); + hash->addr32[1] = c; + a += inaddr->addr32[2]; + b += inaddr->addr32[1]; + c += key->key32[2]; + mix(a, b, c); + hash->addr32[2] = c; + a += inaddr->addr32[3]; + b += inaddr->addr32[0]; + c += key->key32[3]; + mix(a, b, c); + hash->addr32[3] = c; + break; +#endif /* INET6 */ + } +} + +int +pf_map_addr(u_int8_t af, struct pf_pool *rpool, struct pf_addr *saddr, + struct pf_addr *naddr, struct pf_addr *init_addr) +{ + unsigned char hash[16]; + struct pf_addr *raddr; + struct pf_addr *rmask; + struct pf_pooladdr *acur = rpool->cur; + + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + return (1); + if (rpool->cur->addr.type == PF_ADDR_DYNIFTL && + rpool->cur->addr.p.dyn->undefined) + return (1); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + return (1); /* unsupported */ + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_NONE: + PF_ACPY(naddr, raddr, af); + break; + case PF_POOL_BITMASK: + PF_POOLMASK(naddr, raddr, rmask, saddr, af); + break; + case PF_POOL_RANDOM: + if (init_addr != NULL && PF_AZERO(init_addr, af)) { + switch (af) { +#ifdef INET + case AF_INET: + rpool->counter.addr32[0] = arc4random(); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rmask->addr32[3] != 0xffffffff) + rpool->counter.addr32[3] = arc4random(); + else + break; + if (rmask->addr32[2] != 0xffffffff) + rpool->counter.addr32[2] = arc4random(); + else + break; + if (rmask->addr32[1] != 0xffffffff) + rpool->counter.addr32[1] = arc4random(); + else + break; + if (rmask->addr32[0] != 0xffffffff) + rpool->counter.addr32[0] = arc4random(); + break; +#endif /* INET6 */ + } + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_ACPY(init_addr, naddr, af); + + } else { + PF_AINC(&rpool->counter, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + } + break; + case PF_POOL_SRCHASH: + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + break; + case PF_POOL_ROUNDROBIN: + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + goto get_addr; + + try_next: + if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) + rpool->cur = TAILQ_FIRST(&rpool->list); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contain no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + PF_ACPY(&rpool->counter, raddr, af); + } + + get_addr: + PF_ACPY(naddr, 
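The mix() macro and pf_hash() above are the Bob Jenkins style mixer the comment credits to bridge_hash(): PF_POOL_SRCHASH folds the source address and the rule's 128-bit pool key through it so the same source always lands on the same pool member. A standalone rendition of the AF_INET case; the key, random in practice, is fixed here so the output is reproducible:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define mix(a, b, c) do {                        \
    a -= b; a -= c; a ^= (c >> 13);              \
    b -= c; b -= a; b ^= (a << 8);               \
    c -= a; c -= b; c ^= (b >> 13);              \
    a -= b; a -= c; a ^= (c >> 12);              \
    b -= c; b -= a; b ^= (a << 16);              \
    c -= a; c -= b; c ^= (b >> 5);               \
    a -= b; a -= c; a ^= (c >> 3);               \
    b -= c; b -= a; b ^= (a << 10);              \
    c -= a; c -= b; c ^= (b >> 15);              \
} while (0)

static uint32_t
hash_v4(uint32_t addr, const uint32_t key[4])
{
    uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key[0]; /* golden ratio */

    a += addr;
    b += key[1];
    mix(a, b, c);
    return (c + key[2]); /* what pf stores in hash->addr32[0] */
}

int
main(void)
{
    const uint32_t key[4] = { 1, 2, 3, 4 }; /* stand-in for the pool key */

    printf("%08" PRIx32 "\n", hash_v4(0xc0a80101, key)); /* 192.168.1.1 */
    return (0);
}

Unsigned 32-bit wraparound is exactly what the mixer relies on, so the macro is well-defined C.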
&rpool->counter, af); + PF_AINC(&rpool->counter, af); + break; + } + + if (pf_status.debug >= PF_DEBUG_MISC && + (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + printf("pf_map_addr: selected address: "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + + return (0); +} + +int +pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_pool *rpool, + struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high) +{ + struct pf_tree_node key; + struct pf_addr init_addr; + u_int16_t cut; + + bzero(&init_addr, sizeof(init_addr)); + if (pf_map_addr(af, rpool, saddr, naddr, &init_addr)) + return (1); + + do { + key.af = af; + key.proto = proto; + PF_ACPY(&key.addr[0], daddr, key.af); + PF_ACPY(&key.addr[1], naddr, key.af); + key.port[0] = dport; + + /* + * port search; start random, step; + * similar 2 portloop in in_pcbbind + */ + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { + key.port[1] = 0; + if (pf_find_state(&tree_ext_gwy, &key) == NULL) + return (0); + } else if (low == 0 && high == 0) { + key.port[1] = *nport; + if (pf_find_state(&tree_ext_gwy, &key) == NULL) { + return (0); + } + } else if (low == high) { + key.port[1] = htons(low); + if (pf_find_state(&tree_ext_gwy, &key) == NULL) { + *nport = htons(low); + return (0); + } + } else { + u_int16_t tmp; + + if (low > high) { + tmp = low; + low = high; + high = tmp; + } + /* low < high */ + cut = arc4random() % (1 + high - low) + low; + /* low <= cut <= high */ + for (tmp = cut; tmp <= high; ++(tmp)) { + key.port[1] = htons(tmp); + if (pf_find_state(&tree_ext_gwy, &key) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + for (tmp = cut - 1; tmp >= low; --(tmp)) { + key.port[1] = htons(tmp); + if (pf_find_state(&tree_ext_gwy, &key) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_RANDOM: + case PF_POOL_ROUNDROBIN: + if (pf_map_addr(af, rpool, saddr, naddr, &init_addr)) + return (1); + break; + case PF_POOL_NONE: + case PF_POOL_SRCHASH: + case PF_POOL_BITMASK: + default: + return (1); + break; + } + } while (! PF_AEQ(&init_addr, naddr, af) ); + + return (1); /* none available */ +} + +struct pf_rule * +pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct ifnet *ifp, struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, int rs_num) +{ + struct pf_rule *r, *rm = NULL, *anchorrule = NULL; + struct pf_ruleset *ruleset = NULL; + + r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + while (r && rm == NULL) { + struct pf_rule_addr *src = NULL, *dst = NULL; + struct pf_addr_wrap *xdst = NULL; + + if (r->action == PF_BINAT && direction == PF_IN) { + src = &r->dst; + if (r->rpool.cur != NULL) + xdst = &r->rpool.cur->addr; + } else { + src = &r->src; + dst = &r->dst; + } + + r->evaluations++; + if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) || + (r->ifp == ifp && r->ifnot))) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != pd->af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not)) + r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (src->port_op && !pf_match_port(src->port_op, + src->port[0], src->port[1], sport)) + r = r->skip[src == &r->src ? 
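pf_get_sport() above hunts for a free proxy port the way the port loop in in_pcbbind() does: pick a random cut inside [low, high], scan upward to high, then downward to low, and take the first port with no conflicting state. A toy version with a made-up in_use() predicate standing in for the pf_find_state() probe; arc4random(3) is the BSD API the diff uses, and the network-order handling is skipped here for clarity. tmp is signed so the downward scan terminates even when low is 0:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> /* arc4random(3) on BSD */

static int
in_use(uint16_t port)
{
    return (port == 50000); /* pretend one state already owns 50000 */
}

static int
pick_port(uint16_t low, uint16_t high, uint16_t *nport)
{
    int cut, tmp;

    if (low > high) {
        tmp = low; low = high; high = tmp;
    }
    cut = arc4random() % (1 + high - low) + low; /* low <= cut <= high */
    for (tmp = cut; tmp <= high; tmp++)          /* scan up from the cut */
        if (!in_use((uint16_t)tmp)) {
            *nport = (uint16_t)tmp;
            return (0);
        }
    for (tmp = cut - 1; tmp >= low; tmp--)       /* then down from it */
        if (!in_use((uint16_t)tmp)) {
            *nport = (uint16_t)tmp;
            return (0);
        }
    return (1); /* none available */
}

int
main(void)
{
    uint16_t port;

    if (pick_port(49152, 65535, &port) == 0)
        printf("allocated %u\n", port);
    return (0);
}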
PF_SKIP_SRC_PORT : + PF_SKIP_DST_PORT].ptr; + else if (dst != NULL && + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) + r = TAILQ_NEXT(r, entries); + else if (dst != NULL && dst->port_op && + !pf_match_port(dst->port_op, dst->port[0], + dst->port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != + IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, + off, pd->hdr.tcp), r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->anchor == NULL) + rm = r; + else + PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num); + if (r == NULL && anchorrule != NULL) + PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset, + rs_num); + } + if (rm != NULL && (rm->action == PF_NONAT || + rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + return (NULL); + return (rm); +} + +struct pf_rule * +pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, + struct ifnet *ifp, + struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport) +{ + struct pf_rule *r = NULL; + + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, ifp, saddr, + sport, daddr, dport, PF_RULESET_BINAT); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, ifp, + saddr, sport, daddr, dport, PF_RULESET_NAT); + } else { + r = pf_match_translation(pd, m, off, direction, ifp, saddr, + sport, daddr, dport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, ifp, + saddr, sport, daddr, dport, PF_RULESET_BINAT); + } + + if (r != NULL) { + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + break; + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, &r->rpool, saddr, + daddr, dport, naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1])) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation " + "(%u-%u) failed\n", + r->rpool.proxy_port[0], + r->rpool.proxy_port[1])); + return (NULL); + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == + PF_ADDR_DYNIFTL && + r->rpool.cur->addr.p.dyn->undefined) + return (NULL); + else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, + saddr, pd->af); + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL && + r->src.addr.p.dyn->undefined) + return (NULL); + else + PF_POOLMASK(naddr, + &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, saddr, + pd->af); + break; + } + break; + case PF_RDR: { + if (pf_map_addr(r->af, &r->rpool, saddr, naddr, NULL)) + return (NULL); + + if (r->rpool.proxy_port[1]) { + u_int32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - + ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - + r->rpool.proxy_port[0] + 1)) + + r->rpool.proxy_port[0]; + + /* wrap around if necessary */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((u_int16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + return (NULL); + break; + } + } + + return (r); +} + +int +pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, sa_family_t af, + int proto, struct pf_pdesc *pd) +{ + struct pf_addr *saddr, *daddr; + u_int16_t sport, dport; + struct inpcbtable *tb; + struct inpcb *inp; + + *uid = 
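The PF_RDR arm above maps the incoming destination port onto the redirect pool's proxy range: offset from the rule's first destination port, modulo the width of the proxy range, rebased at the start of that range. The same arithmetic in host byte order (the kernel works on network-order shorts via ntohs/htons):

#include <assert.h>
#include <stdint.h>

static uint16_t
rdr_port(uint16_t dport, uint16_t dst_lo, uint16_t px_lo, uint16_t px_hi)
{
    uint32_t nport;

    nport = ((uint32_t)dport - dst_lo) % (px_hi - px_lo + 1) + px_lo;
    if (nport > 65535)      /* wrap around, mirroring the diff */
        nport -= 65535;
    return ((uint16_t)nport);
}

int
main(void)
{
    /* hypothetical rule: rdr ... port 8000:8010 -> host port 9000:9004 */
    assert(rdr_port(8007, 8000, 9000, 9004) == 9002);
    return (0);
}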
UID_MAX; + *gid = GID_MAX; + switch (proto) { + case IPPROTO_TCP: + sport = pd->hdr.tcp->th_sport; + dport = pd->hdr.tcp->th_dport; + tb = &tcbtable; + break; + case IPPROTO_UDP: + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + tb = &udbtable; + break; + default: + return (0); + } + if (direction == PF_IN) { + saddr = pd->src; + daddr = pd->dst; + } else { + u_int16_t p; + + p = sport; + sport = dport; + dport = p; + saddr = pd->dst; + daddr = pd->src; + } + switch(af) { + case AF_INET: + inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); + if (inp == NULL) { + inp = in_pcblookup(tb, &saddr->v4, sport, &daddr->v4, + dport, INPLOOKUP_WILDCARD); + if (inp == NULL) + return (0); + } + break; +#ifdef INET6 + case AF_INET6: + inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, + dport); + if (inp == NULL) { + inp = in_pcblookup(tb, &saddr->v6, sport, &daddr->v6, + dport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6); + if (inp == NULL) + return (0); + } + break; +#endif /* INET6 */ + + default: + return (0); + } + *uid = inp->inp_socket->so_euid; + *gid = inp->inp_socket->so_egid; + return (1); +} + +u_int8_t +pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int8_t wscale = 0; + + hlen = th_off << 2; /* hlen <= sizeof(hdr) */ + if (hlen <= sizeof(struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof(struct tcphdr); + hlen -= sizeof(struct tcphdr); + while (hlen >= 3) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_WINDOW: + wscale = opt[2]; + if (wscale > TCP_MAX_WINSHIFT) + wscale = TCP_MAX_WINSHIFT; + wscale |= PF_WSCALE_FLAG; + /* fallthrough */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + } + } + return (wscale); +} + +u_int16_t +pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int16_t mss = tcp_mssdflt; + + hlen = th_off << 2; /* hlen <= sizeof(hdr) */ + if (hlen <= sizeof(struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof(struct tcphdr); + hlen -= sizeof(struct tcphdr); + while (hlen >= TCPOLEN_MAXSEG) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_MAXSEG: + bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); + /* fallthrough */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + } + } + return (mss); +} + +u_int16_t +pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) +{ +#ifdef INET + struct sockaddr_in *dst; + struct route ro; +#endif /* INET */ +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro6; +#endif /* INET6 */ + struct rtentry *rt = NULL; + int hlen; + u_int16_t mss = tcp_mssdflt; + + switch (af) { +#ifdef INET + case AF_INET: + hlen = sizeof(struct ip); + bzero(&ro, sizeof(ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + rtalloc_noclone(&ro, NO_CLONING); + rt = ro.ro_rt; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + hlen = sizeof(struct ip6_hdr); + bzero(&ro6, sizeof(ro6)); + dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + 
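pf_get_wscale() and pf_get_mss() above share one option-walking idiom: EOL and NOP advance a single byte, every other option advances by its length octet, clamped to a minimum of 2 so a corrupt length can never loop forever. A self-contained walk over a raw TCP option block, extracting the MSS; unlike the kernel's raw bcopy, this sketch decodes the value to host order so the assertion is portable:

#include <assert.h>
#include <stdint.h>

#define TCPOPT_EOL     0
#define TCPOPT_NOP     1
#define TCPOPT_MAXSEG  2
#define TCPOLEN_MAXSEG 4

static uint16_t
parse_mss(const uint8_t *opt, int len, uint16_t dflt)
{
    uint16_t mss = dflt;
    uint8_t optlen;

    while (len >= TCPOLEN_MAXSEG) {
        switch (*opt) {
        case TCPOPT_EOL: /* the diff keeps scanning past EOL, too */
        case TCPOPT_NOP:
            opt++;
            len--;
            break;
        case TCPOPT_MAXSEG:
            mss = (uint16_t)(opt[2] << 8 | opt[3]); /* decoded here */
            /* FALLTHROUGH */
        default:
            optlen = opt[1];
            if (optlen < 2)
                optlen = 2; /* defend against a bogus length octet */
            len -= optlen;
            opt += optlen;
        }
    }
    return (mss);
}

int
main(void)
{
    /* NOP, NOP, MSS(1460): 01 01 02 04 05 b4 */
    const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4 };

    assert(parse_mss(opts, sizeof(opts), 536) == 1460);
    return (0);
}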
rtalloc_noclone((struct route *)&ro6, NO_CLONING); + rt = ro6.ro_rt; + break; +#endif /* INET6 */ + } + + if (rt && rt->rt_ifp) { + mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); + mss = max(tcp_mssdflt, mss); + RTFREE(rt); + } + mss = min(mss, offer); + mss = max(mss, 64); /* sanity - at least max opt space */ + return (mss); +} + +void +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) +{ + struct pf_rule *r = s->rule.ptr; + + s->rt_ifp = NULL; + if (!r->rt || r->rt == PF_FASTROUTE) + return; + switch (s->af) { +#ifdef INET + case AF_INET: + pf_map_addr(AF_INET, &r->rpool, saddr, + &s->rt_addr, NULL); + s->rt_ifp = r->rpool.cur->ifp; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_map_addr(AF_INET6, &r->rpool, saddr, + &s->rt_addr, NULL); + s->rt_ifp = r->rpool.cur->ifp; + break; +#endif /* INET6 */ + } +} + +int +pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, + struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nat = NULL, *rdr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr baddr, naddr; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t bport, nport = 0; + sa_family_t af = pd->af; + int lookup = -1; + uid_t uid; + gid_t gid; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + u_short reason; + int rewrite = 0; + struct pf_tag *pftag = NULL; + int tag = -1; + u_int16_t mss = tcp_mssdflt; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + bport = nport = th->th_sport; + /* check outgoing packet for BINAT/NAT */ + if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, + saddr, th->th_sport, daddr, th->th_dport, + &naddr, &nport)) != NULL) { + PF_ACPY(&baddr, saddr, af); + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &naddr, nport, 0, af); + rewrite++; + if (nat->natpass) + r = NULL; + } + } else { + bport = nport = th->th_dport; + /* check incoming packet for BINAT/RDR */ + if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, + th->th_sport, daddr, th->th_dport, + &naddr, &nport)) != NULL) { + PF_ACPY(&baddr, daddr, af); + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &naddr, nport, 0, af); + rewrite++; + if (rdr->natpass) + r = NULL; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) || + (r->ifp == ifp && r->ifnot))) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != IPPROTO_TCP) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if ((r->flagset & th->th_flags) != r->flags) + r = TAILQ_NEXT(r, entries); + else if (r->uid.op && (lookup != -1 || (lookup = + 
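pf_calc_mss() above derives the MSS the synproxy will advertise: route MTU minus IP and TCP headers, floored at tcp_mssdflt, capped at the peer's offer, and never below 64 so there is room for options. The clamp in isolation, where 20 stands for sizeof(struct tcphdr) and the example uses plain ethernet IPv4 values:

#include <assert.h>
#include <stdint.h>

static uint16_t
calc_mss(uint16_t if_mtu, uint16_t iphlen, uint16_t offer, uint16_t dflt)
{
    uint16_t mss;

    mss = if_mtu - iphlen - 20;   /* 20 == sizeof(struct tcphdr) */
    if (mss < dflt)
        mss = dflt;               /* at least tcp_mssdflt */
    if (mss > offer)
        mss = offer;              /* never above what the peer offered */
    if (mss < 64)
        mss = 64;                 /* sanity: at least max option space */
    return (mss);
}

int
main(void)
{
    assert(calc_mss(1500, 20, 1460, 512) == 1460);
    return (0);
}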
pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_TCP, + pd), 1)) && + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + uid)) + r = TAILQ_NEXT(r, entries); + else if (r->gid.op && (lookup != -1 || (lookup = + pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_TCP, + pd), 1)) && + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + gid)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && + !pf_match_tag(m, r, nat, rdr, pftag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + r->packets++; + r->bytes += pd->tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd->tot_len; + } + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) { + if (rewrite) + m_copyback(m, off, sizeof(*th), th); + PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset); + } + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + /* undo NAT changes, if they have taken place */ + if (nat != NULL) { + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &baddr, bport, 0, af); + rewrite++; + } else if (rdr != NULL) { + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &baddr, bport, 0, af); + rewrite++; + } + if (((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURN)) && + !(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp(r, af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl); + } else if ((af == AF_INET) && r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if ((af == AF_INET6) && r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action == PF_DROP) + return (PF_DROP); + + if (pf_tag_packet(m, pftag, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + if (r->keep_state || nat != NULL || rdr != NULL || + (pd->flags & PFDESC_TCP_NORM)) { + /* create new state */ + u_int16_t len; + struct pf_state *s = NULL; + + len = pd->tot_len - off - (th->th_off << 2); + if (!r->max_states || r->states < r->max_states) + s = pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + if (nat != NULL) + s->nat_rule.ptr = nat; + else + s->nat_rule.ptr = rdr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = IPPROTO_TCP; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + s->gwy.port = th->th_sport; /* sport */ + PF_ACPY(&s->ext.addr, daddr, af); + s->ext.port = th->th_dport; + if (nat != NULL) { + 
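The return-rst path above must acknowledge exactly the sequence space the dropped segment consumed: the payload length, plus one for a SYN and one for a FIN, since each of those flags occupies a sequence number of its own. Isolated:

#include <assert.h>
#include <stdint.h>

#define TH_FIN 0x01
#define TH_SYN 0x02

static uint32_t
rst_ack(uint32_t seq, uint16_t paylen, uint8_t flags)
{
    uint32_t ack = seq + paylen;

    if (flags & TH_SYN)
        ack++; /* a SYN consumes one sequence number */
    if (flags & TH_FIN)
        ack++; /* and so does a FIN */
    return (ack);
}

int
main(void)
{
    assert(rst_ack(1000, 0, TH_SYN) == 1001); /* RST answering a bare SYN */
    return (0);
}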
PF_ACPY(&s->lan.addr, &baddr, af); + s->lan.port = bport; + } else { + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + s->lan.port = s->gwy.port; + } + } else { + PF_ACPY(&s->lan.addr, daddr, af); + s->lan.port = th->th_dport; + PF_ACPY(&s->ext.addr, saddr, af); + s->ext.port = th->th_sport; + if (rdr != NULL) { + PF_ACPY(&s->gwy.addr, &baddr, af); + s->gwy.port = bport; + } else { + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->gwy.port = s->lan.port; + } + } + + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + while ((s->src.seqdiff = arc4random()) == 0) + ; + pf_change_a(&th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + rewrite = 1; + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, th->th_off, af); + } + s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); + } + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->creation = time.tv_sec; + s->expire = time.tv_sec; + s->timeout = PFTM_TCP_FIRST_PACKET; + s->packets[0] = 1; + s->bytes[0] = pd->tot_len; + pf_set_rt_ifp(s, saddr); + + if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, + off, pd, th, &s->src, &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src, + &s->dst, &rewrite)) { + pf_normalize_tcp_cleanup(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } + if (pf_insert_state(s)) { + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_MEMORY); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + if (nat != NULL) + pf_change_ap(saddr, &th->th_sport, + pd->ip_sum, &th->th_sum, &baddr, + bport, 0, af); + else if (rdr != NULL) + pf_change_ap(daddr, &th->th_dport, + pd->ip_sum, &th->th_sum, &baddr, + bport, 0, af); + s->src.seqhi = arc4random(); + /* Find mss option */ + mss = pf_get_mss(m, off, th->th_off, af); + mss = pf_calc_mss(saddr, af, mss); + mss = pf_calc_mss(daddr, af, mss); + s->src.mss = mss; + pf_send_tcp(r, af, daddr, saddr, th->th_dport, + th->th_sport, s->src.seqhi, + ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0); + return (PF_SYNPROXY_DROP); + } + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, sizeof(*th), th); + + return (PF_PASS); +} + +int +pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, + struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nat = NULL, *rdr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr baddr, naddr; + struct udphdr *uh = pd->hdr.udp; + u_int16_t bport, nport = 0; + sa_family_t af = pd->af; + int lookup = -1; + uid_t uid; + gid_t gid; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset 
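A note on the modulate-state block above: a non-zero random seqdiff is chosen once per connection, added to every sequence number leaving the initiator and subtracted from every acknowledgement coming back, so both endpoints still observe one consistent stream while the ISN visible on the wire is randomized. The arithmetic, which deliberately relies on 32-bit wraparound:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint32_t isn = 1000;           /* weak ISN chosen by the host */
    uint32_t seqdiff = 0x5a5a5a5a; /* random, non-zero, per connection */
    uint32_t wire_seq, peer_ack;

    wire_seq = isn + seqdiff;      /* outbound rewrite: seq += seqdiff */
    peer_ack = wire_seq + 1;       /* the peer ACKs what it actually saw */
    assert(peer_ack - seqdiff == isn + 1); /* inbound rewrite undoes it */
    return (0);
}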
= NULL; + u_short reason; + int rewrite = 0; + struct pf_tag *pftag = NULL; + int tag = -1; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + bport = nport = uh->uh_sport; + /* check outgoing packet for BINAT/NAT */ + if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, + saddr, uh->uh_sport, daddr, uh->uh_dport, + &naddr, &nport)) != NULL) { + PF_ACPY(&baddr, saddr, af); + pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &naddr, nport, 1, af); + rewrite++; + if (nat->natpass) + r = NULL; + } + } else { + bport = nport = uh->uh_dport; + /* check incoming packet for BINAT/RDR */ + if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, + uh->uh_sport, daddr, uh->uh_dport, &naddr, &nport)) + != NULL) { + PF_ACPY(&baddr, daddr, af); + pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &naddr, nport, 1, af); + rewrite++; + if (rdr->natpass) + r = NULL; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) || + (r->ifp == ifp && r->ifnot))) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != IPPROTO_UDP) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], uh->uh_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], uh->uh_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (r->uid.op && (lookup != -1 || (lookup = + pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_UDP, + pd), 1)) && + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + uid)) + r = TAILQ_NEXT(r, entries); + else if (r->gid.op && (lookup != -1 || (lookup = + pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_UDP, + pd), 1)) && + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + gid)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && + !pf_match_tag(m, r, nat, rdr, pftag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + r->packets++; + r->bytes += pd->tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd->tot_len; + } + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) { + if (rewrite) + m_copyback(m, off, sizeof(*uh), uh); + PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset); + } + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + /* undo NAT changes, if they have taken place */ + if (nat != NULL) { + pf_change_ap(saddr, 
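Worth pausing on the guard repeated in the TCP and UDP rule loops: (lookup != -1 || (lookup = pf_socket_lookup(...), 1)). The comma expression always yields true, so the condition really means "fetch the socket credentials at most once per packet, and only if some rule actually tests uid or gid". The shape of the idiom, with a hypothetical expensive_lookup() in place of pf_socket_lookup():

#include <stdio.h>

static int
expensive_lookup(void)
{
    puts("lookup ran"); /* observe: printed once, not twice */
    return (1);
}

int
main(void)
{
    int lookup = -1; /* -1 means "not fetched yet" */
    int i;

    for (i = 0; i < 2; i++)
        if (lookup != -1 || (lookup = expensive_lookup(), 1)) {
            /* a rule testing uid/gid would use the cached value here */
        }
    return (0);
}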
&uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &baddr, bport, 1, af); + rewrite++; + } else if (rdr != NULL) { + pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &baddr, bport, 1, af); + rewrite++; + } + if ((af == AF_INET) && r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if ((af == AF_INET6) && r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action == PF_DROP) + return (PF_DROP); + + if (pf_tag_packet(m, pftag, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + if (r->keep_state || nat != NULL || rdr != NULL) { + /* create new state */ + struct pf_state *s = NULL; + + if (!r->max_states || r->states < r->max_states) + s = pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + if (nat != NULL) + s->nat_rule.ptr = nat; + else + s->nat_rule.ptr = rdr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = IPPROTO_UDP; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + s->gwy.port = uh->uh_sport; + PF_ACPY(&s->ext.addr, daddr, af); + s->ext.port = uh->uh_dport; + if (nat != NULL) { + PF_ACPY(&s->lan.addr, &baddr, af); + s->lan.port = bport; + } else { + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + s->lan.port = s->gwy.port; + } + } else { + PF_ACPY(&s->lan.addr, daddr, af); + s->lan.port = uh->uh_dport; + PF_ACPY(&s->ext.addr, saddr, af); + s->ext.port = uh->uh_sport; + if (rdr != NULL) { + PF_ACPY(&s->gwy.addr, &baddr, af); + s->gwy.port = bport; + } else { + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->gwy.port = s->lan.port; + } + } + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->creation = time.tv_sec; + s->expire = time.tv_sec; + s->timeout = PFTM_UDP_FIRST_PACKET; + s->packets[0] = 1; + s->bytes[0] = pd->tot_len; + pf_set_rt_ifp(s, saddr); + if (pf_insert_state(s)) { + REASON_SET(&reason, PFRES_MEMORY); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, sizeof(*uh), uh); + + return (PF_PASS); +} + +int +pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, + struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nat = NULL, *rdr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr baddr, naddr; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + u_short reason; + u_int16_t icmpid; + sa_family_t af = pd->af; + u_int8_t icmptype, icmpcode; + int state_icmp = 0; + struct pf_tag *pftag = NULL; + int tag = -1; +#ifdef INET6 + int rewrite = 0; +#endif /* INET6 */ + + switch (pd->proto) { +#ifdef INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + icmpid = pd->hdr.icmp->icmp_id; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + icmpid = 
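UDP has no handshake, so the state created above fakes one: the first packet marks its sender PFUDPS_SINGLE, and a reply promotes the other side to PFUDPS_MULTIPLE, which is what later selects the longer PFTM_UDP_MULTIPLE timeout in pf_test_state_udp(). The transition in miniature, with stand-in names for the PFUDPS_* constants:

#include <assert.h>

enum udp_pseudo { NO_TRAFFIC, SINGLE, MULTIPLE };

static void
udp_seen(enum udp_pseudo *src, enum udp_pseudo *dst)
{
    if (*src < SINGLE)
        *src = SINGLE;   /* this side has now spoken */
    if (*dst == SINGLE)
        *dst = MULTIPLE; /* both sides talking: a real flow */
}

int
main(void)
{
    enum udp_pseudo a = NO_TRAFFIC, b = NO_TRAFFIC;

    udp_seen(&a, &b); /* query */
    udp_seen(&b, &a); /* reply */
    assert(a == MULTIPLE && b == SINGLE);
    return (0);
}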
pd->hdr.icmp6->icmp6_id; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + /* check outgoing packet for BINAT/NAT */ + if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, saddr, 0, + daddr, 0, &naddr, NULL)) != NULL) { + PF_ACPY(&baddr, saddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &naddr, 0); + rewrite++; + break; +#endif /* INET6 */ + } + if (nat->natpass) + r = NULL; + } + } else { + /* check incoming packet for BINAT/RDR */ + if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, 0, + daddr, 0, &naddr, NULL)) != NULL) { + PF_ACPY(&baddr, daddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, + &naddr, 0); + rewrite++; + break; +#endif /* INET6 */ + } + if (rdr->natpass) + r = NULL; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) || + (r->ifp == ifp && r->ifnot))) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->type && r->type != icmptype + 1) + r = TAILQ_NEXT(r, entries); + else if (r->code && r->code != icmpcode + 1) + r = TAILQ_NEXT(r, entries); + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && + !pf_match_tag(m, r, nat, rdr, pftag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + r->packets++; + r->bytes += pd->tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd->tot_len; + } + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) { +#ifdef INET6 + if (rewrite) + m_copyback(m, off, sizeof(struct icmp6_hdr), + pd->hdr.icmp6); +#endif /* INET6 */ + PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset); + } + + if (r->action != PF_PASS) + return (PF_DROP); + + if (pf_tag_packet(m, pftag, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + if (!state_icmp && (r->keep_state || + nat != NULL || rdr != NULL)) { + /* create new state */ + struct pf_state *s = NULL; + + if (!r->max_states || r->states < r->max_states) + s = 
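The tests r->type != icmptype + 1 and r->code != icmpcode + 1 above work because rules store ICMP type and code biased by one, letting zero mean "not specified" even though icmp-type 0 (echo reply) is a legal wire value. The encoding as a pure predicate:

#include <assert.h>
#include <stdint.h>

static int
icmp_type_match(uint8_t rule_type, uint8_t pkt_type)
{
    /* rule_type == 0 is a wildcard; otherwise it is wire type + 1 */
    return (rule_type == 0 || rule_type == (uint8_t)(pkt_type + 1));
}

int
main(void)
{
    assert(icmp_type_match(0 + 1, 0)); /* rule "icmp-type echorep" */
    assert(icmp_type_match(0, 200));   /* unset rule matches anything */
    return (0);
}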
pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + if (nat != NULL) + s->nat_rule.ptr = nat; + else + s->nat_rule.ptr = rdr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = pd->proto; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + s->gwy.port = icmpid; + PF_ACPY(&s->ext.addr, daddr, af); + s->ext.port = icmpid; + if (nat != NULL) + PF_ACPY(&s->lan.addr, &baddr, af); + else + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + s->lan.port = icmpid; + } else { + PF_ACPY(&s->lan.addr, daddr, af); + s->lan.port = icmpid; + PF_ACPY(&s->ext.addr, saddr, af); + s->ext.port = icmpid; + if (rdr != NULL) + PF_ACPY(&s->gwy.addr, &baddr, af); + else + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->gwy.port = icmpid; + } + s->creation = time.tv_sec; + s->expire = time.tv_sec; + s->timeout = PFTM_ICMP_FIRST_PACKET; + s->packets[0] = 1; + s->bytes[0] = pd->tot_len; + pf_set_rt_ifp(s, saddr); + if (pf_insert_state(s)) { + REASON_SET(&reason, PFRES_MEMORY); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + } + +#ifdef INET6 + /* copy back packet headers if we performed IPv6 NAT operations */ + if (rewrite) + m_copyback(m, off, sizeof(struct icmp6_hdr), + pd->hdr.icmp6); +#endif /* INET6 */ + + return (PF_PASS); +} + +int +pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, + struct ifnet *ifp, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, + struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nat = NULL, *rdr = NULL; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr baddr, naddr; + sa_family_t af = pd->af; + u_short reason; + struct pf_tag *pftag = NULL; + int tag = -1; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + /* check outgoing packet for BINAT/NAT */ + if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, saddr, 0, + daddr, 0, &naddr, NULL)) != NULL) { + PF_ACPY(&baddr, saddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(saddr, &naddr, af); + break; +#endif /* INET6 */ + } + if (nat->natpass) + r = NULL; + } + } else { + /* check incoming packet for BINAT/RDR */ + if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, 0, + daddr, 0, &naddr, NULL)) != NULL) { + PF_ACPY(&baddr, daddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(daddr, &naddr, af); + break; +#endif /* INET6 */ + } + if (rdr->natpass) + r = NULL; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) || + (r->ifp == ifp && r->ifnot))) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + r = 
r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && + !pf_match_tag(m, r, nat, rdr, pftag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + r->packets++; + r->bytes += pd->tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd->tot_len; + } + REASON_SET(&reason, PFRES_MATCH); + if (r->log) + PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset); + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + struct pf_addr *a = NULL; + + if (nat != NULL) + a = saddr; + else if (rdr != NULL) + a = daddr; + if (a != NULL) { + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&a->v4.s_addr, pd->ip_sum, + baddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(a, &baddr, af); + break; +#endif /* INET6 */ + } + } + if ((af == AF_INET) && r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if ((af == AF_INET6) && r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action != PF_PASS) + return (PF_DROP); + + if (pf_tag_packet(m, pftag, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + if (r->keep_state || nat != NULL || rdr != NULL) { + /* create new state */ + struct pf_state *s = NULL; + + if (!r->max_states || r->states < r->max_states) + s = pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + if (nat != NULL) + s->nat_rule.ptr = nat; + else + s->nat_rule.ptr = rdr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = pd->proto; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + PF_ACPY(&s->ext.addr, daddr, af); + if (nat != NULL) + PF_ACPY(&s->lan.addr, &baddr, af); + else + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + } else { + PF_ACPY(&s->lan.addr, daddr, af); + PF_ACPY(&s->ext.addr, saddr, af); + if (rdr != NULL) + PF_ACPY(&s->gwy.addr, &baddr, af); + else + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + } + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->creation = time.tv_sec; + s->expire = time.tv_sec; + s->timeout = PFTM_OTHER_FIRST_PACKET; + s->packets[0] = 1; + s->bytes[0] = pd->tot_len; + pf_set_rt_ifp(s, saddr); + if (pf_insert_state(s)) { + REASON_SET(&reason, PFRES_MEMORY); + if (r->log) + PFLOG_PACKET(ifp, h, m, af, direction, reason, + r, a, ruleset); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + } + + return (PF_PASS); +} + +int 
+pf_test_fragment(struct pf_rule **rm, int direction, struct ifnet *ifp, + struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, + struct pf_ruleset **rsm) +{ + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + sa_family_t af = pd->af; + u_short reason; + struct pf_tag *pftag = NULL; + int tag = -1; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) || + (r->ifp == ifp && r->ifnot))) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->src.port_op || r->dst.port_op || + r->flagset || r->type || r->code || + r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && + !pf_match_tag(m, r, NULL, NULL, pftag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else { + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + r->packets++; + r->bytes += pd->tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd->tot_len; + } + REASON_SET(&reason, PFRES_MATCH); + if (r->log) + PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset); + + if (r->action != PF_PASS) + return (PF_DROP); + + if (pf_tag_packet(m, pftag, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + return (PF_PASS); +} + +int +pf_test_state_tcp(struct pf_state **state, int direction, struct ifnet *ifp, + struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd, + u_short *reason) +{ + struct pf_tree_node key; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq; + u_int8_t sws, dws; + int ackskew, dirndx; + int copyback = 0; + struct pf_state_peer *src, *dst; + + key.af = pd->af; + key.proto = IPPROTO_TCP; + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = th->th_sport; + key.port[1] = th->th_dport; + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + dirndx = 0; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + dirndx = 1; + } + + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { + if (direction != (*state)->direction) + return (PF_SYNPROXY_DROP); + if (th->th_flags & TH_SYN) { + if (ntohl(th->th_seq) != (*state)->src.seqlo) + return (PF_DROP); + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + (*state)->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0); + return (PF_SYNPROXY_DROP); + } else if (!(th->th_flags & TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + return (PF_DROP); + else + 
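The PF_TCPS_PROXY_SRC/PROXY_DST handling here and just below is the synproxy: pf completes the three-way handshake with the client on its own, only then opens a second handshake to the server, and finally splices the two halves by recording the offset between the ISNs on each side as seqdiff. A compressed model of the splice step; struct half and the helper are invented for illustration:

#include <assert.h>
#include <stdint.h>

struct half { uint32_t seqlo, seqhi, seqdiff; };

/*
 * 1. client SYN:     answer SYN|ACK from a random ISN, stay PROXY_SRC
 * 2. client ACK:     client side done; go PROXY_DST, SYN to the server
 * 3. server SYN|ACK: ACK it, record both offsets, connection is spliced
 */
static void
splice(struct half *src, struct half *dst)
{
    src->seqdiff = dst->seqhi - src->seqlo; /* client->server shift */
    dst->seqdiff = src->seqhi - dst->seqlo; /* server->client shift */
}

int
main(void)
{
    struct half client = { 100, 5000, 0 }; /* seqlo, pf's ISN, seqdiff */
    struct half server = { 900, 7000, 0 };

    splice(&client, &server);
    /* client seq 101 is rewritten to 101 + 6900 = 7001 on the wire */
    assert(101 + client.seqdiff == 7001);
    return (0);
}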
(*state)->src.state = PF_TCPS_PROXY_DST; + } + if ((*state)->src.state == PF_TCPS_PROXY_DST) { + struct pf_state_host *src, *dst; + + if (direction == PF_OUT) { + src = &(*state)->gwy; + dst = &(*state)->ext; + } else { + src = &(*state)->ext; + dst = &(*state)->lan; + } + if (direction == (*state)->direction) { + if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + return (PF_DROP); + (*state)->src.max_win = MAX(ntohs(th->th_win), 1); + if ((*state)->dst.seqhi == 1) + (*state)->dst.seqhi = arc4random(); + pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + &dst->addr, src->port, dst->port, + (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0); + return (PF_SYNPROXY_DROP); + } else if (((th->th_flags & (TH_SYN|TH_ACK)) != + (TH_SYN|TH_ACK)) || + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) + return (PF_DROP); + else { + (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); + (*state)->dst.seqlo = ntohl(th->th_seq); + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ntohl(th->th_seq) + 1, + TH_ACK, (*state)->src.max_win, 0, 0); + pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + &dst->addr, src->port, dst->port, + (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, + TH_ACK, (*state)->dst.max_win, 0, 0); + (*state)->src.seqdiff = (*state)->dst.seqhi - + (*state)->src.seqlo; + (*state)->dst.seqdiff = (*state)->src.seqhi - + (*state)->dst.seqlo; + (*state)->src.seqhi = (*state)->src.seqlo + + (*state)->src.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->dst.max_win; + (*state)->src.wscale = (*state)->dst.wscale = 0; + (*state)->src.state = (*state)->dst.state = + TCPS_ESTABLISHED; + return (PF_SYNPROXY_DROP); + } + } + + if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { + sws = src->wscale & PF_WSCALE_MASK; + dws = dst->wscale & PF_WSCALE_MASK; + } else + sws = dws = 0; + + /* + * Sequence tracking algorithm from Guido van Rooij's paper: + * http://www.madison-gurkha.com/publications/tcp_filtering/ + * tcp_filtering.ps + */ + + seq = ntohl(th->th_seq); + if (src->seqlo == 0) { + /* First packet from this end. 
Set its state */ + + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && + src->scrub == NULL) { + if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + } + + /* Deferred generation of sequence number modulator */ + if (dst->seqdiff && !src->seqdiff) { + while ((src->seqdiff = arc4random()) == 0) + ; + ack = ntohl(th->th_ack) - dst->seqdiff; + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + copyback = 1; + } else { + ack = ntohl(th->th_ack); + } + + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) { + end++; + if (dst->wscale & PF_WSCALE_FLAG) { + src->wscale = pf_get_wscale(m, off, th->th_off, + pd->af); + if (src->wscale & PF_WSCALE_FLAG) { + /* Remove scale factor from initial + * window */ + sws = src->wscale & PF_WSCALE_MASK; + win = ((u_int32_t)win + (1 << sws) - 1) + >> sws; + dws = dst->wscale & PF_WSCALE_MASK; + } else { + /* fixup other window */ + dst->max_win <<= dst->wscale & + PF_WSCALE_MASK; + /* in case of a retrans SYN|ACK */ + dst->wscale = 0; + } + } + } + if (th->th_flags & TH_FIN) + end++; + + src->seqlo = seq; + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + + /* + * May need to slide the window (seqhi may have been set by + * the crappy stack check or if we picked up the connection + * after establishment) + */ + if (src->seqhi == 1 || + SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) + src->seqhi = end + MAX(1, dst->max_win << dws); + if (win > src->max_win) + src->max_win = win; + + } else { + ack = ntohl(th->th_ack) - dst->seqdiff; + if (src->seqdiff) { + /* Modulate sequence numbers */ + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + copyback = 1; + } + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) + end++; + if (th->th_flags & TH_FIN) + end++; + } + + if ((th->th_flags & TH_ACK) == 0) { + /* Let it pass through the ack skew check */ + ack = dst->seqlo; + } else if ((ack == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || + /* broken tcp stacks do not set ack */ + (dst->state < TCPS_SYN_SENT)) { + /* + * Many stacks (ours included) will set the ACK number in an + * FIN|ACK if the SYN times out -- no sequence to ACK. 
+ */ + ack = dst->seqlo; + } + + if (seq == end) { + /* Ease sequencing restrictions on no data packets */ + seq = src->seqlo; + end = seq; + } + + ackskew = dst->seqlo - ack; + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + if (SEQ_GEQ(src->seqhi, end) && + /* Last octet inside other's window space */ + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && + /* Retrans: not more than one window back */ + (ackskew >= -MAXACKWINDOW) && + /* Acking not more than one reassembled fragment backwards */ + (ackskew <= (MAXACKWINDOW << sws))) { + /* Acking not more than one window forward */ + + (*state)->packets[dirndx]++; + (*state)->bytes[dirndx] += pd->tot_len; + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + + /* update states */ + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) + dst->state = TCPS_ESTABLISHED; + else if (dst->state == TCPS_CLOSING) + dst->state = TCPS_FIN_WAIT_2; + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time.tv_sec; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_FIN_WAIT_2 || + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + /* Fall through to PASS packet */ + + } else if ((dst->state < TCPS_SYN_SENT || + dst->state >= TCPS_FIN_WAIT_2 || + src->state >= TCPS_FIN_WAIT_2) && + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && + /* Within a window forward of the originating packet */ + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { + /* Within a window backward of the originating packet */ + + /* + * This currently handles three situations: + * 1) Stupid stacks will shotgun SYNs before their peer + * replies. + * 2) When PF catches an already established stream (the + * firewall rebooted, the state table was flushed, routes + * changed...) + * 3) Packets get funky immediately after the connection + * closes (this should catch Solaris spurious ACK|FINs + * that web servers like to spew after a close) + * + * This must be a little more careful than the above code + * since packet floods will also be caught here. We don't + * update the TTL here to mitigate the damage of a packet + * flood and so the same code can handle awkward establishment + * and a loosened connection close. + * In the establishment case, a correct peer response will + * validate the connection, go through the normal state code + * and keep updating the state TTL. 
+ */ + + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: loose state match: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n", + seq, ack, pd->p_len, ackskew, + (*state)->packets[0], (*state)->packets[1]); + } + + (*state)->packets[dirndx]++; + (*state)->bytes[dirndx] += pd->tot_len; + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + /* + * Cannot set dst->seqhi here since this could be a shotgunned + * SYN and not an already established connection. + */ + + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* Fall through to PASS packet */ + + } else { + if ((*state)->dst.state == TCPS_SYN_SENT && + (*state)->src.state == TCPS_SYN_SENT) { + /* Send RST for state mismatches during handshake */ + if (!(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp((*state)->rule.ptr, pd->af, + pd->dst, pd->src, th->th_dport, + th->th_sport, ntohl(th->th_ack), ack, + TH_RST|TH_ACK, 0, 0, + (*state)->rule.ptr->return_ttl); + } + src->seqlo = 0; + src->seqhi = 1; + src->max_win = 1; + } else if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD state: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d " + "dir=%s,%s\n", seq, ack, pd->p_len, ackskew, + (*state)->packets[0], (*state)->packets[1], + direction == PF_IN ? "in" : "out", + direction == (*state)->direction ? "fwd" : "rev"); + printf("pf: State failure on: %c %c %c %c | %c %c\n", + SEQ_GEQ(src->seqhi, end) ? ' ' : '1', + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? + ' ': '2', + (ackskew >= -MAXACKWINDOW) ? ' ' : '3', + (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); + } + return (PF_DROP); + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, src, dst, + ©back)) + return (PF_DROP); + } + + /* Any packets which have gotten here are to be passed */ + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE(*state)) { + if (direction == PF_OUT) + pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &(*state)->gwy.addr, + (*state)->gwy.port, 0, pd->af); + else + pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &(*state)->lan.addr, + (*state)->lan.port, 0, pd->af); + m_copyback(m, off, sizeof(*th), th); + } else if (copyback) { + /* Copyback sequence modulation or stateful scrub changes */ + m_copyback(m, off, sizeof(*th), th); + } + + (*state)->rule.ptr->packets++; + (*state)->rule.ptr->bytes += pd->tot_len; + if ((*state)->nat_rule.ptr != NULL) { + (*state)->nat_rule.ptr->packets++; + (*state)->nat_rule.ptr->bytes += pd->tot_len; + } + if ((*state)->anchor.ptr != NULL) { + (*state)->anchor.ptr->packets++; + (*state)->anchor.ptr->bytes += pd->tot_len; + } + return (PF_PASS); +} + +int +pf_test_state_udp(struct pf_state **state, int direction, struct ifnet *ifp, + struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_tree_node key; + struct udphdr *uh = pd->hdr.udp; + int dirndx; + + key.af = pd->af; + key.proto = IPPROTO_UDP; + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = uh->uh_sport; + key.port[1] = uh->uh_dport; + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + dirndx = 0; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + dirndx = 1; + } + + (*state)->packets[dirndx]++; + (*state)->bytes[dirndx] += pd->tot_len; + + /* update states */ + if (src->state < PFUDPS_SINGLE) + src->state = PFUDPS_SINGLE; + if (dst->state == PFUDPS_SINGLE) + dst->state = PFUDPS_MULTIPLE; + + /* update expire time */ + (*state)->expire = time.tv_sec; + if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) + (*state)->timeout = PFTM_UDP_MULTIPLE; + else + (*state)->timeout = PFTM_UDP_SINGLE; + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE(*state)) { + if (direction == PF_OUT) + pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &(*state)->gwy.addr, + (*state)->gwy.port, 1, pd->af); + else + pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &(*state)->lan.addr, + (*state)->lan.port, 1, pd->af); + m_copyback(m, off, sizeof(*uh), uh); + } + + (*state)->rule.ptr->packets++; + (*state)->rule.ptr->bytes += pd->tot_len; + if ((*state)->nat_rule.ptr != NULL) { + (*state)->nat_rule.ptr->packets++; + (*state)->nat_rule.ptr->bytes += pd->tot_len; + } + if ((*state)->anchor.ptr != NULL) { + (*state)->anchor.ptr->packets++; + (*state)->anchor.ptr->bytes += pd->tot_len; + } + return (PF_PASS); +} + +int +pf_test_state_icmp(struct pf_state **state, int direction, struct ifnet *ifp, + struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd) +{ + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + u_int16_t icmpid, *icmpsum; + u_int8_t icmptype; + int state_icmp = 0, dirndx; + + switch (pd->proto) { +#ifdef INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpid = pd->hdr.icmp->icmp_id; + icmpsum = &pd->hdr.icmp->icmp_cksum; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == 
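Stepping back to the acceptance tests earlier in pf_test_state_tcp(): a segment passes when its last octet sits inside the window we advertised, it starts no more than one window behind what we have already seen, and its ACK strays at most MAXACKWINDOW from the peer's edge in either direction, scaled by the window shifts. As a pure predicate, with SEQ_GEQ as the usual modular comparison and names mirroring the diff's locals:

#include <assert.h>
#include <stdint.h>

#define SEQ_GEQ(a, b) ((int32_t)((a) - (b)) >= 0)
#define MAXACKWINDOW  (0xffff + 1500) /* 1500: the diff's fudge factor */

static int
seq_ok(uint32_t seq, uint32_t end, uint32_t ack, uint32_t src_seqlo,
    uint32_t src_seqhi, uint32_t dst_seqlo, uint32_t dst_max_win,
    int sws, int dws)
{
    int32_t ackskew = (int32_t)(dst_seqlo - ack);

    return (SEQ_GEQ(src_seqhi, end) &&                    /* in window */
        SEQ_GEQ(seq, src_seqlo - (dst_max_win << dws)) && /* not stale */
        ackskew >= -MAXACKWINDOW &&            /* ACK not too far back */
        ackskew <= (MAXACKWINDOW << sws));     /* nor too far forward */
}

int
main(void)
{
    assert(seq_ok(1000, 1100, 500, 900, 66000, 500, 8192, 0, 0));
    return (0);
}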
ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpid = pd->hdr.icmp6->icmp6_id; + icmpsum = &pd->hdr.icmp6->icmp6_cksum; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + if (!state_icmp) { + + /* + * ICMP query/reply message not related to a TCP/UDP packet. + * Search for an ICMP state. + */ + struct pf_tree_node key; + + key.af = pd->af; + key.proto = pd->proto; + PF_ACPY(&key.addr[0], saddr, key.af); + PF_ACPY(&key.addr[1], daddr, key.af); + key.port[0] = icmpid; + key.port[1] = icmpid; + + STATE_LOOKUP(); + + dirndx = (direction == (*state)->direction) ? 0 : 1; + (*state)->packets[dirndx]++; + (*state)->bytes[dirndx] += pd->tot_len; + (*state)->expire = time.tv_sec; + (*state)->timeout = PFTM_ICMP_ERROR_REPLY; + + /* translate source/destination address, if necessary */ + if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) { + if (direction == PF_OUT) { + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + (*state)->gwy.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &(*state)->gwy.addr, 0); + m_copyback(m, off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } else { + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + (*state)->lan.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &(*state)->lan.addr, 0); + m_copyback(m, off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } + } + + return (PF_PASS); + + } else { + /* + * ICMP error message in response to a TCP/UDP packet. + * Extract the inner TCP/UDP header and search for that state. 
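 * The lookup below keys on the *embedded* packet with its addresses
 * reversed, because the quoted packet is one that the state's own
 * endpoint sent. A minimal illustration, with hypothetical addresses
 * and no NAT in play: for a state created by
 *
 *	TCP 10.0.0.5:1024 -> 192.0.2.1:80
 *
 * an ICMP unreachable arriving from a router quotes that outbound
 * packet, and the key is built from the quoted header as
 *
 *	key.addr[0] = pd2.dst (192.0.2.1), key.port[0] = th.th_dport (80)
 *	key.addr[1] = pd2.src (10.0.0.5),  key.port[1] = th.th_sport (1024)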
+ */ + + struct pf_pdesc pd2; +#ifdef INET + struct ip h2; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr h2_6; + int terminal = 0; +#endif /* INET6 */ + int ipoff2; + int off2; + + pd2.af = pd->af; + switch (pd->af) { +#ifdef INET + case AF_INET: + /* offset of h2 in mbuf chain */ + ipoff2 = off + ICMP_MINLEN; + + if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip)\n")); + return (PF_DROP); + } + /* + * ICMP error messages don't refer to non-first + * fragments + */ + if (h2.ip_off & htons(IP_OFFMASK)) + return (PF_DROP); + + /* offset of protocol header that follows h2 */ + off2 = ipoff2 + (h2.ip_hl << 2); + + pd2.proto = h2.ip_p; + pd2.src = (struct pf_addr *)&h2.ip_src; + pd2.dst = (struct pf_addr *)&h2.ip_dst; + pd2.ip_sum = &h2.ip_sum; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + ipoff2 = off + sizeof(struct icmp6_hdr); + + if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip6)\n")); + return (PF_DROP); + } + pd2.proto = h2_6.ip6_nxt; + pd2.src = (struct pf_addr *)&h2_6.ip6_src; + pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; + pd2.ip_sum = NULL; + off2 = ipoff2 + sizeof(h2_6); + do { + switch (pd2.proto) { + case IPPROTO_FRAGMENT: + /* + * ICMPv6 error messages for + * non-first fragments + */ + return (PF_DROP); + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off2, &opt6, + sizeof(opt6), NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMPv6 short opt\n")); + return (PF_DROP); + } + if (pd2.proto == IPPROTO_AH) + off2 += (opt6.ip6e_len + 2) * 4; + else + off2 += (opt6.ip6e_len + 1) * 8; + pd2.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + break; +#endif /* INET6 */ + } + + switch (pd2.proto) { + case IPPROTO_TCP: { + struct tcphdr th; + u_int32_t seq; + struct pf_tree_node key; + struct pf_state_peer *src, *dst; + u_int8_t dws; + + /* + * Only the first 8 bytes of the TCP header can be + * expected. Don't access any TCP header fields after + * th_seq, an ackskew test is not possible. 
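 * For illustration, assuming the standard struct tcphdr layout (RFC
 * 792 only guarantees that an ICMP error quotes the IP header plus 64
 * bits of the original datagram), the 8 bytes pulled below cover
 * exactly:
 *
 *	u_int16_t th_sport;	bytes 0-1
 *	u_int16_t th_dport;	bytes 2-3
 *	u_int32_t th_seq;	bytes 4-7
 *
 * th_ack, th_off and th_flags already lie beyond the quoted data,
 * which is why no ackskew check can be made here.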
+ */ + if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(tcp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_TCP; + PF_ACPY(&key.addr[0], pd2.dst, pd2.af); + key.port[0] = th.th_dport; + PF_ACPY(&key.addr[1], pd2.src, pd2.af); + key.port[1] = th.th_sport; + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->dst; + dst = &(*state)->src; + } else { + src = &(*state)->src; + dst = &(*state)->dst; + } + + if (src->wscale && dst->wscale && !(th.th_flags & TH_SYN)) + dws = dst->wscale & PF_WSCALE_MASK; + else + dws = 0; + + /* Demodulate sequence number */ + seq = ntohl(th.th_seq) - src->seqdiff; + if (src->seqdiff) + pf_change_a(&th.th_seq, &th.th_sum, + htonl(seq), 0); + + if (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + return (PF_DROP); + } + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &th.th_sport, + saddr, &(*state)->lan.addr, + (*state)->lan.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } else { + pf_change_icmp(pd2.dst, &th.th_dport, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), + &h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + &h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, 8, &th); + } else if (src->seqdiff) { + m_copyback(m, off2, 8, &th); + } + + return (PF_PASS); + break; + } + case IPPROTO_UDP: { + struct udphdr uh; + struct pf_tree_node key; + + if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(udp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_UDP; + PF_ACPY(&key.addr[0], pd2.dst, pd2.af); + key.port[0] = uh.uh_dport; + PF_ACPY(&key.addr[1], pd2.src, pd2.af); + key.port[1] = uh.uh_sport; + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &uh.uh_sport, + daddr, &(*state)->lan.addr, + (*state)->lan.port, &uh.uh_sum, + pd2.ip_sum, icmpsum, + pd->ip_sum, 1, pd2.af); + } else { + pf_change_icmp(pd2.dst, &uh.uh_dport, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, &uh.uh_sum, + pd2.ip_sum, icmpsum, + pd->ip_sum, 1, pd2.af); + } + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + &h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, sizeof(uh), &uh); + } + + return (PF_PASS); + break; + } +#ifdef INET + case IPPROTO_ICMP: { + struct icmp iih; + struct pf_tree_node key; + + if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short i" + 
"(icmp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMP; + PF_ACPY(&key.addr[0], pd2.dst, pd2.af); + key.port[0] = iih.icmp_id; + PF_ACPY(&key.addr[1], pd2.src, pd2.af); + key.port[1] = iih.icmp_id; + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &iih.icmp_id, + daddr, &(*state)->lan.addr, + (*state)->lan.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + } else { + pf_change_icmp(pd2.dst, &iih.icmp_id, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + } + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); + m_copyback(m, off2, ICMP_MINLEN, &iih); + } + + return (PF_PASS); + break; + } +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: { + struct icmp6_hdr iih; + struct pf_tree_node key; + + if (!pf_pull_hdr(m, off2, &iih, + sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(icmp6)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMPV6; + PF_ACPY(&key.addr[0], pd2.dst, pd2.af); + key.port[0] = iih.icmp6_id; + PF_ACPY(&key.addr[1], pd2.src, pd2.af); + key.port[1] = iih.icmp6_id; + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &iih.icmp6_id, + daddr, &(*state)->lan.addr, + (*state)->lan.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + } else { + pf_change_icmp(pd2.dst, &iih.icmp6_id, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + } + m_copyback(m, off, sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); + m_copyback(m, off2, sizeof(struct icmp6_hdr), + &iih); + } + + return (PF_PASS); + break; + } +#endif /* INET6 */ + default: { + struct pf_tree_node key; + + key.af = pd2.af; + key.proto = pd2.proto; + PF_ACPY(&key.addr[0], pd2.dst, pd2.af); + key.port[0] = 0; + PF_ACPY(&key.addr[1], pd2.src, pd2.af); + key.port[1] = 0; + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, NULL, + daddr, &(*state)->lan.addr, + 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } else { + pf_change_icmp(pd2.dst, NULL, + saddr, &(*state)->gwy.addr, + 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + &h2_6); + break; +#endif /* INET6 */ + } + } + + return (PF_PASS); + break; + } + } + } +} + +int +pf_test_state_other(struct pf_state **state, int direction, struct ifnet *ifp, + struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_tree_node key; + int dirndx; + + key.af = pd->af; + key.proto = pd->proto; + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = 0; + key.port[1] = 0; + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + dirndx = 0; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + dirndx = 1; + } + + (*state)->packets[dirndx]++; + (*state)->bytes[dirndx] += pd->tot_len; + + /* update states */ + if (src->state < 
PFOTHERS_SINGLE) + src->state = PFOTHERS_SINGLE; + if (dst->state == PFOTHERS_SINGLE) + dst->state = PFOTHERS_MULTIPLE; + + /* update expire time */ + (*state)->expire = time.tv_sec; + if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) + (*state)->timeout = PFTM_OTHER_MULTIPLE; + else + (*state)->timeout = PFTM_OTHER_SINGLE; + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE(*state)) { + if (direction == PF_OUT) + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, (*state)->gwy.addr.v4.s_addr, + 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af); + break; +#endif /* INET6 */ + } + else + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, (*state)->lan.addr.v4.s_addr, + 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af); + break; +#endif /* INET6 */ + } + } + + (*state)->rule.ptr->packets++; + (*state)->rule.ptr->bytes += pd->tot_len; + if ((*state)->nat_rule.ptr != NULL) { + (*state)->nat_rule.ptr->packets++; + (*state)->nat_rule.ptr->bytes += pd->tot_len; + } + if ((*state)->anchor.ptr != NULL) { + (*state)->anchor.ptr->packets++; + (*state)->anchor.ptr->bytes += pd->tot_len; + } + return (PF_PASS); +} + +/* + * ipoff and off are measured from the start of the mbuf chain. + * h must be at "ipoff" on the mbuf chain. + */ +void * +pf_pull_hdr(struct mbuf *m, int off, void *p, int len, + u_short *actionp, u_short *reasonp, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + if (fragoff) { + if (fragoff >= len) + ACTION_SET(actionp, PF_PASS); + else { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_FRAG); + } + return (NULL); + } + if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.len < off + len || + (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < + (unsigned)(off + len)) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET6 */ + } + m_copydata(m, off, len, p); + return (p); +} + +int +pf_routable(struct pf_addr *addr, sa_family_t af) +{ + struct sockaddr_in *dst; + struct route ro; + int ret = 0; + + bzero(&ro, sizeof(ro)); + dst = satosin(&ro.ro_dst); + dst->sin_family = af; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + rtalloc_noclone(&ro, NO_CLONING); + + if (ro.ro_rt != NULL) { + ret = 1; + RTFREE(ro.ro_rt); + } + + return (ret); +} + +#ifdef INET +void +pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s) +{ + struct mbuf *m0, *m1; + struct route iproute; + struct route *ro; + struct sockaddr_in *dst; + struct ip *ip; + struct ifnet *ifp = NULL; + struct m_tag *mtag; + struct pf_addr naddr; + int error = 0; + + if (m == NULL || *m == NULL || r == NULL || + (dir != PF_IN && dir != PF_OUT) || oifp == NULL) + panic("pf_route: invalid parameters"); + + if (r->rt == PF_DUPTO) { + m0 = *m; + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); + if (mtag == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); + if (mtag == NULL) + goto bad; 
+ m_tag_prepend(m0, mtag); + } + m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT); + if (m0 == NULL) + return; + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + return; + m0 = *m; + } + + if (m0->m_len < sizeof(struct ip)) + panic("pf_route: m0->m_len < sizeof(struct ip)"); + ip = mtod(m0, struct ip *); + + ro = &iproute; + bzero((caddr_t)ro, sizeof(*ro)); + dst = satosin(&ro->ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = ip->ip_dst; + + if (r->rt == PF_FASTROUTE) { + rtalloc(ro); + if (ro->ro_rt == 0) { + ipstat.ips_noroute++; + goto bad; + } + + ifp = ro->ro_rt->rt_ifp; + ro->ro_rt->rt_use++; + + if (ro->ro_rt->rt_flags & RTF_GATEWAY) + dst = satosin(ro->ro_rt->rt_gateway); + } else { + if (TAILQ_EMPTY(&r->rpool.list)) + panic("pf_route: TAILQ_EMPTY(&r->rpool.list)"); + if (s == NULL) { + pf_map_addr(AF_INET, &r->rpool, + (struct pf_addr *)&ip->ip_src, + &naddr, NULL); + if (!PF_AZERO(&naddr, AF_INET)) + dst->sin_addr.s_addr = naddr.v4.s_addr; + ifp = r->rpool.cur->ifp; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET)) + dst->sin_addr.s_addr = + s->rt_addr.v4.s_addr; + ifp = s->rt_ifp; + } + } + + if (ifp == NULL) + goto bad; + + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); + if (mtag == NULL) { + struct m_tag *mtag; + + mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); + if (mtag == NULL) + goto bad; + m_tag_prepend(m0, mtag); + } + + if (oifp != ifp && mtag == NULL) { + if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < sizeof(struct ip)) + panic("pf_route: m0->m_len < sizeof(struct ip)"); + ip = mtod(m0, struct ip *); + } + + /* Copied from ip_output. */ + if (ntohs(ip->ip_len) <= ifp->if_mtu) { + if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && + ifp->if_bridge == NULL) { + m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT; + ipstat.ips_outhwcsum++; + } else { + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); + } + /* Update relevant hardware checksum stats for TCP/UDP */ + if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) + tcpstat.tcps_outhwcsum++; + else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) + udpstat.udps_outhwcsum++; + error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. 
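 * A worked example, assuming a typical Ethernet path: with a
 * 1500-byte MTU and a 20-byte IP header, ip_fragment() below can put
 * at most 1480 bytes of payload into each fragment, and every
 * non-final fragment's payload must be a multiple of 8 bytes, since
 * ip_off expresses fragment offsets in 8-byte units.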
+ */ + if (ip->ip_off & htons(IP_DF)) { + ipstat.ips_cantfrag++; + if (r->rt != PF_DUPTO) { + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, + ifp); + goto done; + } else + goto bad; + } + + m1 = m0; + error = ip_fragment(m0, ifp, ifp->if_mtu); + if (error == EMSGSIZE) + goto bad; + + for (m0 = m1; m0; m0 = m1) { + m1 = m0->m_nextpkt; + m0->m_nextpkt = 0; + if (error == 0) + error = (*ifp->if_output)(ifp, m0, sintosa(dst), + NULL); + else + m_freem(m0); + } + + if (error == 0) + ipstat.ips_fragmented++; + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + if (ro == &iproute && ro->ro_rt) + RTFREE(ro->ro_rt); + return; + +bad: + m_freem(m0); + goto done; +} +#endif /* INET */ + +#ifdef INET6 +void +pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s) +{ + struct mbuf *m0; + struct m_tag *mtag; + struct route_in6 ip6route; + struct route_in6 *ro; + struct sockaddr_in6 *dst; + struct ip6_hdr *ip6; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + int error = 0; + + if (m == NULL || *m == NULL || r == NULL || + (dir != PF_IN && dir != PF_OUT) || oifp == NULL) + panic("pf_route6: invalid parameters"); + + if (r->rt == PF_DUPTO) { + m0 = *m; + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); + if (mtag == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); + if (mtag == NULL) + goto bad; + m_tag_prepend(m0, mtag); + } + m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT); + if (m0 == NULL) + return; + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + return; + m0 = *m; + } + + if (m0->m_len < sizeof(struct ip6_hdr)) + panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + ip6 = mtod(m0, struct ip6_hdr *); + + ro = &ip6route; + bzero((caddr_t)ro, sizeof(*ro)); + dst = (struct sockaddr_in6 *)&ro->ro_dst; + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof(*dst); + dst->sin6_addr = ip6->ip6_dst; + + /* Cheat. */ + if (r->rt == PF_FASTROUTE) { + mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); + if (mtag == NULL) + goto bad; + m_tag_prepend(m0, mtag); + ip6_output(m0, NULL, NULL, 0, NULL, NULL); + return; + } + + if (TAILQ_EMPTY(&r->rpool.list)) + panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)"); + if (s == NULL) { + pf_map_addr(AF_INET6, &r->rpool, + (struct pf_addr *)&ip6->ip6_src, &naddr, NULL); + if (!PF_AZERO(&naddr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst->sin6_addr, + &naddr, AF_INET6); + ifp = r->rpool.cur->ifp; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst->sin6_addr, + &s->rt_addr, AF_INET6); + ifp = s->rt_ifp; + } + + if (ifp == NULL) + goto bad; + + if (oifp != ifp) { + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL); + if (mtag == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT); + if (mtag == NULL) + goto bad; + m_tag_prepend(m0, mtag); + if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + } + } + + /* + * If the packet is too large for the outgoing interface, + * send back an icmp6 error. 
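 * Unlike the IPv4 path above there is no fragmentation fallback:
 * IPv6 routers never fragment in transit, so an oversized packet is
 * answered with ICMP6_PACKET_TOO_BIG carrying the link MTU, which the
 * sender's path-MTU discovery then uses to shrink its packets.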
+ */ + if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr)) + dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { + error = nd6_output(ifp, ifp, m0, dst, NULL); + } else { + in6_ifstat_inc(ifp, ifs6_in_toobig); + if (r->rt != PF_DUPTO) + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); + else + goto bad; + } + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + return; + +bad: + m_freem(m0); + goto done; +} +#endif /* INET6 */ + + +/* + * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag + * off is the offset where the protocol header starts + * len is the total length of protocol header plus payload + * returns 0 when the checksum is valid, otherwise returns 1. + */ +int +pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) +{ + u_int16_t flag_ok, flag_bad; + u_int16_t sum; + + switch (p) { + case IPPROTO_TCP: + flag_ok = M_TCP_CSUM_IN_OK; + flag_bad = M_TCP_CSUM_IN_BAD; + break; + case IPPROTO_UDP: + flag_ok = M_UDP_CSUM_IN_OK; + flag_bad = M_UDP_CSUM_IN_BAD; + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif /* INET6 */ + flag_ok = flag_bad = 0; + break; + default: + return (1); + } + if (m->m_pkthdr.csum & flag_ok) + return (0); + if (m->m_pkthdr.csum & flag_bad) + return (1); + if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) + return (1); + if (m->m_pkthdr.len < off + len) + return (1); + switch (af) { + case AF_INET: + if (p == IPPROTO_ICMP) { + if (m->m_len < off) + return (1); + m->m_data += off; + m->m_len -= off; + sum = in_cksum(m, len); + m->m_data -= off; + m->m_len += off; + } else { + if (m->m_len < sizeof(struct ip)) + return (1); + sum = in4_cksum(m, p, off, len); + } + break; +#ifdef INET6 + case AF_INET6: + if (m->m_len < sizeof(struct ip6_hdr)) + return (1); + sum = in6_cksum(m, p, off, len); + break; +#endif /* INET6 */ + default: + return (1); + } + if (sum) { + m->m_pkthdr.csum |= flag_bad; + switch (p) { + case IPPROTO_TCP: + tcpstat.tcps_rcvbadsum++; + break; + case IPPROTO_UDP: + udpstat.udps_badsum++; + break; + case IPPROTO_ICMP: + icmpstat.icps_checksum++; + break; +#ifdef INET6 + case IPPROTO_ICMPV6: + icmp6stat.icp6s_checksum++; + break; +#endif /* INET6 */ + } + return (1); + } + m->m_pkthdr.csum |= flag_ok; + return (0); +} + +#ifdef INET +int +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) +{ + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip *h; + struct pf_rule *a = NULL, *r = &pf_default_rule, *tr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off; + int pqid = 0; + + if (!pf_status.running || + (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + return (PF_PASS); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("non-M_PKTHDR is passed to pf_test"); +#endif + + if (m->m_pkthdr.len < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip(m0, dir, ifp, &reason) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; + h = mtod(m, struct ip *); + + off = h->ip_hl << 2; + if (off < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + memset(&pd, 0, sizeof(pd)); + pd.src = (struct pf_addr *)&h->ip_src; + pd.dst = (struct pf_addr *)&h->ip_dst; + pd.ip_sum = &h->ip_sum; + pd.proto = h->ip_p; + pd.af = AF_INET; + pd.tos = h->ip_tos; + pd.tot_len = 
ntohs(h->ip_len); + + /* handle fragments that didn't get reassembled by normalization */ + if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { + action = pf_test_fragment(&r, dir, ifp, m, h, + &pd, &a, &ruleset); + goto done; + } + + switch (h->ip_p) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) { + action = PF_DROP; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + if ((th.th_flags & TH_ACK) && pd.p_len == 0) + pqid = 1; + action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &pd); + if (action == PF_DROP) + break; + action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h, &pd, + &reason); + if (action == PF_PASS) { + r = s->rule.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_tcp(&r, &s, dir, ifp, + m, 0, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, + off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &pd); + if (action == PF_PASS) { + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_udp(&r, &s, dir, ifp, + m, 0, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_ICMP: { + struct icmp ih; + + pd.hdr.icmp = &ih; + if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_icmp(&s, dir, ifp, m, 0, off, h, &pd); + if (action == PF_PASS) { + r = s->rule.ptr; + r->packets++; + r->bytes += ntohs(h->ip_len); + a = s->anchor.ptr; + if (a != NULL) { + a->packets++; + a->bytes += ntohs(h->ip_len); + } + log = s->log; + } else if (s == NULL) + action = pf_test_icmp(&r, &s, dir, ifp, + m, 0, off, h, &pd, &a, &ruleset); + break; + } + + default: + action = pf_test_state_other(&s, dir, ifp, &pd); + if (action == PF_PASS) { + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_other(&r, &s, dir, ifp, m, off, h, + &pd, &a, &ruleset); + break; + } + + if (ifp == status_ifp) { + pf_status.bcounters[0][dir == PF_OUT] += pd.tot_len; + pf_status.pcounters[0][dir == PF_OUT][action != PF_PASS]++; + } + +done: + tr = r; + if (r == &pf_default_rule && s != NULL && s->nat_rule.ptr != NULL) + tr = s->nat_rule.ptr; + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->src.not); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL || s->direction == dir) ? 
pd.dst : pd.src, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->dst.not); + + if (action == PF_PASS && h->ip_hl > 5 && + !((s && s->allow_opts) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with ip options\n")); + } + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + struct m_tag *mtag; + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + if (pqid || pd.tos == IPTOS_LOWDELAY) + atag->qid = r->pqid; + else + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = AF_INET; + atag->hdr = h; + m_tag_prepend(m, mtag); + } + } +#endif + + if (log) + PFLOG_PACKET(ifp, h, m, AF_INET, dir, reason, r, a, ruleset); + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) + /* pf_route can free the mbuf causing *m0 to become NULL */ + pf_route(m0, r, dir, ifp, s); + + return (action); +} +#endif /* INET */ + +#ifdef INET6 +int +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) +{ + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip6_hdr *h; + struct pf_rule *a = NULL, *r = &pf_default_rule, *tr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, terminal = 0; + + if (!pf_status.running || + (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + return (PF_PASS); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("non-M_PKTHDR is passed to pf_test"); +#endif + + if (m->m_pkthdr.len < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip6(m0, dir, ifp, &reason) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; + h = mtod(m, struct ip6_hdr *); + + memset(&pd, 0, sizeof(pd)); + pd.src = (struct pf_addr *)&h->ip6_src; + pd.dst = (struct pf_addr *)&h->ip6_dst; + pd.ip_sum = NULL; + pd.af = AF_INET6; + pd.tos = 0; + pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + + off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); + pd.proto = h->ip6_nxt; + do { + switch (pd.proto) { + case IPPROTO_FRAGMENT: + action = pf_test_fragment(&r, dir, ifp, m, h, + &pd, &a, &ruleset); + if (action == PF_DROP) + REASON_SET(&reason, PFRES_FRAG); + goto done; + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), + NULL, NULL, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short opt\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + if (pd.proto == IPPROTO_AH) + off += (opt6.ip6e_len + 2) * 4; + else + off += (opt6.ip6e_len + 1) * 8; + pd.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + + switch (pd.proto) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) { + action = PF_DROP; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &pd); + if (action == 
PF_DROP) + break; + action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h, &pd, + &reason); + if (action == PF_PASS) { + r = s->rule.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_tcp(&r, &s, dir, ifp, + m, 0, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, + off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &pd); + if (action == PF_PASS) { + r = s->rule.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_udp(&r, &s, dir, ifp, + m, 0, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_ICMPV6: { + struct icmp6_hdr ih; + + pd.hdr.icmp6 = &ih; + if (!pf_pull_hdr(m, off, &ih, sizeof(ih), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_icmp(&s, dir, ifp, + m, 0, off, h, &pd); + if (action == PF_PASS) { + r = s->rule.ptr; + r->packets++; + r->bytes += h->ip6_plen; + log = s->log; + } else if (s == NULL) + action = pf_test_icmp(&r, &s, dir, ifp, + m, 0, off, h, &pd, &a, &ruleset); + break; + } + + default: + action = pf_test_other(&r, &s, dir, ifp, m, off, h, + &pd, &a, &ruleset); + break; + } + + if (ifp == status_ifp) { + pf_status.bcounters[1][dir == PF_OUT] += pd.tot_len; + pf_status.pcounters[1][dir == PF_OUT][action != PF_PASS]++; + } + +done: + tr = r; + if (r == &pf_default_rule && s != NULL && s->nat_rule.ptr != NULL) + tr = s->nat_rule.ptr; + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->src.not); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->dst.not); + + /* XXX handle IPv6 options, if not allowed. not implemented. */ + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + struct m_tag *mtag; + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + if (pd.tos == IPTOS_LOWDELAY) + atag->qid = r->pqid; + else + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = AF_INET6; + atag->hdr = h; + m_tag_prepend(m, mtag); + } + } +#endif + + if (log) + PFLOG_PACKET(ifp, h, m, AF_INET6, dir, reason, r, a, ruleset); + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) + /* pf_route6 can free the mbuf causing *m0 to become NULL */ + pf_route6(m0, r, dir, ifp, s); + + return (action); +} +#endif /* INET6 */ diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c new file mode 100644 index 000000000000..0607f96b7e6c --- /dev/null +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -0,0 +1,2158 @@ +/* $OpenBSD: pf_ioctl.c,v 1.81 2003/08/22 21:50:34 david Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/filio.h> +#include <sys/fcntl.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/kernel.h> +#include <sys/time.h> +#include <sys/timeout.h> +#include <sys/pool.h> +#include <sys/malloc.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_icmp.h> + +#include <net/pfvar.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet/in_pcb.h> +#endif /* INET6 */ + +#ifdef ALTQ +#include <altq/altq.h> +#endif + +void pfattach(int); +int pfopen(dev_t, int, int, struct proc *); +int pfclose(dev_t, int, int, struct proc *); +struct pf_pool *pf_get_pool(char *, char *, u_int32_t, + u_int8_t, u_int8_t, u_int8_t, u_int8_t, u_int8_t); +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +void pf_mv_pool(struct pf_palist *, struct pf_palist *); +void pf_empty_pool(struct pf_palist *); +int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); + +extern struct timeout pf_expire_to; + +struct pf_rule pf_default_rule; + +#define TAGID_MAX 50000 +TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags); + +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x + +void +pfattach(int num) +{ + u_int32_t *timeout = pf_default_rule.timeout; + + pool_init(&pf_tree_pl, sizeof(struct pf_tree_node), 0, 0, 0, "pftrpl", + NULL); + pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl", + &pool_allocator_nointr); + pool_init(&pf_addr_pl, sizeof(struct pf_addr_dyn), 0, 0, 0, "pfaddrpl", + &pool_allocator_nointr); + pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", + NULL); + pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", + NULL); + pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, + 
"pfpooladdrpl", NULL); + pfr_initialize(); + pf_osfp_initialize(); + + pool_sethardlimit(&pf_state_pl, pf_pool_limits[PF_LIMIT_STATES].limit, + NULL, 0); + + RB_INIT(&tree_lan_ext); + RB_INIT(&tree_ext_gwy); + TAILQ_INIT(&pf_anchors); + pf_init_ruleset(&pf_main_ruleset); + TAILQ_INIT(&pf_altqs[0]); + TAILQ_INIT(&pf_altqs[1]); + TAILQ_INIT(&pf_pabuf); + pf_altqs_active = &pf_altqs[0]; + pf_altqs_inactive = &pf_altqs[1]; + + /* default rule should never be garbage collected */ + pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; + pf_default_rule.action = PF_PASS; + pf_default_rule.nr = -1; + + /* initialize default timeouts */ + timeout[PFTM_TCP_FIRST_PACKET] = 120; /* First TCP packet */ + timeout[PFTM_TCP_OPENING] = 30; /* No response yet */ + timeout[PFTM_TCP_ESTABLISHED] = 24*60*60; /* Established */ + timeout[PFTM_TCP_CLOSING] = 15 * 60; /* Half closed */ + timeout[PFTM_TCP_FIN_WAIT] = 45; /* Got both FINs */ + timeout[PFTM_TCP_CLOSED] = 90; /* Got a RST */ + timeout[PFTM_UDP_FIRST_PACKET] = 60; /* First UDP packet */ + timeout[PFTM_UDP_SINGLE] = 30; /* Unidirectional */ + timeout[PFTM_UDP_MULTIPLE] = 60; /* Bidirectional */ + timeout[PFTM_ICMP_FIRST_PACKET] = 20; /* First ICMP packet */ + timeout[PFTM_ICMP_ERROR_REPLY] = 10; /* Got error response */ + timeout[PFTM_OTHER_FIRST_PACKET] = 60; /* First packet */ + timeout[PFTM_OTHER_SINGLE] = 30; /* Unidirectional */ + timeout[PFTM_OTHER_MULTIPLE] = 60; /* Bidirectional */ + timeout[PFTM_FRAG] = 30; /* Fragment expire */ + timeout[PFTM_INTERVAL] = 10; /* Expire interval */ + + timeout_set(&pf_expire_to, pf_purge_timeout, &pf_expire_to); + timeout_add(&pf_expire_to, timeout[PFTM_INTERVAL] * hz); + + pf_normalize_init(); + pf_status.debug = PF_DEBUG_URGENT; +} + +int +pfopen(dev_t dev, int flags, int fmt, struct proc *p) +{ + if (minor(dev) >= 1) + return (ENXIO); + return (0); +} + +int +pfclose(dev_t dev, int flags, int fmt, struct proc *p) +{ + if (minor(dev) >= 1) + return (ENXIO); + return (0); +} + +struct pf_pool * +pf_get_pool(char *anchorname, char *rulesetname, u_int32_t ticket, + u_int8_t rule_action, u_int8_t rule_number, u_int8_t r_last, + u_int8_t active, u_int8_t check_ticket) +{ + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num; + + ruleset = pf_find_ruleset(anchorname, rulesetname); + if (ruleset == NULL) + return (NULL); + rs_num = pf_get_ruleset_number(rule_action); + if (rs_num >= PF_RULESET_MAX) + return (NULL); + if (active) { + if (check_ticket && ticket != + ruleset->rules[rs_num].active.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + } else { + if (check_ticket && ticket != + ruleset->rules[rs_num].inactive.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); + } + if (!r_last) { + while ((rule != NULL) && (rule->nr != rule_number)) + rule = TAILQ_NEXT(rule, entries); + } + if (rule == NULL) + return (NULL); + + return (&rule->rpool); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + 
break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *anchorname) +{ + struct pf_anchor *anchor; + int n = -1; + + anchor = TAILQ_FIRST(&pf_anchors); + while (anchor != NULL && (n = strcmp(anchor->name, anchorname)) < 0) + anchor = TAILQ_NEXT(anchor, entries); + if (n == 0) + return (anchor); + else + return (NULL); +} + +struct pf_ruleset * +pf_find_ruleset(char *anchorname, char *rulesetname) +{ + struct pf_anchor *anchor; + struct pf_ruleset *ruleset; + + if (!anchorname[0] && !rulesetname[0]) + return (&pf_main_ruleset); + if (!anchorname[0] || !rulesetname[0]) + return (NULL); + anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; + rulesetname[PF_RULESET_NAME_SIZE-1] = 0; + anchor = pf_find_anchor(anchorname); + if (anchor == NULL) + return (NULL); + ruleset = TAILQ_FIRST(&anchor->rulesets); + while (ruleset != NULL && strcmp(ruleset->name, rulesetname) < 0) + ruleset = TAILQ_NEXT(ruleset, entries); + if (ruleset != NULL && !strcmp(ruleset->name, rulesetname)) + return (ruleset); + else + return (NULL); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(char *anchorname, char *rulesetname) +{ + struct pf_anchor *anchor, *a; + struct pf_ruleset *ruleset, *r; + + if (!anchorname[0] && !rulesetname[0]) + return (&pf_main_ruleset); + if (!anchorname[0] || !rulesetname[0]) + return (NULL); + anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; + rulesetname[PF_RULESET_NAME_SIZE-1] = 0; + a = TAILQ_FIRST(&pf_anchors); + while (a != NULL && strcmp(a->name, anchorname) < 0) + a = TAILQ_NEXT(a, entries); + if (a != NULL && !strcmp(a->name, anchorname)) + anchor = a; + else { + anchor = (struct pf_anchor *)malloc(sizeof(struct pf_anchor), + M_TEMP, M_NOWAIT); + if (anchor == NULL) + return (NULL); + memset(anchor, 0, sizeof(struct pf_anchor)); + bcopy(anchorname, anchor->name, sizeof(anchor->name)); + TAILQ_INIT(&anchor->rulesets); + if (a != NULL) + TAILQ_INSERT_BEFORE(a, anchor, entries); + else + TAILQ_INSERT_TAIL(&pf_anchors, anchor, entries); + } + r = TAILQ_FIRST(&anchor->rulesets); + while (r != NULL && strcmp(r->name, rulesetname) < 0) + r = TAILQ_NEXT(r, entries); + if (r != NULL && !strcmp(r->name, rulesetname)) + return (r); + ruleset = (struct pf_ruleset *)malloc(sizeof(struct pf_ruleset), + M_TEMP, M_NOWAIT); + if (ruleset != NULL) { + pf_init_ruleset(ruleset); + bcopy(rulesetname, ruleset->name, sizeof(ruleset->name)); + ruleset->anchor = anchor; + if (r != NULL) + TAILQ_INSERT_BEFORE(r, ruleset, entries); + else + TAILQ_INSERT_TAIL(&anchor->rulesets, ruleset, entries); + } + return (ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *anchor; + int i; + + if (ruleset == NULL || ruleset->anchor == NULL || ruleset->tables > 0 || ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr)) + return; + + anchor = ruleset->anchor; + TAILQ_REMOVE(&anchor->rulesets, ruleset, entries); + free(ruleset, M_TEMP); + + if (TAILQ_EMPTY(&anchor->rulesets)) { + TAILQ_REMOVE(&pf_anchors, anchor, entries); + free(anchor, M_TEMP); + } +} + +void 
+pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) +{ + struct pf_pooladdr *mv_pool_pa; + + while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { + TAILQ_REMOVE(poola, mv_pool_pa, entries); + TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); + } +} + +void +pf_empty_pool(struct pf_palist *poola) +{ + struct pf_pooladdr *empty_pool_pa; + + while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { + pf_dynaddr_remove(&empty_pool_pa->addr); + pf_tbladdr_remove(&empty_pool_pa->addr); + TAILQ_REMOVE(poola, empty_pool_pa, entries); + pool_put(&pf_pooladdr_pl, empty_pool_pa); + } +} + +void +pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + if (rulequeue != NULL) { + if (rule->states <= 0) { + /* + * XXX - we need to remove the table *before* detaching + * the rule to make sure the table code does not delete + * the anchor under our feet. + */ + pf_tbladdr_remove(&rule->src.addr); + pf_tbladdr_remove(&rule->dst.addr); + } + TAILQ_REMOVE(rulequeue, rule, entries); + rule->entries.tqe_prev = NULL; + rule->nr = -1; + } + if (rule->states > 0 || rule->entries.tqe_prev != NULL) + return; + pf_tag_unref(rule->tag); + pf_tag_unref(rule->match_tag); + pf_dynaddr_remove(&rule->src.addr); + pf_dynaddr_remove(&rule->dst.addr); + if (rulequeue == NULL) { + pf_tbladdr_remove(&rule->src.addr); + pf_tbladdr_remove(&rule->dst.addr); + } + pf_empty_pool(&rule->rpool.list); + pool_put(&pf_rule_pl, rule); +} + +u_int16_t +pf_tagname2tag(char *tagname) +{ + struct pf_tagname *tag, *p = NULL; + u_int16_t new_tagid = 1; + + TAILQ_FOREACH(tag, &pf_tags, entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + return (tag->tag); + } + + /* + * to avoid fragmentation, we do a linear search from the beginning + * and take the first free slot we find. if there is none or the list + * is empty, append a new entry at the end. 
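 * A minimal illustration with hypothetical tag values: if the sorted
 * list currently holds tags { 1, 2, 4, 7 }, the scan below advances
 * while each entry matches the running candidate (bumping it to 2,
 * then 3), stops at the node holding 4, and allocates new_tagid = 3,
 * inserting the new entry before that node so the list stays sorted.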
+ */ + + /* new entry */ + if (!TAILQ_EMPTY(&pf_tags)) + for (p = TAILQ_FIRST(&pf_tags); p != NULL && + p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) + new_tagid = p->tag + 1; + + if (new_tagid > TAGID_MAX) + return (0); + + /* allocate and fill new struct pf_tagname */ + tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname), + M_TEMP, M_NOWAIT); + if (tag == NULL) + return (0); + bzero(tag, sizeof(struct pf_tagname)); + strlcpy(tag->name, tagname, sizeof(tag->name)); + tag->tag = new_tagid; + tag->ref++; + + if (p != NULL) /* insert new entry before p */ + TAILQ_INSERT_BEFORE(p, tag, entries); + else /* either list empty or no free slot in between */ + TAILQ_INSERT_TAIL(&pf_tags, tag, entries); + + return (tag->tag); +} + +void +pf_tag2tagname(u_int16_t tagid, char *p) +{ + struct pf_tagname *tag; + + TAILQ_FOREACH(tag, &pf_tags, entries) + if (tag->tag == tagid) { + strlcpy(p, tag->name, PF_TAG_NAME_SIZE); + return; + } +} + +void +pf_tag_unref(u_int16_t tag) +{ + struct pf_tagname *p, *next; + + if (tag == 0) + return; + + for (p = TAILQ_FIRST(&pf_tags); p != NULL; p = next) { + next = TAILQ_NEXT(p, entries); + if (tag == p->tag) { + if (--p->ref == 0) { + TAILQ_REMOVE(&pf_tags, p, entries); + free(p, M_TEMP); + } + break; + } + } +} + +int +pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) +{ + struct pf_pooladdr *pa = NULL; + struct pf_pool *pool = NULL; + int s; + int error = 0; + + /* XXX keep in sync with switch() below */ + if (securelevel > 1) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETRULE: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCSETSTATUSIF: + case DIOCGETSTATUS: + case DIOCCLRSTATUS: + case DIOCNATLOOK: + case DIOCSETDEBUG: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCCLRRULECTRS: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETANCHORS: + case DIOCGETANCHOR: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRCLRASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + break; + default: + return (EPERM); + } + + if (!(flags & FWRITE)) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETRULE: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCGETSTATUS: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETANCHORS: + case DIOCGETANCHOR: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + break; + default: + return (EACCES); + } + + switch (cmd) { + + case DIOCSTART: + if (pf_status.running) + error = EEXIST; + else { + u_int32_t states = pf_status.states; + bzero(&pf_status, sizeof(struct pf_status)); + pf_status.running = 1; + pf_status.states = states; + pf_status.since = time.tv_sec; + if (status_ifp != NULL) + strlcpy(pf_status.ifname, + status_ifp->if_xname, IFNAMSIZ); + DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); + } + break; + + case DIOCSTOP: + if (!pf_status.running) + error = ENOENT; + else { + pf_status.running = 0; + DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); + } + break; + + case DIOCBEGINRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule 
*rule; + int rs_num; + + ruleset = pf_find_or_create_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + while ((rule = + TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr)) != NULL) + pf_rm_rule(ruleset->rules[rs_num].inactive.ptr, rule); + pr->ticket = ++ruleset->rules[rs_num].inactive.ticket; + break; + } + + case DIOCADDRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule, *tail; + struct pf_pooladdr *pa; + int rs_num; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->rule.anchorname[0] && ruleset != &pf_main_ruleset) { + error = EINVAL; + break; + } + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { + error = EBUSY; + break; + } + if (pr->pool_ticket != ticket_pabuf) { + error = EBUSY; + break; + } + rule = pool_get(&pf_rule_pl, PR_NOWAIT); + if (rule == NULL) { + error = ENOMEM; + break; + } + bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + rule->anchor = NULL; + rule->ifp = NULL; + TAILQ_INIT(&rule->rpool.list); + /* initialize refcounting */ + rule->states = 0; + rule->entries.tqe_prev = NULL; +#ifndef INET + if (rule->af == AF_INET) { + pool_put(&pf_rule_pl, rule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (rule->af == AF_INET6) { + pool_put(&pf_rule_pl, rule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + if (tail) + rule->nr = tail->nr + 1; + else + rule->nr = 0; + if (rule->ifname[0]) { + rule->ifp = ifunit(rule->ifname); + if (rule->ifp == NULL) { + pool_put(&pf_rule_pl, rule); + error = EINVAL; + break; + } + } + + if (rule->tagname[0]) + if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) + error = EBUSY; + if (rule->match_tagname[0]) + if ((rule->match_tag = + pf_tagname2tag(rule->match_tagname)) == 0) + error = EBUSY; + if (rule->rt && !rule->direction) + error = EINVAL; + if (pf_dynaddr_setup(&rule->src.addr, rule->af)) + error = EINVAL; + if (pf_dynaddr_setup(&rule->dst.addr, rule->af)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &rule->src.addr)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) + error = EINVAL; + TAILQ_FOREACH(pa, &pf_pabuf, entries) + if (pf_tbladdr_setup(ruleset, &pa->addr)) + error = EINVAL; + + pf_mv_pool(&pf_pabuf, &rule->rpool.list); + if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || + (rule->action == PF_BINAT)) && !rule->anchorname[0]) || + (rule->rt > PF_FASTROUTE)) && + (TAILQ_FIRST(&rule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_rm_rule(NULL, rule); + break; + } + rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); + rule->evaluations = rule->packets = rule->bytes = 0; + TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, + rule, entries); + break; + } + + case DIOCCOMMITRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rulequeue *old_rules; + struct pf_rule *rule; + int rs_num; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = 
pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { + error = EBUSY; + break; + } + +#ifdef ALTQ + /* set queue IDs */ + if (rs_num == PF_RULESET_FILTER) + pf_rule_set_qid(ruleset->rules[rs_num].inactive.ptr); +#endif + + /* Swap rules, keep the old. */ + s = splsoftnet(); + old_rules = ruleset->rules[rs_num].active.ptr; + ruleset->rules[rs_num].active.ptr = + ruleset->rules[rs_num].inactive.ptr; + ruleset->rules[rs_num].inactive.ptr = old_rules; + ruleset->rules[rs_num].active.ticket = + ruleset->rules[rs_num].inactive.ticket; + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); + + /* Purge the old rule list. */ + while ((rule = TAILQ_FIRST(old_rules)) != NULL) + pf_rm_rule(old_rules, rule); + pf_remove_if_empty_ruleset(ruleset); + pf_update_anchor_rules(); + splx(s); + break; + } + + case DIOCGETRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *tail; + int rs_num; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + s = splsoftnet(); + tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + if (tail) + pr->nr = tail->nr + 1; + else + pr->nr = 0; + pr->ticket = ruleset->rules[rs_num].active.ticket; + splx(s); + break; + } + + case DIOCGETRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num, i; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + error = EBUSY; + break; + } + s = splsoftnet(); + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((rule != NULL) && (rule->nr != pr->nr)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(rule, &pr->rule, sizeof(struct pf_rule)); + pf_dynaddr_copyout(&pr->rule.src.addr); + pf_dynaddr_copyout(&pr->rule.dst.addr); + pf_tbladdr_copyout(&pr->rule.src.addr); + pf_tbladdr_copyout(&pr->rule.dst.addr); + for (i = 0; i < PF_SKIP_COUNT; ++i) + if (rule->skip[i].ptr == NULL) + pr->rule.skip[i].nr = -1; + else + pr->rule.skip[i].nr = + rule->skip[i].ptr->nr; + splx(s); + break; + } + + case DIOCCHANGERULE: { + struct pfioc_rule *pcr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *oldrule = NULL, *newrule = NULL; + u_int32_t nr = 0; + int rs_num; + + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != ticket_pabuf) { + error = EBUSY; + break; + } + + if (pcr->action < PF_CHANGE_ADD_HEAD || + pcr->action > PF_CHANGE_GET_TICKET) { + error = EINVAL; + break; + } + ruleset = pf_find_ruleset(pcr->anchor, pcr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + break; + } else { + if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) { + error = EINVAL; + break; + } + if (pcr->rule.return_icmp >> 8 > 
ICMP_MAXTYPE) { + error = EINVAL; + break; + } + } + + if (pcr->action != PF_CHANGE_REMOVE) { + newrule = pool_get(&pf_rule_pl, PR_NOWAIT); + if (newrule == NULL) { + error = ENOMEM; + break; + } + bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + TAILQ_INIT(&newrule->rpool.list); + /* initialize refcounting */ + newrule->states = 0; + newrule->entries.tqe_prev = NULL; +#ifndef INET + if (newrule->af == AF_INET) { + pool_put(&pf_rule_pl, newrule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (newrule->af == AF_INET6) { + pool_put(&pf_rule_pl, newrule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (newrule->ifname[0]) { + newrule->ifp = ifunit(newrule->ifname); + if (newrule->ifp == NULL) { + pool_put(&pf_rule_pl, newrule); + error = EINVAL; + break; + } + } else + newrule->ifp = NULL; + +#ifdef ALTQ + /* set queue IDs */ + if (newrule->qname[0] != 0) { + newrule->qid = pf_qname_to_qid(newrule->qname); + if (newrule->pqname[0] != 0) + newrule->pqid = + pf_qname_to_qid(newrule->pqname); + else + newrule->pqid = newrule->qid; + } +#endif + if (newrule->tagname[0]) + if ((newrule->tag = + pf_tagname2tag(newrule->tagname)) == 0) + error = EBUSY; + if (newrule->match_tagname[0]) + if ((newrule->match_tag = pf_tagname2tag( + newrule->match_tagname)) == 0) + error = EBUSY; + + if (newrule->rt && !newrule->direction) + error = EINVAL; + if (pf_dynaddr_setup(&newrule->src.addr, newrule->af)) + error = EINVAL; + if (pf_dynaddr_setup(&newrule->dst.addr, newrule->af)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) + error = EINVAL; + + pf_mv_pool(&pf_pabuf, &newrule->rpool.list); + if (((((newrule->action == PF_NAT) || + (newrule->action == PF_RDR) || + (newrule->action == PF_BINAT) || + (newrule->rt > PF_FASTROUTE)) && + !newrule->anchorname[0])) && + (TAILQ_FIRST(&newrule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_rm_rule(NULL, newrule); + break; + } + newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); + newrule->evaluations = newrule->packets = 0; + newrule->bytes = 0; + } + pf_empty_pool(&pf_pabuf); + + s = splsoftnet(); + + if (pcr->action == PF_CHANGE_ADD_HEAD) + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + else if (pcr->action == PF_CHANGE_ADD_TAIL) + oldrule = TAILQ_LAST( + ruleset->rules[rs_num].active.ptr, pf_rulequeue); + else { + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) + oldrule = TAILQ_NEXT(oldrule, entries); + if (oldrule == NULL) { + pf_rm_rule(NULL, newrule); + error = EINVAL; + splx(s); + break; + } + } + + if (pcr->action == PF_CHANGE_REMOVE) + pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); + else { + if (oldrule == NULL) + TAILQ_INSERT_TAIL( + ruleset->rules[rs_num].active.ptr, + newrule, entries); + else if (pcr->action == PF_CHANGE_ADD_HEAD || + pcr->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldrule, newrule, entries); + else + TAILQ_INSERT_AFTER( + ruleset->rules[rs_num].active.ptr, + oldrule, newrule, entries); + } + + nr = 0; + TAILQ_FOREACH(oldrule, + ruleset->rules[rs_num].active.ptr, entries) + oldrule->nr = nr++; + + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); + pf_remove_if_empty_ruleset(ruleset); + pf_update_anchor_rules(); + + ruleset->rules[rs_num].active.ticket++; + splx(s); + break; + } + + case DIOCCLRSTATES: { + struct pf_tree_node *n; + + s = splsoftnet(); + RB_FOREACH(n, 
pf_state_tree, &tree_ext_gwy) + n->state->timeout = PFTM_PURGE; + pf_purge_expired_states(); + pf_status.states = 0; + splx(s); + break; + } + + case DIOCKILLSTATES: { + struct pf_tree_node *n; + struct pf_state *st; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + int killed = 0; + + s = splsoftnet(); + RB_FOREACH(n, pf_state_tree, &tree_ext_gwy) { + st = n->state; + if ((!psk->psk_af || st->af == psk->psk_af) && + (!psk->psk_proto || psk->psk_proto == st->proto) && + PF_MATCHA(psk->psk_src.not, + &psk->psk_src.addr.v.a.addr, + &psk->psk_src.addr.v.a.mask, &st->lan.addr, + st->af) && + PF_MATCHA(psk->psk_dst.not, + &psk->psk_dst.addr.v.a.addr, + &psk->psk_dst.addr.v.a.mask, &st->ext.addr, + st->af) && + (psk->psk_src.port_op == 0 || + pf_match_port(psk->psk_src.port_op, + psk->psk_src.port[0], psk->psk_src.port[1], + st->lan.port)) && + (psk->psk_dst.port_op == 0 || + pf_match_port(psk->psk_dst.port_op, + psk->psk_dst.port[0], psk->psk_dst.port[1], + st->ext.port))) { + st->timeout = PFTM_PURGE; + killed++; + } + } + pf_purge_expired_states(); + splx(s); + psk->psk_af = killed; + break; + } + + case DIOCADDSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pf_state *state; + + if (ps->state.timeout >= PFTM_MAX && + ps->state.timeout != PFTM_UNTIL_PACKET) { + error = EINVAL; + break; + } + state = pool_get(&pf_state_pl, PR_NOWAIT); + if (state == NULL) { + error = ENOMEM; + break; + } + s = splsoftnet(); + bcopy(&ps->state, state, sizeof(struct pf_state)); + state->rule.ptr = NULL; + state->nat_rule.ptr = NULL; + state->anchor.ptr = NULL; + state->rt_ifp = NULL; + state->creation = time.tv_sec; + state->packets[0] = state->packets[1] = 0; + state->bytes[0] = state->bytes[1] = 0; + if (pf_insert_state(state)) { + pool_put(&pf_state_pl, state); + error = ENOMEM; + } + splx(s); + break; + } + + case DIOCGETSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pf_tree_node *n; + u_int32_t nr; + + nr = 0; + s = splsoftnet(); + RB_FOREACH(n, pf_state_tree, &tree_ext_gwy) { + if (nr >= ps->nr) + break; + nr++; + } + if (n == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(n->state, &ps->state, sizeof(struct pf_state)); + ps->state.rule.nr = n->state->rule.ptr->nr; + ps->state.nat_rule.nr = (n->state->nat_rule.ptr == NULL) ? + -1 : n->state->nat_rule.ptr->nr; + ps->state.anchor.nr = (n->state->anchor.ptr == NULL) ? + -1 : n->state->anchor.ptr->nr; + splx(s); + ps->state.expire = pf_state_expires(n->state); + if (ps->state.expire > time.tv_sec) + ps->state.expire -= time.tv_sec; + else + ps->state.expire = 0; + break; + } + + case DIOCGETSTATES: { + struct pfioc_states *ps = (struct pfioc_states *)addr; + struct pf_tree_node *n; + struct pf_state *p, pstore; + u_int32_t nr = 0; + int space = ps->ps_len; + + if (space == 0) { + s = splsoftnet(); + RB_FOREACH(n, pf_state_tree, &tree_ext_gwy) + nr++; + splx(s); + ps->ps_len = sizeof(struct pf_state) * nr; + return (0); + } + + s = splsoftnet(); + p = ps->ps_states; + RB_FOREACH(n, pf_state_tree, &tree_ext_gwy) { + int secs = time.tv_sec; + + if ((nr + 1) * sizeof(*p) > (unsigned)ps->ps_len) + break; + + bcopy(n->state, &pstore, sizeof(pstore)); + pstore.rule.nr = n->state->rule.ptr->nr; + pstore.nat_rule.nr = (n->state->nat_rule.ptr == NULL) ? + -1 : n->state->nat_rule.ptr->nr; + pstore.anchor.nr = (n->state->anchor.ptr == NULL) ? 
+ -1 : n->state->anchor.ptr->nr; + pstore.creation = secs - pstore.creation; + pstore.expire = pf_state_expires(n->state); + if (pstore.expire > secs) + pstore.expire -= secs; + else + pstore.expire = 0; + error = copyout(&pstore, p, sizeof(*p)); + if (error) { + splx(s); + goto fail; + } + p++; + nr++; + } + ps->ps_len = sizeof(struct pf_state) * nr; + splx(s); + break; + } + + case DIOCGETSTATUS: { + struct pf_status *s = (struct pf_status *)addr; + bcopy(&pf_status, s, sizeof(struct pf_status)); + break; + } + + case DIOCSETSTATUSIF: { + struct pfioc_if *pi = (struct pfioc_if *)addr; + struct ifnet *ifp; + + if (pi->ifname[0] == 0) { + status_ifp = NULL; + bzero(pf_status.ifname, IFNAMSIZ); + break; + } + if ((ifp = ifunit(pi->ifname)) == NULL) { + error = EINVAL; + break; + } else if (ifp == status_ifp) + break; + status_ifp = ifp; + /* fallthrough into DIOCCLRSTATUS */ + } + + case DIOCCLRSTATUS: { + u_int32_t running = pf_status.running; + u_int32_t states = pf_status.states; + u_int32_t since = pf_status.since; + u_int32_t debug = pf_status.debug; + + bzero(&pf_status, sizeof(struct pf_status)); + pf_status.running = running; + pf_status.states = states; + pf_status.since = since; + pf_status.debug = debug; + if (status_ifp != NULL) + strlcpy(pf_status.ifname, + status_ifp->if_xname, IFNAMSIZ); + break; + } + + case DIOCNATLOOK: { + struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; + struct pf_state *st; + struct pf_tree_node key; + int direction = pnl->direction; + + key.af = pnl->af; + key.proto = pnl->proto; + + /* + * userland gives us source and dest of connection, reverse + * the lookup so we ask for what happens with the return + * traffic, enabling us to find it in the state tree. + */ + PF_ACPY(&key.addr[1], &pnl->saddr, pnl->af); + key.port[1] = pnl->sport; + PF_ACPY(&key.addr[0], &pnl->daddr, pnl->af); + key.port[0] = pnl->dport; + + if (!pnl->proto || + PF_AZERO(&pnl->saddr, pnl->af) || + PF_AZERO(&pnl->daddr, pnl->af) || + !pnl->dport || !pnl->sport) + error = EINVAL; + else { + s = splsoftnet(); + if (direction == PF_IN) + st = pf_find_state(&tree_ext_gwy, &key); + else + st = pf_find_state(&tree_lan_ext, &key); + if (st != NULL) { + if (direction == PF_IN) { + PF_ACPY(&pnl->rsaddr, &st->lan.addr, + st->af); + pnl->rsport = st->lan.port; + PF_ACPY(&pnl->rdaddr, &pnl->daddr, + pnl->af); + pnl->rdport = pnl->dport; + } else { + PF_ACPY(&pnl->rdaddr, &st->gwy.addr, + st->af); + pnl->rdport = st->gwy.port; + PF_ACPY(&pnl->rsaddr, &pnl->saddr, + pnl->af); + pnl->rsport = pnl->sport; + } + } else + error = ENOENT; + splx(s); + } + break; + } + + case DIOCSETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + int old; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || + pt->seconds < 0) { + error = EINVAL; + goto fail; + } + old = pf_default_rule.timeout[pt->timeout]; + pf_default_rule.timeout[pt->timeout] = pt->seconds; + pt->seconds = old; + break; + } + + case DIOCGETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { + error = EINVAL; + goto fail; + } + pt->seconds = pf_default_rule.timeout[pt->timeout]; + break; + } + + case DIOCGETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { + error = EINVAL; + goto fail; + } + pl->limit = pf_pool_limits[pl->index].limit; + break; + } + + case DIOCSETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + int old_limit; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { 
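+ /* out-of-range index: not a valid pf_pool_limits[] slot */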
+ error = EINVAL; + goto fail; + } + if (pool_sethardlimit(pf_pool_limits[pl->index].pp, + pl->limit, NULL, 0) != 0) { + error = EBUSY; + goto fail; + } + old_limit = pf_pool_limits[pl->index].limit; + pf_pool_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; + break; + } + + case DIOCSETDEBUG: { + u_int32_t *level = (u_int32_t *)addr; + + pf_status.debug = *level; + break; + } + + case DIOCCLRRULECTRS: { + struct pf_ruleset *ruleset = &pf_main_ruleset; + struct pf_rule *rule; + + s = splsoftnet(); + TAILQ_FOREACH(rule, + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) + rule->evaluations = rule->packets = + rule->bytes = 0; + splx(s); + break; + } + +#ifdef ALTQ + case DIOCSTARTALTQ: { + struct pf_altq *altq; + struct ifnet *ifp; + struct tb_profile tb; + + /* enable all altq interfaces on active list */ + s = splsoftnet(); + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + if ((ifp = ifunit(altq->ifname)) == NULL) { + error = EINVAL; + break; + } + if (ifp->if_snd.altq_type != ALTQT_NONE) + error = altq_enable(&ifp->if_snd); + if (error != 0) + break; + /* set tokenbucket regulator */ + tb.rate = altq->ifbandwidth; + tb.depth = altq->tbrsize; + error = tbr_set(&ifp->if_snd, &tb); + if (error != 0) + break; + } + } + if (error == 0) + pfaltq_running = 1; + splx(s); + DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + break; + } + + case DIOCSTOPALTQ: { + struct pf_altq *altq; + struct ifnet *ifp; + struct tb_profile tb; + int err; + + /* disable all altq interfaces on active list */ + s = splsoftnet(); + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + if ((ifp = ifunit(altq->ifname)) == NULL) { + error = EINVAL; + break; + } + if (ifp->if_snd.altq_type != ALTQT_NONE) { + err = altq_disable(&ifp->if_snd); + if (err != 0 && error == 0) + error = err; + } + /* clear tokenbucket regulator */ + tb.rate = 0; + err = tbr_set(&ifp->if_snd, &tb); + if (err != 0 && error == 0) + error = err; + } + } + if (error == 0) + pfaltq_running = 0; + splx(s); + DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + break; + } + + case DIOCBEGINALTQS: { + u_int32_t *ticket = (u_int32_t *)addr; + struct pf_altq *altq; + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } + pool_put(&pf_altq_pl, altq); + } + *ticket = ++ticket_altqs_inactive; + break; + } + + case DIOCADDALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq, *a; + + if (pa->ticket != ticket_altqs_inactive) { + error = EBUSY; + break; + } + altq = pool_get(&pf_altq_pl, PR_NOWAIT); + if (altq == NULL) { + error = ENOMEM; + break; + } + bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + + /* + * if this is for a queue, find the discipline and + * copy the necessary fields + */ + if (altq->qname[0] != 0) { + TAILQ_FOREACH(a, pf_altqs_inactive, entries) { + if (strncmp(a->ifname, altq->ifname, + IFNAMSIZ) == 0 && a->qname[0] == 0) { + altq->altq_disc = a->altq_disc; + break; + } + } + } + + error = altq_add(altq); + if (error) { + pool_put(&pf_altq_pl, altq); + break; + } + + TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + break; + } + + case DIOCCOMMITALTQS: { + u_int32_t *ticket = (u_int32_t *)addr; + struct pf_altqqueue *old_altqs; + struct pf_altq *altq; + struct pf_anchor *anchor; + struct 
pf_ruleset *ruleset; + int err; + + if (*ticket != ticket_altqs_inactive) { + error = EBUSY; + break; + } + + /* Swap altqs, keep the old. */ + s = splsoftnet(); + old_altqs = pf_altqs_active; + pf_altqs_active = pf_altqs_inactive; + pf_altqs_inactive = old_altqs; + ticket_altqs_active = ticket_altqs_inactive; + + /* Attach new disciplines */ + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + /* attach the discipline */ + error = altq_pfattach(altq); + if (error) { + splx(s); + goto fail; + } + } + } + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0) { + /* detach and destroy the discipline */ + err = altq_pfdetach(altq); + if (err != 0 && error == 0) + error = err; + err = altq_remove(altq); + if (err != 0 && error == 0) + error = err; + } + pool_put(&pf_altq_pl, altq); + } + splx(s); + + /* update queue IDs */ + pf_rule_set_qid( + pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + TAILQ_FOREACH(anchor, &pf_anchors, entries) { + TAILQ_FOREACH(ruleset, &anchor->rulesets, entries) { + pf_rule_set_qid( + ruleset->rules[PF_RULESET_FILTER].active.ptr + ); + } + } + break; + } + + case DIOCGETALTQS: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + + pa->nr = 0; + s = splsoftnet(); + TAILQ_FOREACH(altq, pf_altqs_active, entries) + pa->nr++; + pa->ticket = ticket_altqs_active; + splx(s); + break; + } + + case DIOCGETALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + u_int32_t nr; + + if (pa->ticket != ticket_altqs_active) { + error = EBUSY; + break; + } + nr = 0; + s = splsoftnet(); + altq = TAILQ_FIRST(pf_altqs_active); + while ((altq != NULL) && (nr < pa->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + splx(s); + break; + } + + case DIOCCHANGEALTQ: + /* CHANGEALTQ not supported yet! 
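For now queues can only be replaced as a whole set, via DIOCBEGINALTQS/DIOCADDALTQ/DIOCCOMMITALTQS.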
*/ + error = ENODEV; + break; + + case DIOCGETQSTATS: { + struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; + struct pf_altq *altq; + u_int32_t nr; + int nbytes; + + if (pq->ticket != ticket_altqs_active) { + error = EBUSY; + break; + } + nbytes = pq->nbytes; + nr = 0; + s = splsoftnet(); + altq = TAILQ_FIRST(pf_altqs_active); + while ((altq != NULL) && (nr < pq->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + error = EBUSY; + splx(s); + break; + } + error = altq_getqstats(altq, pq->buf, &nbytes); + splx(s); + if (error == 0) { + pq->scheduler = altq->scheduler; + pq->nbytes = nbytes; + } + break; + } +#endif /* ALTQ */ + + case DIOCBEGINADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + pf_empty_pool(&pf_pabuf); + pp->ticket = ++ticket_pabuf; + break; + } + + case DIOCADDADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + +#ifndef INET + if (pp->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pp->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (pp->addr.addr.type != PF_ADDR_ADDRMASK && + pp->addr.addr.type != PF_ADDR_DYNIFTL && + pp->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); + if (pa == NULL) { + error = ENOMEM; + break; + } + bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); + if (pa->ifname[0]) { + pa->ifp = ifunit(pa->ifname); + if (pa->ifp == NULL) { + pool_put(&pf_pooladdr_pl, pa); + error = EINVAL; + break; + } + } + if (pf_dynaddr_setup(&pa->addr, pp->af)) { + pf_dynaddr_remove(&pa->addr); + pool_put(&pf_pooladdr_pl, pa); + error = EINVAL; + break; + } + TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); + break; + } + + case DIOCGETADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + pp->nr = 0; + s = splsoftnet(); + pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, + pp->r_action, pp->r_num, 0, 1, 0); + if (pool == NULL) { + error = EBUSY; + splx(s); + break; + } + TAILQ_FOREACH(pa, &pool->list, entries) + pp->nr++; + splx(s); + break; + } + + case DIOCGETADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + u_int32_t nr = 0; + + s = splsoftnet(); + pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, + pp->r_action, pp->r_num, 0, 1, 1); + if (pool == NULL) { + error = EBUSY; + splx(s); + break; + } + pa = TAILQ_FIRST(&pool->list); + while ((pa != NULL) && (nr < pp->nr)) { + pa = TAILQ_NEXT(pa, entries); + nr++; + } + if (pa == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); + pf_dynaddr_copyout(&pp->addr.addr); + pf_tbladdr_copyout(&pp->addr.addr); + splx(s); + break; + } + + case DIOCCHANGEADDR: { + struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *oldpa = NULL, *newpa = NULL; + struct pf_ruleset *ruleset; + + if (pca->action < PF_CHANGE_ADD_HEAD || + pca->action > PF_CHANGE_REMOVE) { + error = EINVAL; + break; + } + if (pca->addr.addr.type != PF_ADDR_ADDRMASK && + pca->addr.addr.type != PF_ADDR_DYNIFTL && + pca->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + + ruleset = pf_find_ruleset(pca->anchor, pca->ruleset); + if (ruleset == NULL) { + error = EBUSY; + break; + } + pool = pf_get_pool(pca->anchor, pca->ruleset, pca->ticket, + pca->r_action, pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) { + error = EBUSY; + break; + } + if (pca->action != PF_CHANGE_REMOVE) { + newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); 
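+ /* PR_NOWAIT: pool_get() returns NULL instead of sleeping */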
+ if (newpa == NULL) { + error = ENOMEM; + break; + } + bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); +#ifndef INET + if (pca->af == AF_INET) { + pool_put(&pf_pooladdr_pl, newpa); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pca->af == AF_INET6) { + pool_put(&pf_pooladdr_pl, newpa); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (newpa->ifname[0]) { + newpa->ifp = ifunit(newpa->ifname); + if (newpa->ifp == NULL) { + pool_put(&pf_pooladdr_pl, newpa); + error = EINVAL; + break; + } + } else + newpa->ifp = NULL; + if (pf_dynaddr_setup(&newpa->addr, pca->af) || + pf_tbladdr_setup(ruleset, &newpa->addr)) { + pf_dynaddr_remove(&newpa->addr); + pool_put(&pf_pooladdr_pl, newpa); + error = EINVAL; + break; + } + } + + s = splsoftnet(); + + if (pca->action == PF_CHANGE_ADD_HEAD) + oldpa = TAILQ_FIRST(&pool->list); + else if (pca->action == PF_CHANGE_ADD_TAIL) + oldpa = TAILQ_LAST(&pool->list, pf_palist); + else { + int i = 0; + + oldpa = TAILQ_FIRST(&pool->list); + while ((oldpa != NULL) && (i < pca->nr)) { + oldpa = TAILQ_NEXT(oldpa, entries); + i++; + } + if (oldpa == NULL) { + error = EINVAL; + splx(s); + break; + } + } + + if (pca->action == PF_CHANGE_REMOVE) { + TAILQ_REMOVE(&pool->list, oldpa, entries); + pf_dynaddr_remove(&oldpa->addr); + pf_tbladdr_remove(&oldpa->addr); + pool_put(&pf_pooladdr_pl, oldpa); + } else { + if (oldpa == NULL) + TAILQ_INSERT_TAIL(&pool->list, newpa, entries); + else if (pca->action == PF_CHANGE_ADD_HEAD || + pca->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldpa, newpa, entries); + else + TAILQ_INSERT_AFTER(&pool->list, oldpa, + newpa, entries); + } + + pool->cur = TAILQ_FIRST(&pool->list); + PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, + pca->af); + splx(s); + break; + } + + case DIOCGETANCHORS: { + struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; + struct pf_anchor *anchor; + + pa->nr = 0; + TAILQ_FOREACH(anchor, &pf_anchors, entries) + pa->nr++; + break; + } + + case DIOCGETANCHOR: { + struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; + struct pf_anchor *anchor; + u_int32_t nr = 0; + + anchor = TAILQ_FIRST(&pf_anchors); + while (anchor != NULL && nr < pa->nr) { + anchor = TAILQ_NEXT(anchor, entries); + nr++; + } + if (anchor == NULL) + error = EBUSY; + else + bcopy(anchor->name, pa->name, sizeof(pa->name)); + break; + } + + case DIOCGETRULESETS: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_anchor *anchor; + struct pf_ruleset *ruleset; + + pr->anchor[PF_ANCHOR_NAME_SIZE-1] = 0; + if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + error = EINVAL; + break; + } + pr->nr = 0; + TAILQ_FOREACH(ruleset, &anchor->rulesets, entries) + pr->nr++; + break; + } + + case DIOCGETRULESET: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_anchor *anchor; + struct pf_ruleset *ruleset; + u_int32_t nr = 0; + + if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + error = EINVAL; + break; + } + ruleset = TAILQ_FIRST(&anchor->rulesets); + while (ruleset != NULL && nr < pr->nr) { + ruleset = TAILQ_NEXT(ruleset, entries); + nr++; + } + if (ruleset == NULL) + error = EBUSY; + else + bcopy(ruleset->name, pr->name, sizeof(pr->name)); + break; + } + + case DIOCRCLRTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags); + break; + } + + case DIOCRADDTABLES: { + struct pfioc_table *io = (struct pfioc_table 
*)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_nadd, io->pfrio_flags); + break; + } + + case DIOCRDELTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_ndel, io->pfrio_flags); + break; + } + + case DIOCRGETTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags); + break; + } + + case DIOCRGETTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_tstats)) { + error = ENODEV; + break; + } + error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags); + break; + } + + case DIOCRCLRTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_nzero, io->pfrio_flags); + break; + } + + case DIOCRSETTFLAGS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size, + io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, + &io->pfrio_ndel, io->pfrio_flags); + break; + } + + case DIOCRCLRADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags); + break; + } + + case DIOCRADDADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags); + break; + } + + case DIOCRDELADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags); + break; + } + + case DIOCRSETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, + &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags); + break; + } + + case DIOCRGETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags); + break; + } + + case DIOCRGETASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_astats)) { + error = ENODEV; + break; + } + error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags); + break; + } + + case DIOCRCLRASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error 
= pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags); + break; + } + + case DIOCRTSTADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags); + break; + } + + case DIOCRINABEGIN: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_ina_begin(&io->pfrio_table, &io->pfrio_ticket, + &io->pfrio_ndel, io->pfrio_flags); + break; + } + + case DIOCRINACOMMIT: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_ina_commit(&io->pfrio_table, io->pfrio_ticket, + &io->pfrio_nadd, &io->pfrio_nchange, io->pfrio_flags); + break; + } + + case DIOCRINADEFINE: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, + io->pfrio_ticket, io->pfrio_flags); + break; + } + + case DIOCOSFPFLUSH: + s = splsoftnet(); + pf_osfp_flush(); + splx(s); + break; + + case DIOCOSFPADD: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + s = splsoftnet(); + error = pf_osfp_add(io); + splx(s); + break; + } + + case DIOCOSFPGET: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + s = splsoftnet(); + error = pf_osfp_get(io); + splx(s); + break; + } + + default: + error = ENODEV; + break; + } +fail: + + return (error); +} diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c new file mode 100644 index 000000000000..02b81985f2e4 --- /dev/null +++ b/sys/contrib/pf/net/pf_norm.c @@ -0,0 +1,1528 @@ +/* $OpenBSD: pf_norm.c,v 1.75 2003/08/29 01:49:08 dhartmei Exp $ */ + +/* + * Copyright 2001 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "pflog.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/filio.h> +#include <sys/fcntl.h> +#include <sys/socket.h> +#include <sys/kernel.h> +#include <sys/time.h> +#include <sys/pool.h> + +#include <dev/rndvar.h> +#include <net/if.h> +#include <net/if_types.h> +#include <net/bpf.h> +#include <net/route.h> +#include <net/if_pflog.h> + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_seq.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + +#include <net/pfvar.h> + +struct pf_frent { + LIST_ENTRY(pf_frent) fr_next; + struct ip *fr_ip; + struct mbuf *fr_m; +}; + +struct pf_frcache { + LIST_ENTRY(pf_frcache) fr_next; + uint16_t fr_off; + uint16_t fr_end; +}; + +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ +#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) + +struct pf_fragment { + RB_ENTRY(pf_fragment) fr_entry; + TAILQ_ENTRY(pf_fragment) frag_next; + struct in_addr fr_src; + struct in_addr fr_dst; + u_int8_t fr_p; /* protocol of this fragment */ + u_int8_t fr_flags; /* status flags */ + u_int16_t fr_id; /* fragment id for reassemble */ + u_int16_t fr_max; /* fragment data max */ + u_int32_t fr_timeout; +#define fr_queue fr_u.fru_queue +#define fr_cache fr_u.fru_cache + union { + LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ + LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ + } fr_u; +}; + +TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; +TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; + +static __inline int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); +RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; +RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); + +/* Private prototypes */ +void pf_ip2key(struct pf_fragment *, struct ip *); +void pf_remove_fragment(struct pf_fragment *); +void pf_flush_fragments(void); +void pf_free_fragment(struct pf_fragment *); +struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); +struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, + struct pf_frent *, int); +struct mbuf *pf_fragcache(struct mbuf **, struct ip*, + struct pf_fragment **, int, int, int *); +u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t); +int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, + struct tcphdr *, int); + +#define DPFPRINTF(x) if (pf_status.debug >= PF_DEBUG_MISC) \ + { printf("%s: ", __func__); printf x ;} + +/* Globals */ +struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; +struct pool pf_state_scrub_pl; +int pf_nfrents, pf_ncache; + +void +pf_normalize_init(void) +{ + pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", + NULL); + pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", + NULL); + pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, + "pffrcache", NULL); + pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", + NULL); + pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, + "pfstscr", NULL); + + pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); + pool_sethardlimit(&pf_frent_pl, 
PFFRAG_FRENT_HIWAT, NULL, 0); + pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); + pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); + + TAILQ_INIT(&pf_fragqueue); + TAILQ_INIT(&pf_cachequeue); +} + +static __inline int +pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) +{ + int diff; + + if ((diff = a->fr_id - b->fr_id)) + return (diff); + else if ((diff = a->fr_p - b->fr_p)) + return (diff); + else if (a->fr_src.s_addr < b->fr_src.s_addr) + return (-1); + else if (a->fr_src.s_addr > b->fr_src.s_addr) + return (1); + else if (a->fr_dst.s_addr < b->fr_dst.s_addr) + return (-1); + else if (a->fr_dst.s_addr > b->fr_dst.s_addr) + return (1); + return (0); +} + +void +pf_purge_expired_fragments(void) +{ + struct pf_fragment *frag; + u_int32_t expire = time.tv_sec - + pf_default_rule.timeout[PFTM_FRAG]; + + while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { + KASSERT(BUFFER_FRAGMENTS(frag)); + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + } + + while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { + KASSERT(!BUFFER_FRAGMENTS(frag)); + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + KASSERT(TAILQ_EMPTY(&pf_cachequeue) || + TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); + } +} + +/* + * Try to flush old fragments to make space for new ones + */ + +void +pf_flush_fragments(void) +{ + struct pf_fragment *frag; + int goal; + + goal = pf_nfrents * 9 / 10; + DPFPRINTF(("trying to free > %d frents\n", + pf_nfrents - goal)); + while (goal < pf_nfrents) { + frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); + if (frag == NULL) + break; + pf_free_fragment(frag); + } + + + goal = pf_ncache * 9 / 10; + DPFPRINTF(("trying to free > %d cache entries\n", + pf_ncache - goal)); + while (goal < pf_ncache) { + frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); + if (frag == NULL) + break; + pf_free_fragment(frag); + } +} + +/* Frees the fragments and all associated entries */ + +void +pf_free_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent; + struct pf_frcache *frcache; + + /* Free all fragments */ + if (BUFFER_FRAGMENTS(frag)) { + for (frent = LIST_FIRST(&frag->fr_queue); frent; + frent = LIST_FIRST(&frag->fr_queue)) { + LIST_REMOVE(frent, fr_next); + + m_freem(frent->fr_m); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + } + } else { + for (frcache = LIST_FIRST(&frag->fr_cache); frcache; + frcache = LIST_FIRST(&frag->fr_cache)) { + LIST_REMOVE(frcache, fr_next); + + KASSERT(LIST_EMPTY(&frag->fr_cache) || + LIST_FIRST(&frag->fr_cache)->fr_off > + frcache->fr_end); + + pool_put(&pf_cent_pl, frcache); + pf_ncache--; + } + } + + pf_remove_fragment(frag); +} + +void +pf_ip2key(struct pf_fragment *key, struct ip *ip) +{ + key->fr_p = ip->ip_p; + key->fr_id = ip->ip_id; + key->fr_src.s_addr = ip->ip_src.s_addr; + key->fr_dst.s_addr = ip->ip_dst.s_addr; +} + +struct pf_fragment * +pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +{ + struct pf_fragment key; + struct pf_fragment *frag; + + pf_ip2key(&key, ip); + + frag = RB_FIND(pf_frag_tree, tree, &key); + if (frag != NULL) { + /* XXX Are we sure we want to update the timeout? 
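Refreshing it on every lookup means an active reassembly never times out; only idle ones expire, though pf_flush_fragments() can still reclaim them under memory pressure.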
*/ + frag->fr_timeout = time.tv_sec; + if (BUFFER_FRAGMENTS(frag)) { + TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); + } else { + TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); + TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); + } + } + + return (frag); +} + +/* Removes a fragment from the fragment queue and frees the fragment */ + +void +pf_remove_fragment(struct pf_fragment *frag) +{ + if (BUFFER_FRAGMENTS(frag)) { + RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); + TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); + pool_put(&pf_frag_pl, frag); + } else { + RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); + TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); + pool_put(&pf_cache_pl, frag); + } +} + +#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) +struct mbuf * +pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, + struct pf_frent *frent, int mff) +{ + struct mbuf *m = *m0, *m2; + struct pf_frent *frea, *next; + struct pf_frent *frep = NULL; + struct ip *ip = frent->fr_ip; + int hlen = ip->ip_hl << 2; + u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; + u_int16_t max = ip_len + off; + + KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); + + /* Strip off ip header */ + m->m_data += hlen; + m->m_len -= hlen; + + /* Create a new reassembly queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) + goto drop_fragment; + } + + (*frag)->fr_flags = 0; + (*frag)->fr_max = 0; + (*frag)->fr_src = frent->fr_ip->ip_src; + (*frag)->fr_dst = frent->fr_ip->ip_dst; + (*frag)->fr_p = frent->fr_ip->ip_p; + (*frag)->fr_id = frent->fr_ip->ip_id; + (*frag)->fr_timeout = time.tv_sec; + LIST_INIT(&(*frag)->fr_queue); + + RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); + TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); + + /* We do not have a previous fragment */ + frep = NULL; + goto insert; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. 
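+ * After the scan frep is the last fragment starting at or
+ * before off (NULL if none), frea the first one starting beyond it.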
+ */ + LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { + if (FR_IP_OFF(frea) > off) + break; + frep = frea; + } + + KASSERT(frep != NULL || frea != NULL); + + if (frep != NULL && + FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * + 4 > off) + { + u_int16_t precut; + + precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - + frep->fr_ip->ip_hl * 4 - off; + if (precut >= ip_len) + goto drop_fragment; + m_adj(frent->fr_m, precut); + DPFPRINTF(("overlap -%d\n", precut)); + /* Enforce 8 byte boundaries */ + ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); + off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + ip_len -= precut; + ip->ip_len = htons(ip_len); + } + + for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); + frea = next) + { + u_int16_t aftercut; + + aftercut = ip_len + off - FR_IP_OFF(frea); + DPFPRINTF(("adjust overlap %d\n", aftercut)); + if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl + * 4) + { + frea->fr_ip->ip_len = + htons(ntohs(frea->fr_ip->ip_len) - aftercut); + frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + + (aftercut >> 3)); + m_adj(frea->fr_m, aftercut); + break; + } + + /* This fragment is completely overlapped, lose it */ + next = LIST_NEXT(frea, fr_next); + m_freem(frea->fr_m); + LIST_REMOVE(frea, fr_next); + pool_put(&pf_frent_pl, frea); + pf_nfrents--; + } + + insert: + /* Update maximum data size */ + if ((*frag)->fr_max < max) + (*frag)->fr_max = max; + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (frep == NULL) + LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); + else + LIST_INSERT_AFTER(frep, frent, fr_next); + + /* Check if we are completely reassembled */ + if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) + return (NULL); + + /* Check if we have all the data */ + off = 0; + for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { + next = LIST_NEXT(frep, fr_next); + + off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; + if (off < (*frag)->fr_max && + (next == NULL || FR_IP_OFF(next) != off)) + { + DPFPRINTF(("missing fragment at %d, next %d, max %d\n", + off, next == NULL ? 
-1 : FR_IP_OFF(next), + (*frag)->fr_max)); + return (NULL); + } + } + DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); + if (off < (*frag)->fr_max) + return (NULL); + + /* We have all the data */ + frent = LIST_FIRST(&(*frag)->fr_queue); + KASSERT(frent != NULL); + if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d\n", off)); + pf_free_fragment(*frag); + *frag = NULL; + return (NULL); + } + next = LIST_NEXT(frent, fr_next); + + /* Magic from ip_input */ + ip = frent->fr_ip; + m = frent->fr_m; + m2 = m->m_next; + m->m_next = NULL; + m_cat(m, m2); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + for (frent = next; frent != NULL; frent = next) { + next = LIST_NEXT(frent, fr_next); + + m2 = frent->fr_m; + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + m_cat(m, m2); + } + + ip->ip_src = (*frag)->fr_src; + ip->ip_dst = (*frag)->fr_dst; + + /* Remove from fragment queue */ + pf_remove_fragment(*frag); + *frag = NULL; + + hlen = ip->ip_hl << 2; + ip->ip_len = htons(off + hlen); + m->m_len += hlen; + m->m_data -= hlen; + + /* some debugging cruft by sklower, below, will go away soon */ + /* XXX this should be done elsewhere */ + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m2 = m; m2; m2 = m2->m_next) + plen += m2->m_len; + m->m_pkthdr.len = plen; + } + + DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + return (m); + + drop_fragment: + /* Oops - fail safe - drop packet */ + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + m_freem(m); + return (NULL); +} + +struct mbuf * +pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, + int drop, int *nomem) +{ + struct mbuf *m = *m0; + struct pf_frcache *frp, *fra, *cur = NULL; + int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); + u_int16_t off = ntohs(h->ip_off) << 3; + u_int16_t max = ip_len + off; + int hosed = 0; + + KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); + + /* Create a new range queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) + goto no_mem; + } + + /* Get an entry for the queue */ + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + pool_put(&pf_cache_pl, *frag); + *frag = NULL; + goto no_mem; + } + pf_ncache++; + + (*frag)->fr_flags = PFFRAG_NOBUFFER; + (*frag)->fr_max = 0; + (*frag)->fr_src = h->ip_src; + (*frag)->fr_dst = h->ip_dst; + (*frag)->fr_p = h->ip_p; + (*frag)->fr_id = h->ip_id; + (*frag)->fr_timeout = time.tv_sec; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INIT(&(*frag)->fr_cache); + LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); + + RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); + + DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); + + goto pass; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. + */ + frp = NULL; + LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { + if (fra->fr_off > off) + break; + frp = fra; + } + + KASSERT(frp != NULL || fra != NULL); + + if (frp != NULL) { + int precut; + + precut = frp->fr_end - off; + if (precut >= ip_len) { + /* Fragment is entirely a duplicate */ + DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", + h->ip_id, frp->fr_off, frp->fr_end, off, max)); + goto drop_fragment; + } + if (precut == 0) { + /* They are adjacent. 
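The new fragment starts exactly where the cached range ends, so the two ranges can simply be coalesced.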
Fixup cache entry */ + DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", + h->ip_id, frp->fr_off, frp->fr_end, off, max)); + frp->fr_end = max; + } else if (precut > 0) { + /* The first part of this payload overlaps with a + * fragment that has already been passed. + * Need to trim off the first part of the payload. + * But to do so easily, we need to create another + * mbuf to throw the original header into. + */ + + DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", + h->ip_id, precut, frp->fr_off, frp->fr_end, off, + max)); + + off += precut; + max -= precut; + /* Update the previous frag to encompass this one */ + frp->fr_end = max; + + if (!drop) { + /* XXX Optimization opportunity + * This is a very heavy way to trim the payload. + * we could do it much faster by diddling mbuf + * internals but that would be even less legible + * than this mbuf magic. For my next trick, + * I'll pull a rabbit out of my laptop. + */ + *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT); + if (*m0 == NULL) + goto no_mem; + KASSERT((*m0)->m_next == NULL); + m_adj(m, precut + (h->ip_hl << 2)); + m_cat(*m0, m); + m = *m0; + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + + + h = mtod(m, struct ip *); + + + KASSERT((int)m->m_len == ntohs(h->ip_len) - precut); + h->ip_off = htons(ntohs(h->ip_off) + (precut >> 3)); + h->ip_len = htons(ntohs(h->ip_len) - precut); + } else { + hosed++; + } + } else { + /* There is a gap between fragments */ + + DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", + h->ip_id, -precut, frp->fr_off, frp->fr_end, off, + max)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) + goto no_mem; + pf_ncache++; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INSERT_AFTER(frp, cur, fr_next); + } + } + + if (fra != NULL) { + int aftercut; + int merge = 0; + + aftercut = max - fra->fr_off; + if (aftercut == 0) { + /* Adjacent fragments */ + DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", + h->ip_id, off, max, fra->fr_off, fra->fr_end)); + fra->fr_off = off; + merge = 1; + } else if (aftercut > 0) { + /* Need to chop off the tail of this fragment */ + DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", + h->ip_id, aftercut, off, max, fra->fr_off, + fra->fr_end)); + fra->fr_off = off; + max -= aftercut; + + merge = 1; + + if (!drop) { + m_adj(m, -aftercut); + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + h = mtod(m, struct ip *); + KASSERT((int)m->m_len == ntohs(h->ip_len) - aftercut); + h->ip_len = htons(ntohs(h->ip_len) - aftercut); + } else { + hosed++; + } + } else { + /* There is a gap between fragments */ + DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", + h->ip_id, -aftercut, off, max, fra->fr_off, + fra->fr_end)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) + goto no_mem; + pf_ncache++; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INSERT_BEFORE(fra, cur, fr_next); + } + + + /* Need to glue together two separate fragment descriptors */ + if (merge) { + if (cur && fra->fr_off <= cur->fr_end) { + /* Need to merge in a previous 'cur' */ + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, cur->fr_off, cur->fr_end, off, + max, fra->fr_off, fra->fr_end)); + fra->fr_off = cur->fr_off; + LIST_REMOVE(cur, fr_next); + pool_put(&pf_cent_pl, cur); + pf_ncache--; + cur = NULL; + + } else if (frp && fra->fr_off <= 
frp->fr_end) { + /* Need to merge in a modified 'frp' */ + KASSERT(cur == NULL); + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, frp->fr_off, frp->fr_end, off, + max, fra->fr_off, fra->fr_end)); + fra->fr_off = frp->fr_off; + LIST_REMOVE(frp, fr_next); + pool_put(&pf_cent_pl, frp); + pf_ncache--; + frp = NULL; + + } + } + } + + if (hosed) { + /* + * We must keep tracking the overall fragment even when + * we're going to drop it anyway so that we know when to + * free the overall descriptor. Thus we drop the frag late. + */ + goto drop_fragment; + } + + + pass: + /* Update maximum data size */ + if ((*frag)->fr_max < max) + (*frag)->fr_max = max; + + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + /* Check if we are completely reassembled */ + if (((*frag)->fr_flags & PFFRAG_SEENLAST) && + LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { + /* Remove from fragment queue */ + DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, + (*frag)->fr_max)); + pf_free_fragment(*frag); + *frag = NULL; + } + + return (m); + + no_mem: + *nomem = 1; + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + m_freem(m); + return (NULL); + + drop_fragment: + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (drop) { + /* This fragment has been deemed bad. Don't reass */ + if (((*frag)->fr_flags & PFFRAG_DROP) == 0) + DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", + h->ip_id)); + (*frag)->fr_flags |= PFFRAG_DROP; + } + + m_freem(m); + return (NULL); +} + +int +pf_normalize_ip(struct mbuf **m0, int dir, struct ifnet *ifp, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct pf_frent *frent; + struct pf_fragment *frag = NULL; + struct ip *h = mtod(m, struct ip *); + int mff = (ntohs(h->ip_off) & IP_MF); + int hlen = h->ip_hl << 2; + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + u_int16_t max; + int ip_len; + int ip_off; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && r->ifp != ifp) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != h->ip_p) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL) + return (PF_PASS); + else + r->packets++; + + /* Check for illegal packets */ + if (hlen < (int)sizeof(struct ip)) + goto drop; + + if (hlen > ntohs(h->ip_len)) + goto drop; + + /* Clear IP_DF if the rule uses the no-df option */ + if (r->rule_flag & PFRULE_NODF) + h->ip_off &= htons(~IP_DF); + + /* We will need other tests here */ + if (!fragoff && !mff) + goto no_fragment; + + /* We're dealing with a fragment now. Don't allow fragments + * with IP_DF to enter the cache. If the flag was cleared by + * no-df above, fine. Otherwise drop it. 
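+ A fragment that still has IP_DF set is self-contradictory and likely crafted.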
+ */ + if (h->ip_off & htons(IP_DF)) { + DPFPRINTF(("IP_DF\n")); + goto bad; + } + + ip_len = ntohs(h->ip_len) - hlen; + ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + /* All fragments are 8 byte aligned */ + if (mff && (ip_len & 0x7)) { + DPFPRINTF(("mff and %d\n", ip_len)); + goto bad; + } + + /* Respect maximum length */ + if (fragoff + ip_len > IP_MAXPACKET) { + DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + goto bad; + } + max = fragoff + ip_len; + + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { + /* Fully buffer all of the fragments */ + + frag = pf_find_fragment(h, &pf_frag_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + max > frag->fr_max) + goto bad; + + /* Get an entry for the fragment queue */ + frent = pool_get(&pf_frent_pl, PR_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + pf_nfrents++; + frent->fr_ip = h; + frent->fr_m = m; + + /* Might return a completely reassembled mbuf, or NULL */ + DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); + *m0 = m = pf_reassemble(m0, &frag, frent, mff); + + if (m == NULL) + return (PF_DROP); + + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + + h = mtod(m, struct ip *); + } else { + /* non-buffering fragment cache (drops or masks overlaps) */ + int nomem = 0; + + if (dir == PF_OUT) { + if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) != + NULL) { + /* Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. + */ + goto fragment_pass; + } + } + + frag = pf_find_fragment(h, &pf_cache_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + max > frag->fr_max) { + if (r->rule_flag & PFRULE_FRAGDROP) + frag->fr_flags |= PFFRAG_DROP; + goto bad; + } + + *m0 = m = pf_fragcache(m0, h, &frag, mff, + (r->rule_flag & PFRULE_FRAGDROP) ? 
1 : 0, &nomem); + if (m == NULL) { + if (nomem) + goto no_mem; + goto drop; + } + + if (dir == PF_IN) { + struct m_tag *mtag; + + mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT); + if (mtag == NULL) + goto no_mem; + m_tag_prepend(m, mtag); + } + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + goto fragment_pass; + } + + no_fragment: + /* At this point, only IP_DF is allowed in ip_off */ + h->ip_off &= htons(IP_DF); + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip_ttl < r->min_ttl) + h->ip_ttl = r->min_ttl; + + if (r->rule_flag & PFRULE_RANDOMID) + h->ip_id = ip_randomid(); + + return (PF_PASS); + + fragment_pass: + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip_ttl < r->min_ttl) + h->ip_ttl = r->min_ttl; + + return (PF_PASS); + + no_mem: + REASON_SET(reason, PFRES_MEMORY); + if (r != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + bad: + DPFPRINTF(("dropping bad fragment\n")); + + /* Free associated fragments */ + if (frag != NULL) + pf_free_fragment(frag); + + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET, dir, *reason, r, NULL, NULL); + + return (PF_DROP); +} + +#ifdef INET6 +int +pf_normalize_ip6(struct mbuf **m0, int dir, struct ifnet *ifp, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int off; + struct ip6_ext ext; + struct ip6_opt opt; + struct ip6_opt_jumbo jumbo; + struct ip6_frag frag; + u_int32_t jumbolen = 0, plen; + u_int16_t fragoff = 0; + int optend; + int ooff; + u_int8_t proto; + int terminal; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && r->ifp != ifp) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET6) + r = r->skip[PF_SKIP_AF].ptr; +#if 0 /* header chain! 
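matching r->proto against ip6_nxt would be wrong whenever extension headers are present; the real upper-layer protocol is only known after the header walk below.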
*/ + else if (r->proto && r->proto != h->ip6_nxt) + r = r->skip[PF_SKIP_PROTO].ptr; +#endif + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL) + return (PF_PASS); + else + r->packets++; + + /* Check for illegal packets */ + if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) + goto drop; + + off = sizeof(struct ip6_hdr); + proto = h->ip6_nxt; + terminal = 0; + do { + switch (proto) { + case IPPROTO_FRAGMENT: + goto fragment; + break; + case IPPROTO_AH: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + if (proto == IPPROTO_AH) + off += (ext.ip6e_len + 2) * 4; + else + off += (ext.ip6e_len + 1) * 8; + proto = ext.ip6e_nxt; + break; + case IPPROTO_HOPOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + optend = off + (ext.ip6e_len + 1) * 8; + ooff = off + sizeof(ext); + do { + if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, + sizeof(opt.ip6o_type), NULL, NULL, + AF_INET6)) + goto shortpkt; + if (opt.ip6o_type == IP6OPT_PAD1) { + ooff++; + continue; + } + if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), + NULL, NULL, AF_INET6)) + goto shortpkt; + if (ooff + sizeof(opt) + opt.ip6o_len > optend) + goto drop; + switch (opt.ip6o_type) { + case IP6OPT_JUMBO: + if (h->ip6_plen != 0) + goto drop; + if (!pf_pull_hdr(m, ooff, &jumbo, + sizeof(jumbo), NULL, NULL, + AF_INET6)) + goto shortpkt; + memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, + sizeof(jumbolen)); + jumbolen = ntohl(jumbolen); + if (jumbolen <= IPV6_MAXPACKET) + goto drop; + if (sizeof(struct ip6_hdr) + jumbolen != + m->m_pkthdr.len) + goto drop; + break; + default: + break; + } + ooff += sizeof(opt) + opt.ip6o_len; + } while (ooff < optend); + + off = optend; + proto = ext.ip6e_nxt; + break; + default: + terminal = 1; + break; + } + } while (!terminal); + + /* jumbo payload option must be present, or plen > 0 */ + if (ntohs(h->ip6_plen) == 0) + plen = jumbolen; + else + plen = ntohs(h->ip6_plen); + if (plen == 0) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip6_hlim < r->min_ttl) + h->ip6_hlim = r->min_ttl; + + return (PF_PASS); + + fragment: + if (ntohs(h->ip6_plen) == 0 || jumbolen) + goto drop; + plen = ntohs(h->ip6_plen); + + if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) + goto shortpkt; + fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); + if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) + goto badfrag; + + /* do something about it */ + return (PF_PASS); + + shortpkt: + REASON_SET(reason, PFRES_SHORT); + if (r != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + badfrag: + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + return (PF_DROP); +} +#endif + +int +pf_normalize_tcp(int dir, struct ifnet *ifp, struct mbuf *m, int ipoff, + int off, void *h, struct pf_pdesc *pd) +{ + struct pf_rule *r, *rm = NULL; + struct tcphdr 
*th = pd->hdr.tcp; + int rewrite = 0; + u_short reason; + u_int8_t flags; + sa_family_t af = pd->af; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->ifp != NULL && r->ifp != ifp) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + rm = r; + break; + } + } + + if (rm == NULL) + return (PF_PASS); + else + r->packets++; + + if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) + pd->flags |= PFDESC_TCP_NORM; + + flags = th->th_flags; + if (flags & TH_SYN) { + /* Illegal packet */ + if (flags & TH_RST) + goto tcp_drop; + + if (flags & TH_FIN) + flags &= ~TH_FIN; + } else { + /* Illegal packet */ + if (!(flags & (TH_ACK|TH_RST))) + goto tcp_drop; + } + + if (!(flags & TH_ACK)) { + /* These flags are only valid if ACK is set */ + if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) + goto tcp_drop; + } + + /* Check for illegal header length */ + if (th->th_off < (sizeof(struct tcphdr) >> 2)) + goto tcp_drop; + + /* If flags changed, or reserved data set, then adjust */ + if (flags != th->th_flags || th->th_x2 != 0) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)(&th->th_ack + 1); + th->th_flags = flags; + th->th_x2 = 0; + nv = *(u_int16_t *)(&th->th_ack + 1); + + th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv); + rewrite = 1; + } + + /* Remove urgent pointer, if TH_URG is not set */ + if (!(flags & TH_URG) && th->th_urp) { + th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0); + th->th_urp = 0; + rewrite = 1; + } + + /* Process options */ + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) + rewrite = 1; + + /* copy back packet headers if we sanitized */ + if (rewrite) + m_copyback(m, off, sizeof(*th), th); + + return (PF_PASS); + + tcp_drop: + REASON_SET(&reason, PFRES_NORM); + if (rm != NULL && r->log) + PFLOG_PACKET(ifp, h, m, AF_INET, dir, reason, r, NULL, NULL); + return (PF_DROP); +} + +int +pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) +{ + u_int8_t hdr[60]; + u_int8_t *opt; + + KASSERT(src->scrub == NULL); + + src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (src->scrub == NULL) + return (1); + bzero(src->scrub, sizeof(*src->scrub)); + + switch (pd->af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + src->scrub->pfss_ttl = h->ip_ttl; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + src->scrub->pfss_ttl = h->ip6_hlim; + break; + } +#endif /* INET6 */ + } + + + /* + * All normalizations below are only begun if we see the start of + * the connections. 
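 *
 * (Illustrative aside, not part of the patch: the flag sanitizing above
 * repairs the TCP checksum incrementally instead of recomputing it.  The
 * 16-bit word covering th_off/th_x2 and th_flags is read before (ov) and
 * after (nv) the rewrite, and pf_cksum_fixup() folds the difference into
 * the existing sum with end-around carry, essentially:
 *
 *	l = cksum + old - new;
 *	l = (l >> 16) + (l & 0xffff);
 *	return (l & 0xffff);
 *
 * so a single-word fixup suffices however many flag bits changed.)
 *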
They must all set an enabled bit in pfss_flags + */ + if ((th->th_flags & TH_SYN) == 0) + return 0; + + + if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + if (opt[1] >= TCPOLEN_TIMESTAMP) { + src->scrub->pfss_flags |= + PFSS_TIMESTAMP; + src->scrub->pfss_ts_mod = arc4random(); + } + /* FALLTHROUGH */ + default: + hlen -= opt[1]; + opt += opt[1]; + break; + } + } + } + + return (0); +} + +void +pf_normalize_tcp_cleanup(struct pf_state *state) +{ + if (state->src.scrub) + pool_put(&pf_state_scrub_pl, state->src.scrub); + if (state->dst.scrub) + pool_put(&pf_state_scrub_pl, state->dst.scrub); + + /* Someday... flush the TCP segment reassembly descriptors. */ +} + +int +pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, + u_short *reason, struct tcphdr *th, struct pf_state_peer *src, + struct pf_state_peer *dst, int *writeback) +{ + u_int8_t hdr[60]; + u_int8_t *opt; + int copyback = 0; + + KASSERT(src->scrub || dst->scrub); + + /* + * Enforce the minimum TTL seen for this connection. Negate a common + * technique to evade an intrusion detection system and confuse + * firewall state code. + */ + switch (pd->af) { +#ifdef INET + case AF_INET: { + if (src->scrub) { + struct ip *h = mtod(m, struct ip *); + if (h->ip_ttl > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip_ttl; + h->ip_ttl = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + if (dst->scrub) { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + if (h->ip6_hlim > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip6_hlim; + h->ip6_hlim = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET6 */ + } + + if (th->th_off > (sizeof(struct tcphdr) >> 2) && + ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || + (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + /* Modulate the timestamps. Can be used for + * NAT detection, OS uptime determination or + * reboot detection. 
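 *
 * (Illustrative aside, not part of the patch: each peer of the state gets
 * its own random 32-bit offset, pfss_ts_mod, chosen at state creation.
 * The sender's TSval is shifted by its own offset and the echoed TSecr is
 * shifted back by the other side's, so both endpoints still see
 * self-consistent timestamps while the hosts' real tick counts never
 * cross the wire:
 *
 *	TSval, opt[2..5]: htonl(ntohl(ts) + src->scrub->pfss_ts_mod)
 *	TSecr, opt[6..9]: htonl(ntohl(ts) - dst->scrub->pfss_ts_mod)
 * )
 *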
+ */ + if (opt[1] >= TCPOLEN_TIMESTAMP) { + u_int32_t ts_value; + if (src->scrub && + (src->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + memcpy(&ts_value, &opt[2], + sizeof(u_int32_t)); + ts_value = htonl(ntohl(ts_value) + + src->scrub->pfss_ts_mod); + pf_change_a(&opt[2], + &th->th_sum, ts_value, 0); + copyback = 1; + } + if (dst->scrub && + (dst->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + memcpy(&ts_value, &opt[6], + sizeof(u_int32_t)); + ts_value = htonl(ntohl(ts_value) + - dst->scrub->pfss_ts_mod); + pf_change_a(&opt[6], + &th->th_sum, ts_value, 0); + copyback = 1; + } + } + /* FALLTHROUGH */ + default: + hlen -= opt[1]; + opt += opt[1]; + break; + } + } + if (copyback) { + /* Copyback the options, caller copys back header */ + *writeback = 1; + m_copyback(m, off + sizeof(struct tcphdr), + (th->th_off << 2) - sizeof(struct tcphdr), hdr + + sizeof(struct tcphdr)); + } + } + + + /* I have a dream.... TCP segment reassembly.... */ + return (0); +} +int +pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, + int off) +{ + u_int16_t *mss; + int thoff; + int opt, cnt, optlen = 0; + int rewrite = 0; + u_char *optp; + + thoff = th->th_off << 2; + cnt = thoff - sizeof(struct tcphdr); + optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr); + + for (; cnt > 0; cnt -= optlen, optp += optlen) { + opt = optp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = optp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_MAXSEG: + mss = (u_int16_t *)(optp + 2); + if ((ntohs(*mss)) > r->max_mss) { + th->th_sum = pf_cksum_fixup(th->th_sum, + *mss, htons(r->max_mss)); + *mss = htons(r->max_mss); + rewrite = 1; + } + break; + default: + break; + } + } + + return (rewrite); +} diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c new file mode 100644 index 000000000000..c01d6de64078 --- /dev/null +++ b/sys/contrib/pf/net/pf_osfp.c @@ -0,0 +1,524 @@ +/* $OpenBSD: pf_osfp.c,v 1.3 2003/08/27 18:23:36 frantzen Exp $ */ + +/* + * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include <sys/param.h> +#include <sys/socket.h> +#ifdef _KERNEL +# include <sys/systm.h> +#endif /* _KERNEL */ +#include <sys/mbuf.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#include <net/if.h> +#include <net/pfvar.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + + +#ifdef _KERNEL +# define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +typedef struct pool pool_t; + +#else +/* Userland equivalents so we can lend code to tcpdump et al. 
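 *
 * (Illustrative aside, not part of the patch: in the userland build a
 * pool_t is just an int holding the element size; pool_init() stores the
 * size, pool_get() becomes malloc() of that many bytes with the flags
 * argument ignored, and pool_put() becomes free().  So the kernel-style
 * allocation calls below compile unchanged:
 *
 *	pool_t pl;
 *
 *	pool_init(&pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, "fp", NULL);
 *	struct pf_os_fingerprint *fp = pool_get(&pl, PR_NOWAIT);
 *	pool_put(&pl, fp);
 * )
 *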
*/ + +# include <arpa/inet.h> +# include <errno.h> +# include <stdio.h> +# include <stdlib.h> +# define pool_t int +# define pool_get(pool, flags) malloc(*(pool)) +# define pool_put(pool, item) free(item) +# define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) + +# ifdef PFDEBUG +# include <stdarg.h> +# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +# else +# define DPFPRINTF(format, x...) ((void)0) +# endif /* PFDEBUG */ +#endif /* _KERNEL */ + + +SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; +pool_t pf_osfp_entry_pl; +pool_t pf_osfp_pl; + +struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, + struct pf_os_fingerprint *, u_int8_t); +struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, + struct pf_os_fingerprint *); +void pf_osfp_insert(struct pf_osfp_list *, + struct pf_os_fingerprint *); + + +#ifdef _KERNEL +/* + * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) + * Returns the list of possible OSes. + */ +struct pf_osfp_enlist * +pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, + const struct tcphdr *tcp) +{ + struct ip *ip; + char hdr[60]; + + /* XXX don't have a fingerprint database for IPv6 :-( */ + if (pd->af != PF_INET || pd->proto != IPPROTO_TCP || (tcp->th_off << 2) + < sizeof(*tcp)) + return (NULL); + + ip = mtod(m, struct ip *); + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, pd->af)) + return (NULL); + + return (pf_osfp_fingerprint_hdr(ip, (struct tcphdr *)hdr)); +} +#endif /* _KERNEL */ + +struct pf_osfp_enlist * +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) +{ + struct pf_os_fingerprint fp, *fpresult; + int cnt, optlen = 0; + u_int8_t *optp; + + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN || (ip->ip_off & + htons(IP_OFFMASK))) + return (NULL); + + memset(&fp, 0, sizeof(fp)); + + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; + fp.fp_wsize = ntohs(tcp->th_win); + + + cnt = (tcp->th_off << 2) - sizeof(*tcp); + optp = (caddr_t)tcp + sizeof(*tcp); + for (; cnt > 0; cnt -= optlen, optp += optlen) { + if (*optp == TCPOPT_EOL) + break; + + fp.fp_optcnt++; + if (*optp == TCPOPT_NOP) { + fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_NOP; + optlen = 1; + } else { + if (cnt < 2) + return (NULL); + optlen = optp[1]; + if (optlen > cnt || optlen < 2) + return (NULL); + switch (*optp) { + case TCPOPT_MAXSEG: + if (optlen >= TCPOLEN_MAXSEG) + memcpy(&fp.fp_mss, &optp[2], + sizeof(fp.fp_mss)); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; + NTOHS(fp.fp_mss); + break; + case TCPOPT_WINDOW: + if (optlen >= TCPOLEN_WINDOW) + memcpy(&fp.fp_wscale, &optp[2], + sizeof(fp.fp_wscale)); + NTOHS(fp.fp_wscale); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_WSCALE; + break; + case TCPOPT_SACK_PERMITTED: + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK; + break; + case TCPOPT_TIMESTAMP: + if (optlen >= TCPOLEN_TIMESTAMP) { + u_int32_t ts; + memcpy(&ts, &optp[2], sizeof(ts)); + if (ts == 0) + fp.fp_flags |= PF_OSFP_TS0; + + } + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS; + break; + default: + return (NULL); + } + } + optlen = MAX(optlen, 1); /* paranoia */ + } + + DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)\n", + inet_ntoa(ip->ip_src), ntohs(tcp->th_sport), + fp.fp_wsize, fp.fp_ttl, (fp.fp_flags 
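/*
 * Illustrative aside, not part of the patch: the parse loop above encodes
 * the option sequence itself into the fingerprint.  Every option shifts
 * fp_tcpopts left by PF_OSFP_TCPOPT_BITS and ORs in a small per-option
 * code:
 *
 *	fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | code;
 *
 * so the field records both which options a stack sends in its SYN and in
 * what order; two stacks sending the same options in a different order
 * produce different fingerprints.
 */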
& PF_OSFP_DF) != 0, + fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, + (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fp.fp_mss, + (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", + fp.fp_wscale); + + if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, + PF_OSFP_MAXTTL_OFFSET))) + return (&fpresult->fp_oses); + return (NULL); +} + +/* Match a fingerprint ID against a list of OSes */ +int +pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) +{ + struct pf_osfp_entry *entry; + int os_class, os_version, os_subtype; + int en_class, en_version, en_subtype; + + if (os == PF_OSFP_ANY) + return (1); + if (list == NULL) { + DPFPRINTF("osfp no match against %x\n", os); + return (os == PF_OSFP_UNKNOWN); + } + PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); + SLIST_FOREACH(entry, list, fp_entry) { + PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype); + if ((os_class == PF_OSFP_ANY || en_class == os_class) && + (os_version == PF_OSFP_ANY || en_version == os_version) && + (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { + DPFPRINTF("osfp matched %s %s %s %x==%x\n", + entry->fp_class_nm, entry->fp_version_nm, + entry->fp_subtype_nm, os, entry->fp_os); + return (1); + } + } + DPFPRINTF("fingerprint 0x%x didn't match\n", os); + return (0); +} + +/* Initialize the OS fingerprint system */ +void +pf_osfp_initialize(void) +{ + pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, + "pfosfpen", NULL); + pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, + "pfosfp", NULL); + SLIST_INIT(&pf_osfp_list); +} + +/* Flush the fingerprint list */ +void +pf_osfp_flush(void) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + + while ((fp = SLIST_FIRST(&pf_osfp_list))) { + SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); + while ((entry = SLIST_FIRST(&fp->fp_oses))) { + SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); + pool_put(&pf_osfp_entry_pl, entry); + } + pool_put(&pf_osfp_pl, fp); + } +} + + +/* Add a fingerprint */ +int +pf_osfp_add(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp, fpadd; + struct pf_osfp_entry *entry; + + memset(&fpadd, 0, sizeof(fpadd)); + fpadd.fp_tcpopts = fpioc->fp_tcpopts; + fpadd.fp_wsize = fpioc->fp_wsize; + fpadd.fp_psize = fpioc->fp_psize; + fpadd.fp_mss = fpioc->fp_mss; + fpadd.fp_flags = fpioc->fp_flags; + fpadd.fp_optcnt = fpioc->fp_optcnt; + fpadd.fp_wscale = fpioc->fp_wscale; + fpadd.fp_ttl = fpioc->fp_ttl; + + DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " + "(TS=%s,M=%s%d,W=%s%d) %x\n", + fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, + fpioc->fp_os.fp_subtype_nm, + (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" : + (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "", + fpadd.fp_wsize, + fpadd.fp_ttl, + (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0, + (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "", + fpadd.fp_psize, + (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt, + (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fpadd.fp_mss, + (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
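/*
 * Illustrative aside, not part of the patch: pf_osfp_match() above treats
 * a fingerprint ID as three packed fields and PF_OSFP_ANY as a wildcard
 * at any level, so an "os" rule naming only a class matches every version
 * and subtype of it.  After PF_OSFP_UNPACK() the comparison is just:
 *
 *	(os_class == PF_OSFP_ANY || en_class == os_class) &&
 *	(os_version == PF_OSFP_ANY || en_version == os_version) &&
 *	(os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)
 */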
"*" : "", + fpadd.fp_wscale, + fpioc->fp_os.fp_os); + + + if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) + return (EEXIST); + } + if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) + return (ENOMEM); + } else { + if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL) + return (ENOMEM); + memset(fp, 0, sizeof(*fp)); + fp->fp_tcpopts = fpioc->fp_tcpopts; + fp->fp_wsize = fpioc->fp_wsize; + fp->fp_psize = fpioc->fp_psize; + fp->fp_mss = fpioc->fp_mss; + fp->fp_flags = fpioc->fp_flags; + fp->fp_optcnt = fpioc->fp_optcnt; + fp->fp_wscale = fpioc->fp_wscale; + fp->fp_ttl = fpioc->fp_ttl; + SLIST_INIT(&fp->fp_oses); + if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) { + pool_put(&pf_osfp_pl, fp); + return (ENOMEM); + } + pf_osfp_insert(&pf_osfp_list, fp); + } + memcpy(entry, &fpioc->fp_os, sizeof(*entry)); + + /* Make sure the strings are NUL terminated */ + entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0'; + entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0'; + entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0'; + + SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry); + +#ifdef PFDEBUG + if ((fp = pf_osfp_validate())) + printf("Invalid fingerprint list\n"); +#endif /* PFDEBUG */ + return (0); +} + + +/* Find a fingerprint in the list */ +struct pf_os_fingerprint * +pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, + u_int8_t ttldiff) +{ + struct pf_os_fingerprint *f; + +#define MATCH_INT(_MOD, _DC, _field) \ + if ((f->fp_flags & _DC) == 0) { \ + if ((f->fp_flags & _MOD) == 0) { \ + if (f->_field != find->_field) \ + continue; \ + } else { \ + if (f->_field == 0 || find->_field % f->_field) \ + continue; \ + } \ + } + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts != find->fp_tcpopts || + f->fp_optcnt != find->fp_optcnt || + f->fp_ttl < find->fp_ttl || + f->fp_ttl - find->fp_ttl > ttldiff || + (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) != + (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0))) + continue; + + MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize) + MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss) + MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale) + if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) { + if (f->fp_flags & PF_OSFP_WSIZE_MSS) { + if (find->fp_mss == 0) + continue; + +/* Some "smart" NAT devices and DSL routers will tweak the MSS size and + * will set it to whatever is suitable for the link type. 
+ */ +#define SMART_MSS 1460 + if ((find->fp_wsize % find->fp_mss || + find->fp_wsize / find->fp_mss != + f->fp_wsize) && + (find->fp_wsize % SMART_MSS || + find->fp_wsize / SMART_MSS != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MTU) { + if (find->fp_mss == 0) + continue; + +#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) +#define SMART_MTU (SMART_MSS + MTUOFF) + if ((find->fp_wsize % (find->fp_mss + MTUOFF) || + find->fp_wsize / (find->fp_mss + MTUOFF) != + f->fp_wsize) && + (find->fp_wsize % SMART_MTU || + find->fp_wsize / SMART_MTU != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MOD) { + if (f->fp_wsize == 0 || find->fp_wsize % + f->fp_wsize) + continue; + } else { + if (f->fp_wsize != find->fp_wsize) + continue; + } + } + return (f); + } + + return (NULL); +} + +/* Find an exact fingerprint in the list */ +struct pf_os_fingerprint * +pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) +{ + struct pf_os_fingerprint *f; + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts == find->fp_tcpopts && + f->fp_wsize == find->fp_wsize && + f->fp_psize == find->fp_psize && + f->fp_mss == find->fp_mss && + f->fp_flags == find->fp_flags && + f->fp_optcnt == find->fp_optcnt && + f->fp_wscale == find->fp_wscale && + f->fp_ttl == find->fp_ttl) + return (f); + } + + return (NULL); +} + +/* Insert a fingerprint into the list */ +void +pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) +{ + struct pf_os_fingerprint *f, *prev = NULL; + + /* XXX need to go semi tree based. can key on tcp options */ + + SLIST_FOREACH(f, list, fp_next) + prev = f; + if (prev) + SLIST_INSERT_AFTER(prev, ins, fp_next); + else + SLIST_INSERT_HEAD(list, ins, fp_next); +} + +/* Fill a fingerprint by its number (from an ioctl) */ +int +pf_osfp_get(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + int num = fpioc->fp_getnum; + int i = 0; + + + memset(fpioc, 0, sizeof(*fpioc)); + SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (i++ == num) { + fpioc->fp_mss = fp->fp_mss; + fpioc->fp_wsize = fp->fp_wsize; + fpioc->fp_flags = fp->fp_flags; + fpioc->fp_psize = fp->fp_psize; + fpioc->fp_ttl = fp->fp_ttl; + fpioc->fp_wscale = fp->fp_wscale; + fpioc->fp_getnum = num; + memcpy(&fpioc->fp_os, entry, + sizeof(fpioc->fp_os)); + return (0); + } + } + } + + return (EBUSY); +} + + +/* Validate that each signature is reachable */ +struct pf_os_fingerprint * +pf_osfp_validate(void) +{ + struct pf_os_fingerprint *f, *f2, find; + + SLIST_FOREACH(f, &pf_osfp_list, fp_next) { + memcpy(&find, f, sizeof(find)); + + /* We do a few MSS/th_win percolations to make things unique */ + if (find.fp_mss == 0) + find.fp_mss = 128; + if (f->fp_flags & PF_OSFP_WSIZE_MSS) + find.fp_wsize *= find.fp_mss, 1; + else if (f->fp_flags & PF_OSFP_WSIZE_MTU) + find.fp_wsize *= (find.fp_mss + 40); + else if (f->fp_flags & PF_OSFP_WSIZE_MOD) + find.fp_wsize *= 2; + if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { + if (f2) + printf("Found \"%s %s %s\" instead of " + "\"%s %s %s\"\n", + SLIST_FIRST(&f2->fp_oses)->fp_class_nm, + SLIST_FIRST(&f2->fp_oses)->fp_version_nm, + SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm, + SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + else + printf("Couldn't find \"%s %s %s\"\n", + SLIST_FIRST(&f->fp_oses)->fp_class_nm, + 
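/*
 * Illustrative aside, not part of the patch: the MATCH_INT() macro used
 * by pf_osfp_find() gives each numeric field three comparison modes,
 * selected per fingerprint by flags: a don't-care flag (_DC) skips the
 * field, a modulus flag (_MOD) requires the stored value to evenly divide
 * the observed one, and otherwise the two must be equal:
 *
 *	if (!(flags & DC)) {
 *		if (!(flags & MOD))
 *			match = (stored == seen);
 *		else
 *			match = (stored != 0 && seen % stored == 0);
 *	}
 */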
SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + return (f); + } + } + return (NULL); +} diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c new file mode 100644 index 000000000000..e6ce25fa46e0 --- /dev/null +++ b/sys/contrib/pf/net/pf_table.c @@ -0,0 +1,2018 @@ +/* $OpenBSD: pf_table.c,v 1.41 2003/08/22 15:19:23 henning Exp $ */ + +/* + * Copyright (c) 2002 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/socket.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> + +#include <net/if.h> +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/ip_ipsp.h> +#include <net/pfvar.h> + +#define ACCEPT_FLAGS(oklist) \ + do { \ + if ((flags & ~(oklist)) & \ + PFR_FLAG_ALLMASK) \ + return (EINVAL); \ + } while (0) + +#define FILLIN_SIN(sin, addr) \ + do { \ + (sin).sin_len = sizeof(sin); \ + (sin).sin_family = AF_INET; \ + (sin).sin_addr = (addr); \ + } while (0) + +#define FILLIN_SIN6(sin6, addr) \ + do { \ + (sin6).sin6_len = sizeof(sin6); \ + (sin6).sin6_family = AF_INET6; \ + (sin6).sin6_addr = (addr); \ + } while (0) + +#define SWAP(type, a1, a2) \ + do { \ + type tmp = a1; \ + a1 = a2; \ + a2 = tmp; \ + } while (0) + +#define SUNION2PF(su, af) (((af)==AF_INET) ? 
\ + (struct pf_addr *)&(su)->sin.sin_addr : \ + (struct pf_addr *)&(su)->sin6.sin6_addr) + +#define AF_BITS(af) (((af)==AF_INET)?32:128) +#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) +#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) +#define KENTRY_RNF_ROOT(ke) \ + ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) + +#define NO_ADDRESSES (-1) +#define ENQUEUE_UNMARKED_ONLY (1) +#define INVERT_NEG_FLAG (1) + +struct pfr_walktree { + enum pfrw_op { + PFRW_MARK, + PFRW_SWEEP, + PFRW_ENQUEUE, + PFRW_GET_ADDRS, + PFRW_GET_ASTATS, + PFRW_POOL_GET + } pfrw_op; + union { + struct pfr_addr *pfrw1_addr; + struct pfr_astats *pfrw1_astats; + struct pfr_kentryworkq *pfrw1_workq; + struct pfr_kentry *pfrw1_kentry; + } pfrw_1; + int pfrw_free; +}; +#define pfrw_addr pfrw_1.pfrw1_addr +#define pfrw_astats pfrw_1.pfrw1_astats +#define pfrw_workq pfrw_1.pfrw1_workq +#define pfrw_kentry pfrw_1.pfrw1_kentry +#define pfrw_cnt pfrw_free + +#define senderr(e) do { rv = (e); goto _bad; } while (0) + +struct pool pfr_ktable_pl; +struct pool pfr_kentry_pl; +struct sockaddr_in pfr_sin; +struct sockaddr_in6 pfr_sin6; +union sockaddr_union pfr_mask; +struct pf_addr pfr_ffaddr; + +void pfr_copyout_addr(struct pfr_addr *, + struct pfr_kentry *ke); +int pfr_validate_addr(struct pfr_addr *); +void pfr_enqueue_addrs(struct pfr_ktable *, + struct pfr_kentryworkq *, int *, int); +void pfr_mark_addrs(struct pfr_ktable *); +struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, + struct pfr_addr *, int); +struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +void pfr_destroy_kentries(struct pfr_kentryworkq *); +void pfr_destroy_kentry(struct pfr_kentry *); +void pfr_insert_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *, long); +void pfr_remove_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *); +void pfr_clstats_kentries(struct pfr_kentryworkq *, long, + int); +void pfr_reset_feedback(struct pfr_addr *, int); +void pfr_prepare_network(union sockaddr_union *, int, int); +int pfr_route_kentry(struct pfr_ktable *, + struct pfr_kentry *); +int pfr_unroute_kentry(struct pfr_ktable *, + struct pfr_kentry *); +int pfr_walktree(struct radix_node *, void *); +int pfr_validate_table(struct pfr_table *, int); +void pfr_commit_ktable(struct pfr_ktable *, long); +void pfr_insert_ktables(struct pfr_ktableworkq *); +void pfr_insert_ktable(struct pfr_ktable *); +void pfr_setflags_ktables(struct pfr_ktableworkq *); +void pfr_setflags_ktable(struct pfr_ktable *, int); +void pfr_clstats_ktables(struct pfr_ktableworkq *, long, + int); +void pfr_clstats_ktable(struct pfr_ktable *, long, int); +struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int); +void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +void pfr_destroy_ktable(struct pfr_ktable *, int); +int pfr_ktable_compare(struct pfr_ktable *, + struct pfr_ktable *); +struct pfr_ktable *pfr_lookup_table(struct pfr_table *); +void pfr_clean_node_mask(struct pfr_ktable *, + struct pfr_kentryworkq *); +int pfr_table_count(struct pfr_table *, int); +int pfr_skip_table(struct pfr_table *, + struct pfr_ktable *, int); +struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int); + +RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); + +struct pfr_ktablehead pfr_ktables; +struct pfr_table pfr_nulltable; +int pfr_ktable_cnt; + +void +pfr_initialize(void) +{ + pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 
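/*
 * Illustrative aside, not part of the patch: every radix-tree traversal
 * in this file funnels through the single callback pfr_walktree(),
 * parameterized by the struct pfr_walktree cursor above.  pfrw_op picks
 * the action (mark, sweep, enqueue, copy out addresses or stats, fetch
 * the n-th entry) and the union carries that op's cursor, so a typical
 * caller looks like:
 *
 *	struct pfr_walktree w;
 *
 *	bzero(&w, sizeof(w));
 *	w.pfrw_op = PFRW_ENQUEUE;
 *	w.pfrw_workq = workq;
 *	rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
 */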
0, 0, 0, + "pfrktable", NULL); + pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, + "pfrkentry", NULL); + + pfr_sin.sin_len = sizeof(pfr_sin); + pfr_sin.sin_family = AF_INET; + pfr_sin6.sin6_len = sizeof(pfr_sin6); + pfr_sin6.sin6_family = AF_INET6; + + memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); +} + +int +pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + int s; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_enqueue_addrs(kt, &workq, ndel, 0); + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_remove_kentries(kt, &workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + if (kt->pfrkt_cnt) { + printf("pfr_clr_addrs: corruption detected (%d).\n", + kt->pfrkt_cnt); + kt->pfrkt_cnt = 0; + } + } + return (0); +} + +int +pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, s, xadd = 0; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + q = pfr_lookup_addr(tmpkt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (q != NULL) + ad.pfra_fback = PFR_FB_DUPLICATE; + else if (p == NULL) + ad.pfra_fback = PFR_FB_ADDED; + else if (p->pfrke_not != ad.pfra_not) + ad.pfra_fback = PFR_FB_CONFLICT; + else + ad.pfra_fback = PFR_FB_NONE; + } + if (p == NULL && q == NULL) { + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xadd++; + } + } + if (flags & PFR_FLAG_FEEDBACK) + if (copyout(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + pfr_clean_node_mask(tmpkt, &workq); + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_insert_kentries(kt, &workq, tzero); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } else + pfr_destroy_kentries(&workq); + if (nadd != NULL) + *nadd = xadd; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &workq); + pfr_destroy_kentries(&workq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, rv, s, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return 
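/*
 * Illustrative aside, not part of the patch: the add loop below checks
 * each candidate against a throwaway table (tmpkt) as well as the live
 * one.  A hit in the live table means the address already exists; a hit
 * in tmpkt means the same address occurs twice in this request, and it is
 * flagged PFR_FB_DUPLICATE instead of being inserted twice:
 *
 *	p = pfr_lookup_addr(kt, &ad, 1);	(already in the table?)
 *	q = pfr_lookup_addr(tmpkt, &ad, 1);	(duplicate in the request?)
 */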
(ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_mark_addrs(kt); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (p == NULL) + ad.pfra_fback = PFR_FB_NONE; + else if (p->pfrke_not != ad.pfra_not) + ad.pfra_fback = PFR_FB_CONFLICT; + else if (p->pfrke_mark) + ad.pfra_fback = PFR_FB_DUPLICATE; + else + ad.pfra_fback = PFR_FB_DELETED; + } + if (p != NULL && p->pfrke_not == ad.pfra_not && + !p->pfrke_mark) { + p->pfrke_mark = 1; + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xdel++; + } + if (flags & PFR_FLAG_FEEDBACK) + if (copyout(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_remove_kentries(kt, &workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + return (rv); +} + +int +pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *size2, int *nadd, int *ndel, int *nchange, int flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq addq, delq, changeq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, s, xadd = 0, xdel = 0, xchange = 0; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&delq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + ad.pfra_fback = PFR_FB_NONE; + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) { + if (p->pfrke_mark) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p->pfrke_mark = 1; + if (p->pfrke_not != ad.pfra_not) { + SLIST_INSERT_HEAD(&changeq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_CHANGED; + xchange++; + } + } else { + q = pfr_lookup_addr(tmpkt, &ad, 1); + if (q != NULL) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_ADDED; + xadd++; + } + } +_skip: + if (flags & PFR_FLAG_FEEDBACK) + if (copyout(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); + if ((flags & PFR_FLAG_FEEDBACK) && *size2) { + if (*size2 < size+xdel) { + *size2 = size+xdel; + senderr(0); + } + i = 0; + SLIST_FOREACH(p, &delq, pfrke_workq) { + pfr_copyout_addr(&ad, p); + ad.pfra_fback = PFR_FB_DELETED; + if (copyout(&ad, addr+size+i, sizeof(ad))) + senderr(EFAULT); + i++; + } + } + pfr_clean_node_mask(tmpkt, &addq); + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } else + 
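/*
 * Illustrative aside, not part of the patch: pfr_set_addrs() reconciles
 * the table against the caller's list with a mark-and-sweep pass:
 *
 *	pfr_mark_addrs(kt);		(clear every entry's mark)
 *	(per address: mark existing entries, queue new ones on addq,
 *	 queue polarity changes on changeq)
 *	pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
 *
 * whatever is still unmarked after the loop was not in the caller's list
 * and ends up on delq for removal.
 */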
pfr_destroy_kentries(&addq); + if (nadd != NULL) + *nadd = xadd; + if (ndel != NULL) + *ndel = xdel; + if (nchange != NULL) + *nchange = xchange; + if ((flags & PFR_FLAG_FEEDBACK) && *size2) + *size2 = size+xdel; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &addq); + pfr_destroy_kentries(&addq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nmatch, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, xmatch = 0; + + ACCEPT_FLAGS(PFR_FLAG_REPLACE); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + return (EFAULT); + if (pfr_validate_addr(&ad)) + return (EINVAL); + if (ADDR_NETWORK(&ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, &ad, 0); + if (flags & PFR_FLAG_REPLACE) + pfr_copyout_addr(&ad, p); + ad.pfra_fback = (p == NULL) ? PFR_FB_NONE : + (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); + if (p != NULL && !p->pfrke_not) + xmatch++; + if (copyout(&ad, addr+i, sizeof(ad))) + return (EFAULT); + } + if (nmatch != NULL) + *nmatch = xmatch; + return (0); +} + +int +pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + int rv; + + ACCEPT_FLAGS(0); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ADDRS; + w.pfrw_addr = addr; + w.pfrw_free = kt->pfrkt_cnt; + rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_addrs: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + struct pfr_kentryworkq workq; + int rv, s; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */ + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ASTATS; + w.pfrw_astats = addr; + w.pfrw_free = kt->pfrkt_cnt; + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + if (!rv && (flags & PFR_FLAG_CLSTATS)) { + pfr_enqueue_addrs(kt, &workq, NULL, 0); + pfr_clstats_kentries(&workq, tzero, 0); + } + if (flags & PFR_FLAG_ATOMIC) + splx(s); + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_astats: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nzero, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct 
pfr_kentry *p; + struct pfr_addr ad; + int i, rv, s, xzero = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + ad.pfra_fback = (p != NULL) ? + PFR_FB_CLEARED : PFR_FB_NONE; + if (copyout(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xzero++; + } + } + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_clstats_kentries(&workq, 0, 0); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (nzero != NULL) + *nzero = xzero; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + return (rv); +} + +int +pfr_validate_addr(struct pfr_addr *ad) +{ + int i; + + switch (ad->pfra_af) { + case AF_INET: + if (ad->pfra_net > 32) + return (-1); + break; + case AF_INET6: + if (ad->pfra_net > 128) + return (-1); + break; + default: + return (-1); + } + if (ad->pfra_net < 128 && + (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8)))) + return (-1); + for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++) + if (((caddr_t)ad)[i]) + return (-1); + if (ad->pfra_not && ad->pfra_not != 1) + return (-1); + if (ad->pfra_fback) + return (-1); + return (0); +} + +void +pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, + int *naddr, int sweep) +{ + struct pfr_walktree w; + + SLIST_INIT(workq); + bzero(&w, sizeof(w)); + w.pfrw_op = sweep ? 
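/*
 * Illustrative aside, not part of the patch: pfr_validate_addr() above
 * insists that all bits beyond the prefix length are zero.  The first
 * partial byte is tested against a mask of its low 8 - net % 8 bits, and
 * every byte after it must be zero outright:
 *
 *	if (addr[net / 8] & (0xFF >> (net % 8)))
 *		return (-1);
 *	for (i = (net + 7) / 8; i < alen; i++)
 *		if (addr[i])
 *			return (-1);
 *
 * e.g. a /20 requires the low four bits of byte 2 and all later bytes to
 * be clear, so 10.1.16.1/20 is rejected while 10.1.16.0/20 passes.
 */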
PFRW_SWEEP : PFRW_ENQUEUE; + w.pfrw_workq = workq; + if (kt->pfrkt_ip4 != NULL) + if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) + printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6 != NULL) + if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) + printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); + if (naddr != NULL) + *naddr = w.pfrw_cnt; +} + +void +pfr_mark_addrs(struct pfr_ktable *kt) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_MARK; + if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv4 walktree failed.\n"); + if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv6 walktree failed.\n"); +} + + +struct pfr_kentry * +pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) +{ + union sockaddr_union sa, mask; + struct radix_node_head *head; + struct pfr_kentry *ke; + int s; + + bzero(&sa, sizeof(sa)); + if (ad->pfra_af == AF_INET) { + FILLIN_SIN(sa.sin, ad->pfra_ip4addr); + head = kt->pfrkt_ip4; + } else { + FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); + head = kt->pfrkt_ip6; + } + if (ADDR_NETWORK(ad)) { + pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); + s = splsoftnet(); /* rn_lookup makes use of globals */ + ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); + splx(s); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + } else { + ke = (struct pfr_kentry *)rn_match(&sa, head); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + } + return (ke); +} + +struct pfr_kentry * +pfr_create_kentry(struct pfr_addr *ad) +{ + struct pfr_kentry *ke; + + ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); + if (ke == NULL) + return (NULL); + bzero(ke, sizeof(*ke)); + + if (ad->pfra_af == AF_INET) + FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); + else + FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); + ke->pfrke_af = ad->pfra_af; + ke->pfrke_net = ad->pfra_net; + ke->pfrke_not = ad->pfra_not; + return (ke); +} + +void +pfr_destroy_kentries(struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p, *q; + + for (p = SLIST_FIRST(workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrke_workq); + pfr_destroy_kentry(p); + } +} + +void +pfr_destroy_kentry(struct pfr_kentry *ke) +{ + pool_put(&pfr_kentry_pl, ke); +} + +void +pfr_insert_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq, long tzero) +{ + struct pfr_kentry *p; + int rv, n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + rv = pfr_route_kentry(kt, p); + if (rv) { + printf("pfr_insert_kentries: cannot route entry " + "(code=%d).\n", rv); + break; + } + p->pfrke_tzero = tzero; + n++; + } + kt->pfrkt_cnt += n; +} + +void +pfr_remove_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + int n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + pfr_unroute_kentry(kt, p); + n++; + } + kt->pfrkt_cnt -= n; + pfr_destroy_kentries(workq); +} + +void +pfr_clean_node_mask(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + + SLIST_FOREACH(p, workq, pfrke_workq) + pfr_unroute_kentry(kt, p); +} + +void +pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) +{ + struct pfr_kentry *p; + int s; + + SLIST_FOREACH(p, workq, pfrke_workq) { + s = splsoftnet(); + if (negchange) + p->pfrke_not = !p->pfrke_not; + bzero(p->pfrke_packets, sizeof(p->pfrke_packets)); + bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes)); + splx(s); + p->pfrke_tzero = tzero; + } +} + +void +pfr_reset_feedback(struct pfr_addr 
*addr, int size) +{ + struct pfr_addr ad; + int i; + + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + break; + ad.pfra_fback = PFR_FB_NONE; + if (copyout(&ad, addr+i, sizeof(ad))) + break; + } +} + +void +pfr_prepare_network(union sockaddr_union *sa, int af, int net) +{ + int i; + + bzero(sa, sizeof(*sa)); + if (af == AF_INET) { + sa->sin.sin_len = sizeof(sa->sin); + sa->sin.sin_family = AF_INET; + sa->sin.sin_addr.s_addr = htonl(-1 << (32-net)); + } else { + sa->sin6.sin6_len = sizeof(sa->sin6); + sa->sin6.sin6_family = AF_INET6; + for (i = 0; i < 4; i++) { + if (net <= 32) { + sa->sin6.sin6_addr.s6_addr32[i] = + htonl(-1 << (32-net)); + break; + } + sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; + net -= 32; + } + } +} + +int +pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_node_head *head; + int s; + + bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); + if (ke->pfrke_af == AF_INET) + head = kt->pfrkt_ip4; + else + head = kt->pfrkt_ip6; + + s = splsoftnet(); + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); + } else + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); + splx(s); + + return (rn == NULL ? -1 : 0); +} + +int +pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_node_head *head; + int s; + + if (ke->pfrke_af == AF_INET) + head = kt->pfrkt_ip4; + else + head = kt->pfrkt_ip6; + + s = splsoftnet(); + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_delete(&ke->pfrke_sa, &mask, head); + } else + rn = rn_delete(&ke->pfrke_sa, NULL, head); + splx(s); + + if (rn == NULL) { + printf("pfr_unroute_kentry: delete failed.\n"); + return (-1); + } + return (0); +} + +void +pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) +{ + bzero(ad, sizeof(*ad)); + if (ke == NULL) + return; + ad->pfra_af = ke->pfrke_af; + ad->pfra_net = ke->pfrke_net; + ad->pfra_not = ke->pfrke_not; + if (ad->pfra_af == AF_INET) + ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; + else + ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; +} + +int +pfr_walktree(struct radix_node *rn, void *arg) +{ + struct pfr_kentry *ke = (struct pfr_kentry *)rn; + struct pfr_walktree *w = arg; + int s; + + switch (w->pfrw_op) { + case PFRW_MARK: + ke->pfrke_mark = 0; + break; + case PFRW_SWEEP: + if (ke->pfrke_mark) + break; + /* fall trough */ + case PFRW_ENQUEUE: + SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq); + w->pfrw_cnt++; + break; + case PFRW_GET_ADDRS: + if (w->pfrw_free-- > 0) { + struct pfr_addr ad; + + pfr_copyout_addr(&ad, ke); + if (copyout(&ad, w->pfrw_addr, sizeof(ad))) + return (EFAULT); + w->pfrw_addr++; + } + break; + case PFRW_GET_ASTATS: + if (w->pfrw_free-- > 0) { + struct pfr_astats as; + + pfr_copyout_addr(&as.pfras_a, ke); + + s = splsoftnet(); + bcopy(ke->pfrke_packets, as.pfras_packets, + sizeof(as.pfras_packets)); + bcopy(ke->pfrke_bytes, as.pfras_bytes, + sizeof(as.pfras_bytes)); + splx(s); + as.pfras_tzero = ke->pfrke_tzero; + + if (copyout(&as, w->pfrw_astats, sizeof(as))) + return (EFAULT); + w->pfrw_astats++; + } + break; + case PFRW_POOL_GET: + if (ke->pfrke_not) + break; /* negative entries are ignored */ + if (!w->pfrw_cnt--) { + w->pfrw_kentry = ke; + return (1); /* finish search */ + } + break; + } + return (0); +} + +int +pfr_clr_tables(struct 
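/*
 * Illustrative aside, not part of the patch: pfr_prepare_network() above
 * turns a prefix length into the radix netmask.  For IPv4 the mask is
 * htonl(-1 << (32 - net)); for IPv6 the same pattern runs 32 bits at a
 * time, filling whole words with ones until fewer than 32 bits remain:
 *
 *	net = 20: sin_addr  = htonl(0xFFFFF000)
 *	net = 70: words 0-1 = 0xFFFFFFFF, word 2 = htonl(0xFC000000),
 *	          word 3 stays zero from the bzero()
 */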
pfr_table *filter, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + int s, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); + if (pfr_table_count(filter, flags) < 0) + return (ENOENT); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_setflags_ktables(&workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) +{ + struct pfr_ktableworkq addq, changeq; + struct pfr_ktable *p, *q, *r, key; + int i, rv, s, xadd = 0; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + if (copyin(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + senderr(EFAULT); + if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK)) + senderr(EINVAL); + key.pfrkt_flags |= PFR_TFLAG_ACTIVE; + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p == NULL) { + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + if (p == NULL) + senderr(ENOMEM); + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(p, q)) + goto _skip; + } + SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); + xadd++; + if (!key.pfrkt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); + bzero(key.pfrkt_ruleset, sizeof(key.pfrkt_ruleset)); + r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (r != NULL) { + p->pfrkt_root = r; + goto _skip; + } + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(&key, q)) { + p->pfrkt_root = q; + goto _skip; + } + } + key.pfrkt_flags = 0; + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (r == NULL) + senderr(ENOMEM); + SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); + p->pfrkt_root = r; + } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &changeq, pfrkt_workq) + if (!pfr_ktable_compare(&key, q)) + goto _skip; + p->pfrkt_nflags = (p->pfrkt_flags & + ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; + SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); + xadd++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_insert_ktables(&addq); + pfr_setflags_ktables(&changeq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } else + pfr_destroy_ktables(&addq, 0); + if (nadd != NULL) + *nadd = xadd; + return (0); +_bad: + pfr_destroy_ktables(&addq, 0); + return (rv); +} + +int +pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, s, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (copyin(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } +_skip: + ; + } 
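/*
 * Illustrative aside, not part of the patch: like most entry points in
 * this file, the delete above is two-phase.  The loop only collects and
 * validates work onto an SLIST; the commit that follows is skipped
 * entirely under PFR_FLAG_DUMMY (letting userland preview the effect) and
 * bracketed by splsoftnet() under PFR_FLAG_ATOMIC:
 *
 *	if (!(flags & PFR_FLAG_DUMMY)) {
 *		if (flags & PFR_FLAG_ATOMIC)
 *			s = splsoftnet();
 *		pfr_setflags_ktables(&workq);
 *		if (flags & PFR_FLAG_ATOMIC)
 *			splx(s);
 *	}
 */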
+ + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_setflags_ktables(&workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + int n, nn; + + ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + if (copyout(&p->pfrkt_t, tbl++, sizeof(*tbl))) + return (EFAULT); + } + if (n) { + printf("pfr_get_tables: corruption detected (%d).\n", n); + return (ENOTTY); + } + *size = nn; + return (0); +} + +int +pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + int s, n, nn; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); + /* XXX PFR_FLAG_CLSTATS disabled */ + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + SLIST_INIT(&workq); + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + if (!(flags & PFR_FLAG_ATOMIC)) + s = splsoftnet(); + if (copyout(&p->pfrkt_ts, tbl++, sizeof(*tbl))) { + splx(s); + return (EFAULT); + } + if (!(flags & PFR_FLAG_ATOMIC)) + splx(s); + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + } + if (flags & PFR_FLAG_CLSTATS) + pfr_clstats_ktables(&workq, tzero, + flags & PFR_FLAG_ADDRSTOO); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + if (n) { + printf("pfr_get_tstats: corruption detected (%d).\n", n); + return (ENOTTY); + } + *size = nn; + return (0); +} + +int +pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, key; + int i, s, xzero = 0; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (copyin(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xzero++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (nzero != NULL) + *nzero = xzero; + return (0); +} + +int +pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, + int *nchange, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, s, xchange = 0, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + if ((setflag & ~PFR_TFLAG_USRMASK) || + (clrflag & ~PFR_TFLAG_USRMASK) || + (setflag & clrflag)) + return (EINVAL); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (copyin(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + p->pfrkt_nflags = (p->pfrkt_flags | setflag) & + ~clrflag; + if 
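/*
 * Illustrative aside, not part of the patch: the GET ioctls above use a
 * two-call size negotiation.  When the caller's buffer is too small, the
 * function stores the required count in *size and returns 0 without
 * copying anything, so userland grows its buffer and retries:
 *
 *	n = pfr_table_count(filter, flags);
 *	if (n > *size) {
 *		*size = n;
 *		return (0);
 *	}
 */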
(p->pfrkt_nflags == p->pfrkt_flags) + goto _skip; + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) && + (clrflag & PFR_TFLAG_PERSIST) && + !(p->pfrkt_flags & PFR_TFLAG_REFERENCED)) + xdel++; + else + xchange++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_setflags_ktables(&workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (nchange != NULL) + *nchange = xchange; + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_DUMMY); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + if (rs == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + if (ticket != NULL) + *ticket = ++rs->tticket; + rs->topen = 1; + } else + pf_remove_if_empty_ruleset(rs); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int *naddr, u_int32_t ticket, int flags) +{ + struct pfr_ktableworkq tableq; + struct pfr_kentryworkq addrq; + struct pfr_ktable *kt, *rt, *shadow, key; + struct pfr_kentry *p; + struct pfr_addr ad; + struct pf_ruleset *rs; + int i, rv, xadd = 0, xaddr = 0; + + ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO); + if (size && !(flags & PFR_FLAG_ADDRSTOO)) + return (EINVAL); + if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK)) + return (EINVAL); + rs = pf_find_ruleset(tbl->pfrt_anchor, tbl->pfrt_ruleset); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; + SLIST_INIT(&tableq); + kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); + if (kt == NULL) { + kt = pfr_create_ktable(tbl, 0, 1); + if (kt == NULL) + return (ENOMEM); + SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); + xadd++; + if (!tbl->pfrt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(&key, sizeof(key)); + strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name)); + rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (rt != NULL) { + kt->pfrkt_root = rt; + goto _skip; + } + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (rt == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq); + kt->pfrkt_root = rt; + } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) + xadd++; +_skip: + shadow = pfr_create_ktable(tbl, 0, 0); + if (shadow == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INIT(&addrq); + for (i = 0; i < size; i++) { + if (copyin(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + if (pfr_lookup_addr(shadow, &ad, 1) != NULL) + continue; + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(shadow, p)) { + pfr_destroy_kentry(p); + continue; + } + SLIST_INSERT_HEAD(&addrq, p, pfrke_workq); + xaddr++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if 
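/*
 * Illustrative aside, not part of the patch: table loads during a ruleset
 * install are transactional.  pfr_ina_begin() hands out a ticket and
 * retires stale INACTIVE tables, pfr_ina_define() above builds each new
 * table as a detached shadow tied to that ticket, and pfr_ina_commit()
 * later makes the shadows live, so an aborted load never leaves a
 * half-replaced table.  The caller-side shape is roughly:
 *
 *	pfr_ina_begin(trs, &ticket, NULL, 0);
 *	pfr_ina_define(tbl, addr, size, &nadd, &naddr, ticket, flags);
 *	pfr_ina_commit(trs, ticket, &nadd, &nchange, 0);
 */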
(kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_flags |= PFR_TFLAG_INACTIVE; + pfr_insert_ktables(&tableq); + shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ? + xaddr : NO_ADDRESSES; + kt->pfrkt_shadow = shadow; + } else { + pfr_clean_node_mask(shadow, &addrq); + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + } + if (nadd != NULL) + *nadd = xadd; + if (naddr != NULL) + *naddr = xaddr; + return (0); +_bad: + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + return (rv); +} + +int +pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, + int *nchange, int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + struct pf_ruleset *rs; + int s, xadd = 0, xchange = 0; + long tzero = time.tv_sec; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if (p->pfrkt_flags & PFR_TFLAG_ACTIVE) + xchange++; + else + xadd++; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + SLIST_FOREACH(p, &workq, pfrkt_workq) + pfr_commit_ktable(p, tzero); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (nadd != NULL) + *nadd = xadd; + if (nchange != NULL) + *nchange = xchange; + + return (0); +} + +void +pfr_commit_ktable(struct pfr_ktable *kt, long tzero) +{ + struct pfr_ktable *shadow = kt->pfrkt_shadow; + int nflags; + + if (shadow->pfrkt_cnt == NO_ADDRESSES) { + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + pfr_clstats_ktable(kt, tzero, 1); + } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { + /* kt might contain addresses */ + struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; + struct pfr_kentry *p, *q, *next; + struct pfr_addr ad; + + pfr_enqueue_addrs(shadow, &addrq, NULL, 0); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + SLIST_INIT(&delq); + SLIST_INIT(&garbageq); + pfr_clean_node_mask(shadow, &addrq); + for (p = SLIST_FIRST(&addrq); p != NULL; p = next) { + next = SLIST_NEXT(p, pfrke_workq); /* XXX */ + pfr_copyout_addr(&ad, p); + q = pfr_lookup_addr(kt, &ad, 1); + if (q != NULL) { + if (q->pfrke_not != p->pfrke_not) + SLIST_INSERT_HEAD(&changeq, q, + pfrke_workq); + q->pfrke_mark = 1; + SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq); + } else { + p->pfrke_tzero = tzero; + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + } + } + pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + pfr_destroy_kentries(&garbageq); + } else { + /* kt cannot contain addresses */ + SWAP(struct radix_node_head *, kt->pfrkt_ip4, + shadow->pfrkt_ip4); + SWAP(struct radix_node_head *, kt->pfrkt_ip6, + shadow->pfrkt_ip6); + SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt); + pfr_clstats_ktable(kt, tzero, 1); + } + nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) | + (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE) + & ~PFR_TFLAG_INACTIVE; + pfr_destroy_ktable(shadow, 0); + kt->pfrkt_shadow = NULL; + pfr_setflags_ktable(kt, nflags); +} + +int +pfr_validate_table(struct 
pfr_table *tbl, int allowedflags) +{ + int i; + + if (!tbl->pfrt_name[0]) + return (-1); + if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1]) + return (-1); + for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) + if (tbl->pfrt_name[i]) + return (-1); + if (tbl->pfrt_flags & ~allowedflags) + return (-1); + return (0); +} + +int +pfr_table_count(struct pfr_table *filter, int flags) +{ + struct pf_ruleset *rs; + struct pf_anchor *ac; + + if (flags & PFR_FLAG_ALLRSETS) + return (pfr_ktable_cnt); + if (filter->pfrt_ruleset[0]) { + rs = pf_find_ruleset(filter->pfrt_anchor, + filter->pfrt_ruleset); + return ((rs != NULL) ? rs->tables : -1); + } + if (filter->pfrt_anchor[0]) { + ac = pf_find_anchor(filter->pfrt_anchor); + return ((ac != NULL) ? ac->tables : -1); + } + return (pf_main_ruleset.tables); +} + +int +pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) +{ + if (flags & PFR_FLAG_ALLRSETS) + return (0); + if (strncmp(filter->pfrt_anchor, kt->pfrkt_anchor, + PF_ANCHOR_NAME_SIZE)) + return (1); + if (!filter->pfrt_ruleset[0]) + return (0); + if (strncmp(filter->pfrt_ruleset, kt->pfrkt_ruleset, + PF_RULESET_NAME_SIZE)) + return (1); + return (0); +} + +void +pfr_insert_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_insert_ktable(p); +} + +void +pfr_insert_ktable(struct pfr_ktable *kt) +{ + RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); + pfr_ktable_cnt++; + if (kt->pfrkt_root != NULL) + if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); +} + +void +pfr_setflags_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_setflags_ktable(p, p->pfrkt_nflags); +} + +void +pfr_setflags_ktable(struct pfr_ktable *kt, int newf) +{ + struct pfr_kentryworkq addrq; + + if (!(newf & PFR_TFLAG_REFERENCED) && + !(newf & PFR_TFLAG_PERSIST)) + newf &= ~PFR_TFLAG_ACTIVE; + if (!(newf & PFR_TFLAG_ACTIVE)) + newf &= ~PFR_TFLAG_USRMASK; + if (!(newf & PFR_TFLAG_SETMASK)) { + RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt); + if (kt->pfrkt_root != NULL) + if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags & + ~PFR_TFLAG_REFDANCHOR); + pfr_destroy_ktable(kt, 1); + pfr_ktable_cnt--; + return; + } + if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_remove_kentries(kt, &addrq); + } + if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) { + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_shadow = NULL; + } + kt->pfrkt_flags = newf; +} + +void +pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_clstats_ktable(p, tzero, recurse); +} + +void +pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) +{ + struct pfr_kentryworkq addrq; + int s; + + if (recurse) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clstats_kentries(&addrq, tzero, 0); + } + s = splsoftnet(); + bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); + bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); + kt->pfrkt_match = kt->pfrkt_nomatch = 0; + splx(s); + kt->pfrkt_tzero = tzero; +} + +struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) +{ + struct pfr_ktable *kt; + struct pf_ruleset *rs; + + kt = pool_get(&pfr_ktable_pl, PR_NOWAIT); + if (kt 
== NULL) + return (NULL); + bzero(kt, sizeof(*kt)); + kt->pfrkt_t = *tbl; + + if (attachruleset) { + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor, + tbl->pfrt_ruleset); + if (!rs) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_rs = rs; + rs->tables++; + if (rs->anchor != NULL) + rs->anchor->tables++; + } + + if (!rn_inithead((void **)&kt->pfrkt_ip4, + offsetof(struct sockaddr_in, sin_addr) * 8) || + !rn_inithead((void **)&kt->pfrkt_ip6, + offsetof(struct sockaddr_in6, sin6_addr) * 8)) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_tzero = tzero; + + return (kt); +} + +void +pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) +{ + struct pfr_ktable *p, *q; + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_destroy_ktable(p, flushaddr); + } +} + +void +pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) +{ + struct pfr_kentryworkq addrq; + + if (flushaddr) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clean_node_mask(kt, &addrq); + pfr_destroy_kentries(&addrq); + } + if (kt->pfrkt_ip4 != NULL) + free((caddr_t)kt->pfrkt_ip4, M_RTABLE); + if (kt->pfrkt_ip6 != NULL) + free((caddr_t)kt->pfrkt_ip6, M_RTABLE); + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); + if (kt->pfrkt_rs != NULL) { + kt->pfrkt_rs->tables--; + if (kt->pfrkt_rs->anchor != NULL) + kt->pfrkt_rs->anchor->tables--; + pf_remove_if_empty_ruleset(kt->pfrkt_rs); + } + pool_put(&pfr_ktable_pl, kt); +} + +int +pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) +{ + int d; + + if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) + return (d); + if ((d = strncmp(p->pfrkt_anchor, q->pfrkt_anchor, + PF_ANCHOR_NAME_SIZE))) + return (d); + return strncmp(p->pfrkt_ruleset, q->pfrkt_ruleset, + PF_RULESET_NAME_SIZE); +} + +struct pfr_ktable * +pfr_lookup_table(struct pfr_table *tbl) +{ + /* struct pfr_ktable start like a struct pfr_table */ + return RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return 0; + + switch (af) { + case AF_INET: + pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + case AF_INET6: + bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } + match = (ke && !ke->pfrke_not); + if (match) + kt->pfrkt_match++; + else + kt->pfrkt_nomatch++; + return (match); +} + +void +pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + u_int64_t len, int dir_out, int op_pass, int notrule) +{ + struct pfr_kentry *ke = NULL; + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return; + + switch (af) { + case AF_INET: + pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + case AF_INET6: + bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + 
break; + } + if ((ke == NULL || ke->pfrke_not) != notrule) { + if (op_pass != PFR_OP_PASS) + printf("pfr_update_stats: assertion failed.\n"); + op_pass = PFR_OP_XPASS; + } + kt->pfrkt_packets[dir_out][op_pass]++; + kt->pfrkt_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS) { + ke->pfrke_packets[dir_out][op_pass]++; + ke->pfrke_bytes[dir_out][op_pass] += len; + } +} + +struct pfr_ktable * +pfr_attach_table(struct pf_ruleset *rs, char *name) +{ + struct pfr_ktable *kt, *rt; + struct pfr_table tbl; + struct pf_anchor *ac = rs->anchor; + + bzero(&tbl, sizeof(tbl)); + strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); + if (ac != NULL) { + strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); + strlcpy(tbl.pfrt_ruleset, rs->name, sizeof(tbl.pfrt_ruleset)); + } + kt = pfr_lookup_table(&tbl); + if (kt == NULL) { + kt = pfr_create_ktable(&tbl, time.tv_sec, 1); + if (kt == NULL) + return (NULL); + if (ac != NULL) { + bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); + bzero(tbl.pfrt_ruleset, sizeof(tbl.pfrt_ruleset)); + rt = pfr_lookup_table(&tbl); + if (rt == NULL) { + rt = pfr_create_ktable(&tbl, 0, 1); + if (rt == NULL) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + pfr_insert_ktable(rt); + } + kt->pfrkt_root = rt; + } + pfr_insert_ktable(kt); + } + if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++) + pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED); + return kt; +} + +void +pfr_detach_table(struct pfr_ktable *kt) +{ + if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0) + printf("pfr_detach_table: refcount = %d.\n", + kt->pfrkt_refcnt[PFR_REFCNT_RULE]); + else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); +} + +int +pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, + struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) +{ + struct pfr_kentry *ke, *ke2; + struct pf_addr *addr; + union sockaddr_union mask; + int idx = -1, use_counter = 0; + + addr = (af == AF_INET) ? (struct pf_addr *)&pfr_sin.sin_addr : + (struct pf_addr *)&pfr_sin6.sin6_addr; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (-1); + + if (pidx != NULL) + idx = *pidx; + if (counter != NULL && idx >= 0) + use_counter = 1; + if (idx < 0) + idx = 0; + +_next_block: + ke = pfr_kentry_byidx(kt, idx, af); + if (ke == NULL) + return (1); + pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); + *raddr = SUNION2PF(&ke->pfrke_sa, af); + *rmask = SUNION2PF(&pfr_mask, af); + + if (use_counter) { + /* is supplied address within block? */ + if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) { + /* no, go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + PF_ACPY(addr, counter, af); + } else { + /* use first address of block */ + PF_ACPY(addr, *raddr, af); + } + + if (!KENTRY_NETWORK(ke)) { + /* this is a single IP address - no possible nested block */ + PF_ACPY(counter, addr, af); + *pidx = idx; + return (0); + } + for (;;) { + /* we don't want to use a nested block */ + ke2 = (struct pfr_kentry *)(af == AF_INET ? 
+ rn_match(&pfr_sin, kt->pfrkt_ip4) : + rn_match(&pfr_sin6, kt->pfrkt_ip6)); + /* no need to check KENTRY_RNF_ROOT() here */ + if (ke2 == ke) { + /* lookup return the same block - perfect */ + PF_ACPY(counter, addr, af); + *pidx = idx; + return (0); + } + + /* we need to increase the counter past the nested block */ + pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); + PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); + PF_AINC(addr, af); + if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { + /* ok, we reached the end of our main block */ + /* go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + } +} + +struct pfr_kentry * +pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_POOL_GET; + w.pfrw_cnt = idx; + + switch(af) { + case AF_INET: + rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + return w.pfrw_kentry; + case AF_INET6: + rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + return w.pfrw_kentry; + default: + return NULL; + } +} + diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h new file mode 100644 index 000000000000..058e425c300e --- /dev/null +++ b/sys/contrib/pf/net/pfvar.h @@ -0,0 +1,1264 @@ +/* $OpenBSD: pfvar.h,v 1.170 2003/08/22 21:50:34 david Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _NET_PFVAR_H_ +#define _NET_PFVAR_H_ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/tree.h> + +#include <net/radix.h> +#include <netinet/ip_ipsp.h> +#include <netinet/tcp_fsm.h> + +struct ip; + +#define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) +#define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) + +enum { PF_INOUT, PF_IN, PF_OUT }; +enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NAT, PF_NONAT, + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; +enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, + PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; +enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, + PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG }; +enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; +enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, + PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, + PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +/* + * Note about PFTM_*: real indices into pf_rule.timeout[] come before + * PFTM_MAX, special cases afterwards. See pf_state_expires(). + */ +enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, + PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED, + PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE, + PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY, + PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, + PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, + PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_MAX, + PFTM_PURGE, PFTM_UNTIL_PACKET }; +enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; +enum { PF_LIMIT_STATES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; +#define PF_POOL_IDMASK 0x0f +enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, + PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; +enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, + PF_ADDR_TABLE }; +#define PF_POOL_TYPEMASK 0x0f +#define PF_WSCALE_FLAG 0x80 +#define PF_WSCALE_MASK 0x0f + +struct pf_addr { + union { + struct in_addr v4; + struct in6_addr v6; + u_int8_t addr8[16]; + u_int16_t addr16[8]; + u_int32_t addr32[4]; + } pfa; /* 128-bit address */ +#define v4 pfa.v4 +#define v6 pfa.v6 +#define addr8 pfa.addr8 +#define addr16 pfa.addr16 +#define addr32 pfa.addr32 +}; + +#define PF_TABLE_NAME_SIZE 32 + +struct pf_addr_wrap { + union { + struct { + struct pf_addr addr; + struct pf_addr mask; + } a; + char ifname[IFNAMSIZ]; + char tblname[PF_TABLE_NAME_SIZE]; + } v; + union { + struct pf_addr_dyn *dyn; + struct pfr_ktable *tbl; + int tblcnt; + } p; + u_int8_t type; /* PF_ADDR_* */ +}; + +struct pf_addr_dyn { + char ifname[IFNAMSIZ]; + struct ifnet *ifp; + struct pf_addr *addr; + sa_family_t af; + void *hook_cookie; + u_int8_t undefined; +}; + +/* + * Address manipulation macros + */ + +#ifdef _KERNEL + +#ifdef INET +#ifndef INET6 +#define PF_INET_ONLY +#endif /* ! INET6 */ +#endif /* INET */ + +#ifdef INET6 +#ifndef INET +#define PF_INET6_ONLY +#endif /* ! 
INET */ +#endif /* INET6 */ + +#ifdef INET +#ifdef INET6 +#define PF_INET_INET6 +#endif /* INET6 */ +#endif /* INET */ + +#else + +#define PF_INET_INET6 + +#endif /* _KERNEL */ + +/* Both IPv4 and IPv6 */ +#ifdef PF_INET_INET6 + +#define PF_AEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0])) \ + +#define PF_ANEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0])) \ + +#define PF_AZERO(a, c) \ + ((c == AF_INET && !(a)->addr32[0]) || \ + (!(a)->addr32[0] && !(a)->addr32[1] && \ + !(a)->addr32[2] && !(a)->addr32[3] )) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv6 */ + +#ifdef PF_INET6_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0]) \ + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0]) \ + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0] && \ + !(a)->addr32[1] && \ + !(a)->addr32[2] && \ + !(a)->addr32[3] ) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv4 */ +#ifdef PF_INET_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[0] == (b)->addr32[0]) + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[0] != (b)->addr32[0]) + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0]) + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + (a)->v4.s_addr = (b)->v4.s_addr + +#define PF_AINC(a, f) \ + do { \ + (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ + } while (0) + +#define PF_POOLMASK(a, b, c, d, f) \ + do { \ + (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ + (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ + } while (0) + +#endif /* PF_INET_ONLY */ +#endif /* PF_INET6_ONLY */ +#endif /* PF_INET_INET6 */ + +#define PF_MISMATCHAW(aw, x, af, not) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + ((aw)->p.dyn->undefined || \ + (!PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af)))) != \ + (not) \ + ) + +struct pf_rule_uid { + uid_t uid[2]; + u_int8_t op; +}; + +struct pf_rule_gid { + uid_t gid[2]; + u_int8_t op; +}; + +struct pf_rule_addr { + struct pf_addr_wrap addr; + u_int16_t port[2]; + u_int8_t not; + u_int8_t port_op; +}; + +struct pf_pooladdr { + struct pf_addr_wrap addr; + TAILQ_ENTRY(pf_pooladdr) entries; + char ifname[IFNAMSIZ]; + struct ifnet *ifp; +}; + 
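As a minimal userland sketch of the IPv4-only PF_POOLMASK and PF_AINC arithmetic defined above (the helper names pool_mask() and addr_inc() are illustrative, not part of pf): PF_POOLMASK keeps the network bits of the base address, (b & m), and takes the host bits from a second address, (~m & d), while PF_AINC increments an address in host byte order.

#include <stdio.h>
#include <stdint.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* IPv4 reduction of PF_POOLMASK: network bits from 'base',
 * host bits from 'counter'. Illustrative helper, not pf code. */
static uint32_t
pool_mask(uint32_t base, uint32_t mask, uint32_t counter)
{
	return ((base & mask) | (~mask & counter));
}

/* IPv4 reduction of PF_AINC: increment in host byte order. */
static uint32_t
addr_inc(uint32_t addr)
{
	return (htonl(ntohl(addr) + 1));
}

int
main(void)
{
	struct in_addr a;
	uint32_t base = inet_addr("10.0.0.0");		/* pool base, 10.0.0.0/24 */
	uint32_t mask = inet_addr("255.255.255.0");
	uint32_t ctr = inet_addr("0.0.0.7");		/* per-pool counter */

	a.s_addr = pool_mask(base, mask, ctr);
	printf("%s\n", inet_ntoa(a));			/* 10.0.0.7 */
	a.s_addr = addr_inc(a.s_addr);
	printf("%s\n", inet_ntoa(a));			/* 10.0.0.8 */
	return (0);
}

This is the shape of address selection used by the pf_pool machinery: the counter walks the host part while the mask pins the network part, so no IPv4 pool operation needs more than 32-bit logic.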
+TAILQ_HEAD(pf_palist, pf_pooladdr); + +struct pf_poolhashkey { + union { + u_int8_t key8[16]; + u_int16_t key16[8]; + u_int32_t key32[4]; + } pfk; /* 128-bit hash key */ +#define key8 pfk.key8 +#define key16 pfk.key16 +#define key32 pfk.key32 +}; + +struct pf_pool { + struct pf_palist list; + struct pf_pooladdr *cur; + struct pf_poolhashkey key; + struct pf_addr counter; + int tblidx; + u_int16_t proxy_port[2]; + u_int8_t port_op; + u_int8_t opts; +}; + + +/* A packed Operating System description for fingerprinting */ +typedef u_int32_t pf_osfp_t; +#define PF_OSFP_ANY ((pf_osfp_t)0) +#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) +#define PF_OSFP_NOMATCH ((pf_osfp_t)-2) + +struct pf_osfp_entry { + SLIST_ENTRY(pf_osfp_entry) fp_entry; + pf_osfp_t fp_os; + int fp_enflags; +#define PF_OSFP_EXPANDED 0x001 /* expanded entry */ +#define PF_OSFP_GENERIC 0x002 /* generic signature */ +#define PF_OSFP_NODETAIL 0x004 /* no p0f details */ +#define PF_OSFP_LEN 32 + char fp_class_nm[PF_OSFP_LEN]; + char fp_version_nm[PF_OSFP_LEN]; + char fp_subtype_nm[PF_OSFP_LEN]; +}; +#define PF_OSFP_ENTRY_EQ(a, b) \ + ((a)->fp_os == (b)->fp_os && \ + memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0) + +/* handle pf_osfp_t packing */ +#define _FP_RESERVED_BIT 1 /* For the special negative #defines */ +#define _FP_UNUSED_BITS 1 +#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */ +#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */ +#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */ +#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \ + (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \ + ((1 << _FP_CLASS_BITS) - 1); \ + (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \ + ((1 << _FP_VERSION_BITS) - 1);\ + (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while(0) +#define PF_OSFP_PACK(osfp, class, version, subtype) do { \ + (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \ + + _FP_SUBTYPE_BITS); \ + (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \ + _FP_SUBTYPE_BITS; \ + (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while(0) + +/* the fingerprint of an OSes TCP SYN packet */ +typedef u_int64_t pf_tcpopts_t; +struct pf_os_fingerprint { + SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */ + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; +#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */ +#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */ +#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */ +#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */ +#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */ +#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */ +#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */ +#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */ +#define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */ +#define PF_OSFP_MSS 0x0200 /* TCP MSS */ +#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */ +#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ +#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ +#define PF_OSFP_TS0 0x2000 /* Zero timestamp */ + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP 
window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ +#define PF_OSFP_MAXTTL_OFFSET 40 +/* TCP options packing */ +#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */ +#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */ +#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */ +#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */ +#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */ +#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */ +#define PF_OSFP_MAX_OPTS \ + (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \ + / PF_OSFP_TCPOPT_BITS + + SLIST_ENTRY(pf_os_fingerprint) fp_next; +}; + +struct pf_osfp_ioctl { + struct pf_osfp_entry fp_os; + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ + + int fp_getnum; /* DIOCOSFPGET number */ +}; + + +union pf_rule_ptr { + struct pf_rule *ptr; + u_int32_t nr; +}; + +struct pf_rule { + struct pf_rule_addr src; + struct pf_rule_addr dst; +#define PF_SKIP_IFP 0 +#define PF_SKIP_DIR 1 +#define PF_SKIP_AF 2 +#define PF_SKIP_PROTO 3 +#define PF_SKIP_SRC_ADDR 4 +#define PF_SKIP_SRC_PORT 5 +#define PF_SKIP_DST_ADDR 6 +#define PF_SKIP_DST_PORT 7 +#define PF_SKIP_COUNT 8 + union pf_rule_ptr skip[PF_SKIP_COUNT]; +#define PF_RULE_LABEL_SIZE 64 + char label[PF_RULE_LABEL_SIZE]; + u_int32_t timeout[PFTM_MAX]; +#define PF_QNAME_SIZE 16 + char ifname[IFNAMSIZ]; + char qname[PF_QNAME_SIZE]; + char pqname[PF_QNAME_SIZE]; +#define PF_ANCHOR_NAME_SIZE 16 + char anchorname[PF_ANCHOR_NAME_SIZE]; +#define PF_TAG_NAME_SIZE 16 + char tagname[PF_TAG_NAME_SIZE]; + char match_tagname[PF_TAG_NAME_SIZE]; + + TAILQ_ENTRY(pf_rule) entries; + struct pf_pool rpool; + + u_int64_t evaluations; + u_int64_t packets; + u_int64_t bytes; + + struct ifnet *ifp; + struct pf_anchor *anchor; + + pf_osfp_t os_fingerprint; + u_int32_t states; + u_int32_t max_states; + u_int32_t qid; + u_int32_t pqid; + u_int32_t rt_listid; + u_int32_t nr; + + u_int16_t return_icmp; + u_int16_t return_icmp6; + u_int16_t max_mss; + u_int16_t tag; + u_int16_t match_tag; + + struct pf_rule_uid uid; + struct pf_rule_gid gid; + + u_int32_t rule_flag; + u_int8_t action; + u_int8_t direction; + u_int8_t log; + u_int8_t quick; + u_int8_t ifnot; + u_int8_t match_tag_not; + u_int8_t natpass; + +#define PF_STATE_NORMAL 0x1 +#define PF_STATE_MODULATE 0x2 +#define PF_STATE_SYNPROXY 0x3 + u_int8_t keep_state; + sa_family_t af; + u_int8_t proto; + u_int8_t type; + u_int8_t code; + u_int8_t flags; + u_int8_t flagset; + u_int8_t min_ttl; + u_int8_t allow_opts; + u_int8_t rt; + u_int8_t return_ttl; + u_int8_t tos; +}; + +/* rule flags */ +#define PFRULE_DROP 0x0000 +#define PFRULE_RETURNRST 0x0001 +#define PFRULE_FRAGMENT 0x0002 +#define PFRULE_RETURNICMP 0x0004 +#define PFRULE_RETURN 0x0008 + +/* scrub flags */ +#define PFRULE_NODF 0x0100 +#define PFRULE_FRAGCROP 0x0200 /* non-buffering frag cache */ +#define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ +#define PFRULE_RANDOMID 0x0800 +#define PFRULE_REASSEMBLE_TCP 0x1000 + +#define PFSTATE_HIWAT 10000 /* default state table size */ + + +struct pf_state_scrub { + u_int16_t pfss_flags; +#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ + u_int8_t pfss_ttl; /* stashed TTL */ + u_int8_t pad; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +}; + +struct 
pf_state_host { + struct pf_addr addr; + u_int16_t port; + u_int16_t pad; +}; + +struct pf_state_peer { + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int16_t mss; /* Maximum segment size option */ + struct pf_state_scrub *scrub; /* state is scrubbed */ +}; + +struct pf_state { + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + struct pf_state_peer src; + struct pf_state_peer dst; + union pf_rule_ptr rule; + union pf_rule_ptr anchor; + union pf_rule_ptr nat_rule; + struct pf_addr rt_addr; + struct ifnet *rt_ifp; + u_int32_t creation; + u_int32_t expire; + u_int32_t packets[2]; + u_int32_t bytes[2]; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t pad[2]; +}; + +struct pf_tree_node { + RB_ENTRY(pf_tree_node) entry; + struct pf_state *state; + struct pf_addr addr[2]; + u_int16_t port[2]; + sa_family_t af; + u_int8_t proto; +}; + +TAILQ_HEAD(pf_rulequeue, pf_rule); + +struct pf_anchor; + +struct pf_ruleset { + TAILQ_ENTRY(pf_ruleset) entries; +#define PF_RULESET_NAME_SIZE 16 + char name[PF_RULESET_NAME_SIZE]; + struct { + struct pf_rulequeue queues[2]; + struct { + struct pf_rulequeue *ptr; + u_int32_t ticket; + } active, inactive; + } rules[PF_RULESET_MAX]; + struct pf_anchor *anchor; + u_int32_t tticket; + int tables; + int topen; +}; + +TAILQ_HEAD(pf_rulesetqueue, pf_ruleset); + +struct pf_anchor { + TAILQ_ENTRY(pf_anchor) entries; + char name[PF_ANCHOR_NAME_SIZE]; + struct pf_rulesetqueue rulesets; + int tables; +}; + +TAILQ_HEAD(pf_anchorqueue, pf_anchor); + +#define PFR_TFLAG_PERSIST 0x00000001 +#define PFR_TFLAG_CONST 0x00000002 +#define PFR_TFLAG_ACTIVE 0x00000004 +#define PFR_TFLAG_INACTIVE 0x00000008 +#define PFR_TFLAG_REFERENCED 0x00000010 +#define PFR_TFLAG_REFDANCHOR 0x00000020 +#define PFR_TFLAG_USRMASK 0x00000003 +#define PFR_TFLAG_SETMASK 0x0000003C +#define PFR_TFLAG_ALLMASK 0x0000003F + +struct pfr_table { + char pfrt_anchor[PF_ANCHOR_NAME_SIZE]; + char pfrt_ruleset[PF_RULESET_NAME_SIZE]; + char pfrt_name[PF_TABLE_NAME_SIZE]; + u_int32_t pfrt_flags; + u_int8_t pfrt_fback; +}; + +enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, + PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX }; + +struct pfr_addr { + union { + struct in_addr _pfra_ip4addr; + struct in6_addr _pfra_ip6addr; + } pfra_u; + u_int8_t pfra_af; + u_int8_t pfra_net; + u_int8_t pfra_not; + u_int8_t pfra_fback; +}; +#define pfra_ip4addr pfra_u._pfra_ip4addr +#define pfra_ip6addr pfra_u._pfra_ip6addr + +enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; +enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; +#define PFR_OP_XPASS PFR_OP_ADDR_MAX + +struct pfr_astats { + struct pfr_addr pfras_a; + u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + long pfras_tzero; +}; + +enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; + +struct pfr_tstats { + struct pfr_table pfrts_t; + u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_match; + u_int64_t pfrts_nomatch; + long pfrts_tzero; + int pfrts_cnt; + int pfrts_refcnt[PFR_REFCNT_MAX]; 
+}; +#define pfrts_name pfrts_t.pfrt_name +#define pfrts_flags pfrts_t.pfrt_flags + +SLIST_HEAD(pfr_kentryworkq, pfr_kentry); +struct pfr_kentry { + struct radix_node pfrke_node[2]; + union sockaddr_union pfrke_sa; + u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + SLIST_ENTRY(pfr_kentry) pfrke_workq; + long pfrke_tzero; + u_int8_t pfrke_af; + u_int8_t pfrke_net; + u_int8_t pfrke_not; + u_int8_t pfrke_mark; +}; + +SLIST_HEAD(pfr_ktableworkq, pfr_ktable); +RB_HEAD(pfr_ktablehead, pfr_ktable); +struct pfr_ktable { + struct pfr_tstats pfrkt_ts; + RB_ENTRY(pfr_ktable) pfrkt_tree; + SLIST_ENTRY(pfr_ktable) pfrkt_workq; + struct radix_node_head *pfrkt_ip4; + struct radix_node_head *pfrkt_ip6; + struct pfr_ktable *pfrkt_shadow; + struct pfr_ktable *pfrkt_root; + struct pf_ruleset *pfrkt_rs; + int pfrkt_nflags; +}; +#define pfrkt_t pfrkt_ts.pfrts_t +#define pfrkt_name pfrkt_t.pfrt_name +#define pfrkt_anchor pfrkt_t.pfrt_anchor +#define pfrkt_ruleset pfrkt_t.pfrt_ruleset +#define pfrkt_flags pfrkt_t.pfrt_flags +#define pfrkt_cnt pfrkt_ts.pfrts_cnt +#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt +#define pfrkt_packets pfrkt_ts.pfrts_packets +#define pfrkt_bytes pfrkt_ts.pfrts_bytes +#define pfrkt_match pfrkt_ts.pfrts_match +#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch +#define pfrkt_tzero pfrkt_ts.pfrts_tzero + +struct pf_pdesc { + u_int64_t tot_len; /* Make Mickey money */ + union { + struct tcphdr *tcp; + struct udphdr *udp; + struct icmp *icmp; +#ifdef INET6 + struct icmp6_hdr *icmp6; +#endif /* INET6 */ + void *any; + } hdr; + struct pf_addr *src; + struct pf_addr *dst; + u_int16_t *ip_sum; + u_int32_t p_len; /* total length of payload */ + u_int16_t flags; /* Let SCRUB trigger behavior in + * state code. 
Easier than tags */ +#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ + sa_family_t af; + u_int8_t proto; + u_int8_t tos; +}; + +/* flags for RDR options */ +#define PF_DPORT_RANGE 0x01 /* Dest port uses range */ +#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ + +/* Reasons code for passing/dropping a packet */ +#define PFRES_MATCH 0 /* Explicit match of a rule */ +#define PFRES_BADOFF 1 /* Bad offset for pull_hdr */ +#define PFRES_FRAG 2 /* Dropping following fragment */ +#define PFRES_SHORT 3 /* Dropping short packet */ +#define PFRES_NORM 4 /* Dropping by normalizer */ +#define PFRES_MEMORY 5 /* Dropped due to lacking mem */ +#define PFRES_MAX 6 /* total+1 */ + +#define PFRES_NAMES { \ + "match", \ + "bad-offset", \ + "fragment", \ + "short", \ + "normalize", \ + "memory", \ + NULL \ +} + +/* UDP state enumeration */ +#define PFUDPS_NO_TRAFFIC 0 +#define PFUDPS_SINGLE 1 +#define PFUDPS_MULTIPLE 2 + +#define PFUDPS_NSTATES 3 /* number of state levels */ + +#define PFUDPS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +/* Other protocol state enumeration */ +#define PFOTHERS_NO_TRAFFIC 0 +#define PFOTHERS_SINGLE 1 +#define PFOTHERS_MULTIPLE 2 + +#define PFOTHERS_NSTATES 3 /* number of state levels */ + +#define PFOTHERS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +#define FCNT_STATE_SEARCH 0 +#define FCNT_STATE_INSERT 1 +#define FCNT_STATE_REMOVALS 2 +#define FCNT_MAX 3 + + +#define ACTION_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + } while (0) + +#define REASON_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + if (x < PFRES_MAX) \ + pf_status.counters[x]++; \ + } while (0) + +struct pf_status { + u_int64_t counters[PFRES_MAX]; + u_int64_t fcounters[FCNT_MAX]; + u_int64_t pcounters[2][2][3]; + u_int64_t bcounters[2][2]; + u_int32_t running; + u_int32_t states; + u_int32_t since; + u_int32_t debug; + char ifname[IFNAMSIZ]; +}; + +struct cbq_opts { + u_int minburst; + u_int maxburst; + u_int pktsize; + u_int maxpktsize; + u_int ns_per_byte; + u_int maxidle; + int minidle; + u_int offtime; + int flags; +}; + +struct priq_opts { + int flags; +}; + +struct hfsc_opts { + /* real-time service curve */ + u_int rtsc_m1; /* slope of the 1st segment in bps */ + u_int rtsc_d; /* the x-projection of m1 in msec */ + u_int rtsc_m2; /* slope of the 2nd segment in bps */ + /* link-sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; + /* upper-limit service curve */ + u_int ulsc_m1; + u_int ulsc_d; + u_int ulsc_m2; + int flags; +}; + +struct pf_altq { + char ifname[IFNAMSIZ]; + + void *altq_disc; /* discipline-specific state */ + TAILQ_ENTRY(pf_altq) entries; + + /* scheduler spec */ + u_int8_t scheduler; /* scheduler type */ + u_int16_t tbrsize; /* tokenbucket regulator size */ + u_int32_t ifbandwidth; /* interface bandwidth */ + + /* queue spec */ + char qname[PF_QNAME_SIZE]; /* queue name */ + char parent[PF_QNAME_SIZE]; /* parent name */ + u_int32_t parent_qid; /* parent queue id */ + u_int32_t bandwidth; /* queue bandwidth */ + u_int8_t priority; /* priority */ + u_int16_t qlimit; /* queue size limit */ + u_int16_t flags; /* misc flags */ + union { + struct cbq_opts cbq_opts; + struct priq_opts priq_opts; + struct hfsc_opts hfsc_opts; + } pq_u; + + u_int32_t qid; /* return value */ +}; + +struct pf_tag { + u_int16_t tag; /* tag id */ +}; + +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + u_int16_t tag; + int ref; +}; + +#define 
PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ +#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ +#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ +#define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ + +/* + * ioctl parameter structures + */ + +struct pfioc_pooladdr { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + u_int32_t r_num; + u_int8_t r_action; + u_int8_t r_last; + u_int8_t af; + char anchor[PF_ANCHOR_NAME_SIZE]; + char ruleset[PF_RULESET_NAME_SIZE]; + struct pf_pooladdr addr; +}; + +struct pfioc_rule { + u_int32_t action; + u_int32_t ticket; + u_int32_t pool_ticket; + u_int32_t nr; + char anchor[PF_ANCHOR_NAME_SIZE]; + char ruleset[PF_RULESET_NAME_SIZE]; + struct pf_rule rule; +}; + +struct pfioc_natlook { + struct pf_addr saddr; + struct pf_addr daddr; + struct pf_addr rsaddr; + struct pf_addr rdaddr; + u_int16_t sport; + u_int16_t dport; + u_int16_t rsport; + u_int16_t rdport; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; +}; + +struct pfioc_state { + u_int32_t nr; + struct pf_state state; +}; + +struct pfioc_state_kill { + /* XXX returns the number of states killed in psk_af */ + sa_family_t psk_af; + int psk_proto; + struct pf_rule_addr psk_src; + struct pf_rule_addr psk_dst; +}; + +struct pfioc_states { + int ps_len; + union { + caddr_t psu_buf; + struct pf_state *psu_states; + } ps_u; +#define ps_buf ps_u.psu_buf +#define ps_states ps_u.psu_states +}; + +struct pfioc_if { + char ifname[IFNAMSIZ]; +}; + +struct pfioc_tm { + int timeout; + int seconds; +}; + +struct pfioc_limit { + int index; + unsigned limit; +}; + +struct pfioc_altq { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + struct pf_altq altq; +}; + +struct pfioc_qstats { + u_int32_t ticket; + u_int32_t nr; + void *buf; + int nbytes; + u_int8_t scheduler; +}; + +struct pfioc_anchor { + u_int32_t nr; + char name[PF_ANCHOR_NAME_SIZE]; +}; + +struct pfioc_ruleset { + u_int32_t nr; + char anchor[PF_ANCHOR_NAME_SIZE]; + char name[PF_RULESET_NAME_SIZE]; +}; + +#define PFR_FLAG_ATOMIC 0x00000001 +#define PFR_FLAG_DUMMY 0x00000002 +#define PFR_FLAG_FEEDBACK 0x00000004 +#define PFR_FLAG_CLSTATS 0x00000008 +#define PFR_FLAG_ADDRSTOO 0x00000010 +#define PFR_FLAG_REPLACE 0x00000020 +#define PFR_FLAG_ALLRSETS 0x00000040 +#define PFR_FLAG_ALLMASK 0x0000007F + +struct pfioc_table { + struct pfr_table pfrio_table; + void *pfrio_buffer; + int pfrio_esize; + int pfrio_size; + int pfrio_size2; + int pfrio_nadd; + int pfrio_ndel; + int pfrio_nchange; + int pfrio_flags; + u_int32_t pfrio_ticket; +}; +#define pfrio_exists pfrio_nadd +#define pfrio_nzero pfrio_nadd +#define pfrio_nmatch pfrio_nadd +#define pfrio_naddr pfrio_size2 +#define pfrio_setflag pfrio_size2 +#define pfrio_clrflag pfrio_nadd + + +/* + * ioctl operations + */ + +#define DIOCSTART _IO ('D', 1) +#define DIOCSTOP _IO ('D', 2) +#define DIOCBEGINRULES _IOWR('D', 3, struct pfioc_rule) +#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) +#define DIOCCOMMITRULES _IOWR('D', 5, struct pfioc_rule) +#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) +#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) +/* XXX cut 8 - 17 */ +#define DIOCCLRSTATES _IO ('D', 18) +#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) +#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) +#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) +#define DIOCCLRSTATUS _IO ('D', 22) +#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) +#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) 
+#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) +#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) +/* XXX cut 26 - 28 */ +#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) +#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) +#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) +#define DIOCCLRRULECTRS _IO ('D', 38) +#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) +#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) +#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) +#define DIOCSTARTALTQ _IO ('D', 42) +#define DIOCSTOPALTQ _IO ('D', 43) +#define DIOCBEGINALTQS _IOWR('D', 44, u_int32_t) +#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) +#define DIOCCOMMITALTQS _IOWR('D', 46, u_int32_t) +#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) +#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) +#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) +#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) +#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) +#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) +#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) +#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) +#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) +#define DIOCGETANCHORS _IOWR('D', 56, struct pfioc_anchor) +#define DIOCGETANCHOR _IOWR('D', 57, struct pfioc_anchor) +#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) +#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) +#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) +#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) +#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) +#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) +#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) +#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) +#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) +#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) +#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) +#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) +#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) +#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) +#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) +#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) +#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) +#define DIOCRINABEGIN _IOWR('D', 75, struct pfioc_table) +#define DIOCRINACOMMIT _IOWR('D', 76, struct pfioc_table) +#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) +#define DIOCOSFPFLUSH _IO('D', 78) +#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) +#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) + +#ifdef _KERNEL +RB_HEAD(pf_state_tree, pf_tree_node); +RB_PROTOTYPE(pf_state_tree, pf_tree_node, entry, pf_state_compare); +extern struct pf_state_tree tree_lan_ext, tree_ext_gwy; + +extern struct pf_anchorqueue pf_anchors; +extern struct pf_ruleset pf_main_ruleset; +TAILQ_HEAD(pf_poolqueue, pf_pool); +extern struct pf_poolqueue pf_pools[2]; +TAILQ_HEAD(pf_altqqueue, pf_altq); +extern struct pf_altqqueue pf_altqs[2]; +extern struct pf_palist pf_pabuf; + + +extern u_int32_t ticket_altqs_active; +extern u_int32_t ticket_altqs_inactive; +extern u_int32_t ticket_pabuf; +extern struct pf_altqqueue *pf_altqs_active; +extern struct pf_altqqueue *pf_altqs_inactive; +extern struct pf_poolqueue *pf_pools_active; +extern struct pf_poolqueue *pf_pools_inactive; +extern int pf_tbladdr_setup(struct pf_ruleset *, + 
struct pf_addr_wrap *); +extern void pf_tbladdr_remove(struct pf_addr_wrap *); +extern void pf_tbladdr_copyout(struct pf_addr_wrap *); +extern int pf_dynaddr_setup(struct pf_addr_wrap *, + sa_family_t); +extern void pf_dynaddr_copyout(struct pf_addr_wrap *); +extern void pf_dynaddr_remove(struct pf_addr_wrap *); +extern void pf_calc_skip_steps(struct pf_rulequeue *); +extern void pf_rule_set_qid(struct pf_rulequeue *); +extern u_int32_t pf_qname_to_qid(char *); +extern void pf_update_anchor_rules(void); +extern struct pool pf_tree_pl, pf_rule_pl, pf_addr_pl; +extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; +extern struct pool pf_state_scrub_pl; +extern void pf_purge_timeout(void *); +extern void pf_purge_expired_states(void); +extern int pf_insert_state(struct pf_state *); +extern struct pf_state *pf_find_state(struct pf_state_tree *, + struct pf_tree_node *); +extern struct pf_anchor *pf_find_anchor(const char *); +extern struct pf_ruleset *pf_find_ruleset(char *, char *); +extern struct pf_ruleset *pf_find_or_create_ruleset(char *, char *); +extern void pf_remove_if_empty_ruleset( + struct pf_ruleset *); + +extern struct ifnet *status_ifp; +extern struct pf_rule pf_default_rule; +extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, + u_int8_t); +void pf_rm_rule(struct pf_rulequeue *, + struct pf_rule *); + +#ifdef INET +int pf_test(int, struct ifnet *, struct mbuf **); +#endif /* INET */ + +#ifdef INET6 +int pf_test6(int, struct ifnet *, struct mbuf **); +void pf_poolmask(struct pf_addr *, struct pf_addr*, + struct pf_addr *, struct pf_addr *, u_int8_t); +void pf_addr_inc(struct pf_addr *, sa_family_t); +#endif /* INET6 */ + +void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, + sa_family_t); +void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); +int pflog_packet(struct ifnet *, struct mbuf *, sa_family_t, u_int8_t, + u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *); +int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); +int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); +int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); +int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); +int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); + +void pf_normalize_init(void); +int pf_normalize_ip(struct mbuf **, int, struct ifnet *, u_short *); +int pf_normalize_ip6(struct mbuf **, int, struct ifnet *, u_short *); +int pf_normalize_tcp(int, struct ifnet *, struct mbuf *, int, int, void *, + struct pf_pdesc *); +void pf_normalize_tcp_cleanup(struct pf_state *); +int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); +int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, + u_short *, struct tcphdr *, struct pf_state_peer *, + struct pf_state_peer *, int *); +u_int32_t + pf_state_expires(const struct pf_state *); +void pf_purge_expired_fragments(void); +int pf_routable(struct pf_addr *addr, sa_family_t af); +void pfr_initialize(void); +int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); +void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, + u_int64_t, int, int, int); +int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, + struct pf_addr **, struct pf_addr **, sa_family_t); +struct pfr_ktable * + pfr_attach_table(struct pf_ruleset *, char *); +void pfr_detach_table(struct pfr_ktable *); +int pfr_clr_tables(struct pfr_table *, int *, int); +int 
pfr_add_tables(struct pfr_table *, int, int *, int); +int pfr_del_tables(struct pfr_table *, int, int *, int); +int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); +int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); +int pfr_clr_tstats(struct pfr_table *, int, int *, int); +int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); +int pfr_clr_addrs(struct pfr_table *, int *, int); +int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int *, int *, int *, int); +int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); +int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); +int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); +int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); +int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, + int *, u_int32_t, int); + +u_int16_t pf_tagname2tag(char *); +void pf_tag2tagname(u_int16_t, char *); +void pf_tag_unref(u_int16_t); +int pf_tag_packet(struct mbuf *, struct pf_tag *, int); + +extern struct pf_status pf_status; +extern struct pool pf_frent_pl, pf_frag_pl; + +struct pf_pool_limit { + void *pp; + unsigned limit; +}; +extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; + +#endif /* _KERNEL */ + +/* The fingerprint functions can be linked into userland programs (tcpdump) */ +int pf_osfp_add(struct pf_osfp_ioctl *); +#ifdef _KERNEL +struct pf_osfp_enlist * + pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, + const struct tcphdr *); +#endif /* _KERNEL */ +struct pf_osfp_enlist * + pf_osfp_fingerprint_hdr(const struct ip *, const struct tcphdr *); +void pf_osfp_flush(void); +int pf_osfp_get(struct pf_osfp_ioctl *); +void pf_osfp_initialize(void); +int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); +struct pf_os_fingerprint * + pf_osfp_validate(void); + + +#endif /* _NET_PFVAR_H_ */ diff --git a/sys/contrib/pf/netinet/in4_cksum.c b/sys/contrib/pf/netinet/in4_cksum.c new file mode 100644 index 000000000000..1c40f2e05b6e --- /dev/null +++ b/sys/contrib/pf/netinet/in4_cksum.c @@ -0,0 +1,219 @@ +/* $OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $ */ +/* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */ +/* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */ + +/* + * Copyright (C) 1999 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/systm.h> +#include <sys/socket.h> +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * This is only for IPv4 pseudo header checksum. + * No need to clear non-pseudo-header fields in IPv4 header. + * len is for actual payload size, and does not include IPv4 header and + * skipped header chain (off + len should be equal to the whole packet). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + +int +in4_cksum(m, nxt, off, len) + struct mbuf *m; + u_int8_t nxt; + int off, len; +{ + u_int16_t *w; + int sum = 0; + int mlen = 0; + int byte_swapped = 0; + union { + struct ipovly ipov; + u_int16_t w[10]; + } u; + union { + u_int8_t c[2]; + u_int16_t s; + } s_util; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + if (nxt != 0) { + /* pseudo header */ + if (off < sizeof(struct ipovly)) + panic("in4_cksum: offset too short"); + if (m->m_len < sizeof(struct ip)) + panic("in4_cksum: bad mbuf chain"); + bzero(&u.ipov, sizeof(u.ipov)); + u.ipov.ih_len = htons(len); + u.ipov.ih_pr = nxt; + u.ipov.ih_src = mtod(m, struct ip *)->ip_src; + u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; + w = u.w; + /* assumes sizeof(ipov) == 20 */ + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; + sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; + } + + /* skip unnecessary part */ + while (m && off > 0) { + if (m->m_len > off) + break; + off -= m->m_len; + m = m->m_next; + } + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = (u_int16_t *)(mtod(m, caddr_t) + off); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + * + * s_util.c[0] is already saved when scanning previous + * mbuf. + */ + s_util.c[1] = *(u_int8_t *)w; + sum += s_util.s; + w = (u_int16_t *)((u_int8_t *)w + 1); + mlen = m->m_len - off - 1; + len--; + } else + mlen = m->m_len - off; + off = 0; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (long) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_int8_t *)w; + w = (u_int16_t *)((int8_t *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + continue; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(u_int8_t *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(u_int8_t *)w; + } + if (len) + printf("cksum4: out of data\n"); + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +}
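The ADDCARRY/REDUCE pair above is the standard ones'-complement fold: 16-bit words are accumulated into a 32-bit sum, carries are folded back into the low 16 bits, and the result is complemented. A minimal flat-buffer sketch of the same fold (cksum_flat() is an illustrative name; the real routine above must additionally walk mbuf chains and handle odd lengths and misalignment):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Ones'-complement checksum over a flat, even-length buffer, using
 * the same accumulate-then-fold scheme as in4_cksum's REDUCE macro.
 * Illustrative sketch only, not the kernel implementation. */
static uint16_t
cksum_flat(const uint16_t *w, size_t nwords)
{
	uint32_t sum = 0;

	while (nwords-- > 0)
		sum += *w++;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries, like REDUCE */
	sum += (sum >> 16);			/* fold the final carry */
	return ((uint16_t)(~sum & 0xffff));
}

int
main(void)
{
	/* worked example from RFC 1071: bytes 00 01 f2 03 f4 f5 f6 f7 */
	uint16_t words[] = { 0x0001, 0xf203, 0xf4f5, 0xf6f7 };

	printf("0x%04x\n", cksum_flat(words, 4));	/* 0x220d */
	return (0);
}

Because ones'-complement addition is commutative, in4_cksum() can add the ten pseudo-header words up front and the payload words afterwards in the same accumulator; the byte-swapping logic in its main loop only compensates for words that straddle mbuf boundaries or start on odd addresses.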