diff options
Diffstat (limited to 'sys/contrib/ipfilter/netinet/ip_frag.c')
-rw-r--r-- | sys/contrib/ipfilter/netinet/ip_frag.c | 555 |
1 files changed, 555 insertions, 0 deletions
diff --git a/sys/contrib/ipfilter/netinet/ip_frag.c b/sys/contrib/ipfilter/netinet/ip_frag.c new file mode 100644 index 000000000000..556478d51d0b --- /dev/null +++ b/sys/contrib/ipfilter/netinet/ip_frag.c @@ -0,0 +1,555 @@ +/* + * Copyright (C) 1993-2000 by Darren Reed. + * + * Redistribution and use in source and binary forms are permitted + * provided that this notice is preserved and due credit is given + * to the original author and the contributors. + */ +#if !defined(lint) +static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed"; +static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.10.2.7 2000/11/27 10:26:56 darrenr Exp $"; +#endif + +#if defined(KERNEL) && !defined(_KERNEL) +# define _KERNEL +#endif + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/file.h> +#if !defined(_KERNEL) && !defined(KERNEL) +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +#endif +#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +#else +# include <sys/ioctl.h> +#endif +#include <sys/uio.h> +#ifndef linux +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) && !defined(linux) +# include <sys/systm.h> +#endif +#if !defined(__SVR4) && !defined(__svr4__) +# if defined(_KERNEL) && !defined(__sgi) +# include <sys/kernel.h> +# endif +# ifndef linux +# include <sys/mbuf.h> +# endif +#else +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#include <net/if.h> +#ifdef sun +# include <net/af.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifndef linux +# include <netinet/ip_var.h> +#endif +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_proxy.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_auth.h" +#if (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +# if (defined(KERNEL) || defined(_KERNEL)) +# ifndef IPFILTER_LKM +# include <sys/libkern.h> +# include <sys/systm.h> +# endif +extern struct callout_handle ipfr_slowtimer_ch; +# endif +#endif +#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) +# include <sys/callout.h> +extern struct callout ipfr_slowtimer_ch; +#endif + + +static ipfr_t *ipfr_heads[IPFT_SIZE]; +static ipfr_t *ipfr_nattab[IPFT_SIZE]; +static ipfrstat_t ipfr_stats; +static int ipfr_inuse = 0; + +int fr_ipfrttl = 120; /* 60 seconds */ +int fr_frag_lock = 0; + +#ifdef _KERNEL +# if SOLARIS2 >= 7 +extern timeout_id_t ipfr_timer_id; +# else +extern int ipfr_timer_id; +# endif +#endif +#if (SOLARIS || defined(__sgi)) && defined(_KERNEL) +extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex; +# if SOLARIS +extern KRWLOCK_T ipf_solaris; +# else +KRWLOCK_T ipf_solaris; +# endif +extern kmutex_t ipf_rw; +#endif + + +static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, u_int, ipfr_t **)); +static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **)); +static void ipfr_delete __P((ipfr_t *)); + + +ipfrstat_t *ipfr_fragstats() +{ + ipfr_stats.ifs_table = ipfr_heads; + ipfr_stats.ifs_nattab = ipfr_nattab; + ipfr_stats.ifs_inuse = ipfr_inuse; + return &ipfr_stats; +} + + +/* + * add a new entry to the fragment cache, registering it as having come + * through this box, with the result of the filter operation. + */ +static ipfr_t *ipfr_new(ip, fin, pass, table) +ip_t *ip; +fr_info_t *fin; +u_int pass; +ipfr_t *table[]; +{ + ipfr_t **fp, *fra, frag; + u_int idx; + + if (ipfr_inuse >= IPFT_SIZE) + return NULL; + + frag.ipfr_p = ip->ip_p; + idx = ip->ip_p; + frag.ipfr_id = ip->ip_id; + idx += ip->ip_id; + frag.ipfr_tos = ip->ip_tos; + frag.ipfr_src.s_addr = ip->ip_src.s_addr; + idx += ip->ip_src.s_addr; + frag.ipfr_dst.s_addr = ip->ip_dst.s_addr; + idx += ip->ip_dst.s_addr; + frag.ipfr_ifp = fin->fin_ifp; + idx *= 127; + idx %= IPFT_SIZE; + + /* + * first, make sure it isn't already there... + */ + for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next) + if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, + IPFR_CMPSZ)) { + ATOMIC_INCL(ipfr_stats.ifs_exists); + return NULL; + } + + /* + * allocate some memory, if possible, if not, just record that we + * failed to do so. + */ + KMALLOC(fra, ipfr_t *); + if (fra == NULL) { + ATOMIC_INCL(ipfr_stats.ifs_nomem); + return NULL; + } + + if ((fra->ipfr_rule = fin->fin_fr) != NULL) { + ATOMIC_INC32(fin->fin_fr->fr_ref); + } + + + /* + * Instert the fragment into the fragment table, copy the struct used + * in the search using bcopy rather than reassign each field. + * Set the ttl to the default and mask out logging from "pass" + */ + if ((fra->ipfr_next = table[idx])) + table[idx]->ipfr_prev = fra; + fra->ipfr_prev = NULL; + fra->ipfr_data = NULL; + table[idx] = fra; + bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ); + fra->ipfr_ttl = fr_ipfrttl; + /* + * Compute the offset of the expected start of the next packet. + */ + fra->ipfr_off = (ip->ip_off & IP_OFFMASK) + (fin->fin_dlen >> 3); + ATOMIC_INCL(ipfr_stats.ifs_new); + ATOMIC_INC32(ipfr_inuse); + return fra; +} + + +int ipfr_newfrag(ip, fin, pass) +ip_t *ip; +fr_info_t *fin; +u_int pass; +{ + ipfr_t *ipf; + + if ((ip->ip_v != 4) || (fr_frag_lock)) + return -1; + WRITE_ENTER(&ipf_frag); + ipf = ipfr_new(ip, fin, pass, ipfr_heads); + RWLOCK_EXIT(&ipf_frag); + return ipf ? 0 : -1; +} + + +int ipfr_nat_newfrag(ip, fin, pass, nat) +ip_t *ip; +fr_info_t *fin; +u_int pass; +nat_t *nat; +{ + ipfr_t *ipf; + + if ((ip->ip_v != 4) || (fr_frag_lock)) + return -1; + WRITE_ENTER(&ipf_natfrag); + ipf = ipfr_new(ip, fin, pass, ipfr_nattab); + if (ipf != NULL) { + ipf->ipfr_data = nat; + nat->nat_data = ipf; + } + RWLOCK_EXIT(&ipf_natfrag); + return ipf ? 0 : -1; +} + + +/* + * check the fragment cache to see if there is already a record of this packet + * with its filter result known. + */ +static ipfr_t *ipfr_lookup(ip, fin, table) +ip_t *ip; +fr_info_t *fin; +ipfr_t *table[]; +{ + ipfr_t *f, frag; + u_int idx; + + /* + * For fragments, we record protocol, packet id, TOS and both IP#'s + * (these should all be the same for all fragments of a packet). + * + * build up a hash value to index the table with. + */ + frag.ipfr_p = ip->ip_p; + idx = ip->ip_p; + frag.ipfr_id = ip->ip_id; + idx += ip->ip_id; + frag.ipfr_tos = ip->ip_tos; + frag.ipfr_src.s_addr = ip->ip_src.s_addr; + idx += ip->ip_src.s_addr; + frag.ipfr_dst.s_addr = ip->ip_dst.s_addr; + idx += ip->ip_dst.s_addr; + frag.ipfr_ifp = fin->fin_ifp; + idx *= 127; + idx %= IPFT_SIZE; + + /* + * check the table, careful to only compare the right amount of data + */ + for (f = table[idx]; f; f = f->ipfr_next) + if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src, + IPFR_CMPSZ)) { + u_short atoff, off; + + if (f != table[idx]) { + /* + * move fragment info. to the top of the list + * to speed up searches. + */ + if ((f->ipfr_prev->ipfr_next = f->ipfr_next)) + f->ipfr_next->ipfr_prev = f->ipfr_prev; + f->ipfr_next = table[idx]; + table[idx]->ipfr_prev = f; + f->ipfr_prev = NULL; + table[idx] = f; + } + off = ip->ip_off & IP_OFFMASK; + atoff = off + (fin->fin_dlen >> 3); + /* + * If we've follwed the fragments, and this is the + * last (in order), shrink expiration time. + */ + if (off == f->ipfr_off) { + if (!(ip->ip_off & IP_MF)) + f->ipfr_ttl = 1; + else + f->ipfr_off = atoff; + } + ATOMIC_INCL(ipfr_stats.ifs_hits); + return f; + } + return NULL; +} + + +/* + * functional interface for NAT lookups of the NAT fragment cache + */ +nat_t *ipfr_nat_knownfrag(ip, fin) +ip_t *ip; +fr_info_t *fin; +{ + nat_t *nat; + ipfr_t *ipf; + + if ((ip->ip_v != 4) || (fr_frag_lock)) + return NULL; + READ_ENTER(&ipf_natfrag); + ipf = ipfr_lookup(ip, fin, ipfr_nattab); + if (ipf != NULL) { + nat = ipf->ipfr_data; + /* + * This is the last fragment for this packet. + */ + if ((ipf->ipfr_ttl == 1) && (nat != NULL)) { + nat->nat_data = NULL; + ipf->ipfr_data = NULL; + } + } else + nat = NULL; + RWLOCK_EXIT(&ipf_natfrag); + return nat; +} + + +/* + * functional interface for normal lookups of the fragment cache + */ +frentry_t *ipfr_knownfrag(ip, fin) +ip_t *ip; +fr_info_t *fin; +{ + frentry_t *fr = NULL; + ipfr_t *fra; + + if ((ip->ip_v != 4) || (fr_frag_lock)) + return NULL; + READ_ENTER(&ipf_frag); + fra = ipfr_lookup(ip, fin, ipfr_heads); + if (fra != NULL) + fr = fra->ipfr_rule; + RWLOCK_EXIT(&ipf_frag); + return fr; +} + + +/* + * forget any references to this external object. + */ +void ipfr_forget(nat) +void *nat; +{ + ipfr_t *fr; + int idx; + + WRITE_ENTER(&ipf_natfrag); + for (idx = IPFT_SIZE - 1; idx >= 0; idx--) + for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next) + if (fr->ipfr_data == nat) + fr->ipfr_data = NULL; + + RWLOCK_EXIT(&ipf_natfrag); +} + + +static void ipfr_delete(fra) +ipfr_t *fra; +{ + frentry_t *fr; + + fr = fra->ipfr_rule; + if (fr != NULL) { + ATOMIC_DEC32(fr->fr_ref); + if (fr->fr_ref == 0) + KFREE(fr); + } + if (fra->ipfr_prev) + fra->ipfr_prev->ipfr_next = fra->ipfr_next; + if (fra->ipfr_next) + fra->ipfr_next->ipfr_prev = fra->ipfr_prev; + KFREE(fra); +} + + +/* + * Free memory in use by fragment state info. kept. + */ +void ipfr_unload() +{ + ipfr_t **fp, *fra; + nat_t *nat; + int idx; + + WRITE_ENTER(&ipf_frag); + for (idx = IPFT_SIZE - 1; idx >= 0; idx--) + for (fp = &ipfr_heads[idx]; (fra = *fp); ) { + *fp = fra->ipfr_next; + ipfr_delete(fra); + } + RWLOCK_EXIT(&ipf_frag); + + WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ipf_natfrag); + for (idx = IPFT_SIZE - 1; idx >= 0; idx--) + for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { + *fp = fra->ipfr_next; + nat = fra->ipfr_data; + if (nat != NULL) { + if (nat->nat_data == fra) + nat->nat_data = NULL; + } + ipfr_delete(fra); + } + RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ipf_nat); +} + + +#ifdef _KERNEL +void ipfr_fragexpire() +{ + ipfr_t **fp, *fra; + nat_t *nat; + int idx; +#if defined(_KERNEL) +# if !SOLARIS + int s; +# endif +#endif + + if (fr_frag_lock) + return; + + SPL_NET(s); + WRITE_ENTER(&ipf_frag); + + /* + * Go through the entire table, looking for entries to expire, + * decreasing the ttl by one for each entry. If it reaches 0, + * remove it from the chain and free it. + */ + for (idx = IPFT_SIZE - 1; idx >= 0; idx--) + for (fp = &ipfr_heads[idx]; (fra = *fp); ) { + --fra->ipfr_ttl; + if (fra->ipfr_ttl == 0) { + *fp = fra->ipfr_next; + ipfr_delete(fra); + ATOMIC_INCL(ipfr_stats.ifs_expire); + ATOMIC_DEC32(ipfr_inuse); + } else + fp = &fra->ipfr_next; + } + RWLOCK_EXIT(&ipf_frag); + + /* + * Same again for the NAT table, except that if the structure also + * still points to a NAT structure, and the NAT structure points back + * at the one to be free'd, NULL the reference from the NAT struct. + * NOTE: We need to grab both mutex's early, and in this order so as + * to prevent a deadlock if both try to expire at the same time. + */ + WRITE_ENTER(&ipf_nat); + WRITE_ENTER(&ipf_natfrag); + for (idx = IPFT_SIZE - 1; idx >= 0; idx--) + for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { + --fra->ipfr_ttl; + if (fra->ipfr_ttl == 0) { + ATOMIC_INCL(ipfr_stats.ifs_expire); + ATOMIC_DEC32(ipfr_inuse); + nat = fra->ipfr_data; + if (nat != NULL) { + if (nat->nat_data == fra) + nat->nat_data = NULL; + } + *fp = fra->ipfr_next; + ipfr_delete(fra); + } else + fp = &fra->ipfr_next; + } + RWLOCK_EXIT(&ipf_natfrag); + RWLOCK_EXIT(&ipf_nat); + SPL_X(s); +} + + +/* + * Slowly expire held state for fragments. Timeouts are set * in expectation + * of this being called twice per second. + */ +# if (BSD >= 199306) || SOLARIS || defined(__sgi) +# if defined(SOLARIS2) && (SOLARIS2 < 7) +void ipfr_slowtimer() +# else +void ipfr_slowtimer __P((void *ptr)) +# endif +# else +int ipfr_slowtimer() +# endif +{ +#if defined(_KERNEL) && SOLARIS + extern int fr_running; + + if (fr_running <= 0) + return; +#endif + + READ_ENTER(&ipf_solaris); +#ifdef __sgi + ipfilter_sgi_intfsync(); +#endif + + ipfr_fragexpire(); + fr_timeoutstate(); + ip_natexpire(); + fr_authexpire(); +# if SOLARIS + ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000)); + RWLOCK_EXIT(&ipf_solaris); +# else +# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000) + callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL); +# else +# if (__FreeBSD_version >= 300000) + ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2); +# else + timeout(ipfr_slowtimer, NULL, hz/2); +# endif +# if (BSD < 199306) && !defined(__sgi) + return 0; +# endif /* FreeBSD */ +# endif /* NetBSD */ +# endif /* SOLARIS */ +} +#endif /* defined(_KERNEL) */ |