diff options
Diffstat (limited to 'sys/netinet/ip_mroute.c')
-rw-r--r-- | sys/netinet/ip_mroute.c | 1056 |
1 files changed, 1056 insertions, 0 deletions
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c new file mode 100644 index 0000000000000..7ce8361ea3b90 --- /dev/null +++ b/sys/netinet/ip_mroute.c @@ -0,0 +1,1056 @@ +/* + * Copyright (c) 1989 Stephen Deering + * Copyright (c) 1992 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Stephen Deering of Stanford University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_mroute.c 7.4 (Berkeley) 11/19/92 + */ + +/* + * Procedures for the kernel part of DVMRP, + * a Distance-Vector Multicast Routing Protocol. + * (See RFC-1075.) + * + * Written by David Waitzman, BBN Labs, August 1988. + * Modified by Steve Deering, Stanford, February 1989. + * + * MROUTING 1.1 + */ + +#ifndef MROUTING +int ip_mrtproto; /* for netstat only */ +#else + +#include "param.h" +#include "errno.h" +#include "ioctl.h" +#include "malloc.h" +#include "mbuf.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#include "time.h" + +#ifndef __FreeBSD__ +#include "net/af.h" +#else +#include "systm.h" +#endif +#include "net/if.h" +#include "net/route.h" +#include "net/raw_cb.h" + +#include "in.h" +#include "in_systm.h" +#include "ip.h" +#include "in_pcb.h" +#include "in_var.h" +#include "ip_var.h" + +#include "igmp.h" +#include "igmp_var.h" +#include "ip_mroute.h" + +/* Static forwards */ +static int ip_mrouter_init __P((struct socket *)); +static int add_vif __P((struct vifctl *)); +static int del_vif __P((vifi_t *vifip)); +static int add_lgrp __P((struct lgrplctl *)); +static int del_lgrp __P((struct lgrplctl *)); +static int grplst_member __P((struct vif *, struct in_addr)); +static u_long nethash __P((u_long in)); +static int add_mrt __P((struct mrtctl *)); +static int del_mrt __P((struct in_addr *)); +static struct mrt *mrtfind __P((u_long)); +static void phyint_send __P((struct ip *, struct vif *, struct mbuf *)); +static void srcrt_send __P((struct ip *, struct vif *, struct mbuf *)); +static void encap_send __P((struct ip *, struct vif *, struct mbuf *)); +static void multiencap_decap __P((struct mbuf *, int hlen)); + +#define INSIZ sizeof(struct in_addr) +#define same(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) +#define satosin(sa) ((struct sockaddr_in *)(sa)) + +/* + * Globals. All but ip_mrouter and ip_mrtproto could be static, + * except for netstat or debugging purposes. + */ +struct socket *ip_mrouter = NULL; +int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ + +struct mrt *mrttable[MRTHASHSIZ]; +struct vif viftable[MAXVIFS]; +struct mrtstat mrtstat; + +/* + * 'Interfaces' associated with decapsulator (so we can tell + * packets that went through it from ones that get reflected + * by a broken gateway). These interfaces are never linked into + * the system ifnet list & no routes point to them. I.e., packets + * can't be sent this way. They only exist as a placeholder for + * multicast source verification. + */ +struct ifnet multicast_decap_if[MAXVIFS]; + +#define ENCAP_TTL 64 +#define ENCAP_PROTO 4 + +/* prototype IP hdr for encapsulated packets */ +struct ip multicast_encap_iphdr = { +#if defined(ultrix) || defined(i386) + sizeof(struct ip) >> 2, IPVERSION, +#else + IPVERSION, sizeof(struct ip) >> 2, +#endif + 0, /* tos */ + sizeof(struct ip), /* total length */ + 0, /* id */ + 0, /* frag offset */ + ENCAP_TTL, ENCAP_PROTO, + 0, /* checksum */ +}; + +/* + * Private variables. + */ +static vifi_t numvifs = 0; +static struct mrt *cached_mrt = NULL; +static u_long cached_origin; +static u_long cached_originmask; + +static void (*encap_oldrawip)(); + +/* + * one-back cache used by multiencap_decap to locate a tunnel's vif + * given a datagram's src ip address. + */ +static u_long last_encap_src; +static struct vif *last_encap_vif; + +/* + * A simple hash function: returns MRTHASHMOD of the low-order octet of + * the argument's network or subnet number. + */ +static u_long +nethash(n) + u_long n; +{ + struct in_addr in; + + in.s_addr = n; + n = in_netof(in); + while ((n & 0xff) == 0) + n >>= 8; + return (MRTHASHMOD(n)); +} + +/* + * this is a direct-mapped cache used to speed the mapping from a + * datagram source address to the associated multicast route. Note + * that unlike mrttable, the hash is on IP address, not IP net number. + */ +#define MSRCHASHSIZ 1024 +#define MSRCHASH(a) ((((a) >> 20) ^ ((a) >> 10) ^ (a)) & (MSRCHASHSIZ - 1)) +struct mrt *mrtsrchash[MSRCHASHSIZ]; + +/* + * Find a route for a given origin IP address. + */ +#define MRTFIND(o, rt) { \ + register u_int _mrhash = o; \ + _mrhash = MSRCHASH(_mrhash); \ + ++mrtstat.mrts_mrt_lookups; \ + rt = mrtsrchash[_mrhash]; \ + if (rt == NULL || \ + (o & rt->mrt_originmask.s_addr) != rt->mrt_origin.s_addr) \ + if ((rt = mrtfind(o)) != NULL) \ + mrtsrchash[_mrhash] = rt; \ +} + +static struct mrt * +mrtfind(origin) + u_long origin; +{ + register struct mrt *rt; + register u_int hash; + + mrtstat.mrts_mrt_misses++; + + hash = nethash(origin); + for (rt = mrttable[hash]; rt; rt = rt->mrt_next) { + if ((origin & rt->mrt_originmask.s_addr) == + rt->mrt_origin.s_addr) + return (rt); + } + return (NULL); +} + +/* + * Handle DVMRP setsockopt commands to modify the multicast routing tables. + */ +int +ip_mrouter_cmd(cmd, so, m) + register int cmd; + register struct socket *so; + register struct mbuf *m; +{ + register int error = 0; + + if (cmd != DVMRP_INIT && so != ip_mrouter) + error = EACCES; + else switch (cmd) { + + case DVMRP_INIT: + error = ip_mrouter_init(so); + break; + + case DVMRP_DONE: + error = ip_mrouter_done(); + break; + + case DVMRP_ADD_VIF: + if (m == NULL || m->m_len < sizeof(struct vifctl)) + error = EINVAL; + else + error = add_vif(mtod(m, struct vifctl *)); + break; + + case DVMRP_DEL_VIF: + if (m == NULL || m->m_len < sizeof(short)) + error = EINVAL; + else + error = del_vif(mtod(m, vifi_t *)); + break; + + case DVMRP_ADD_LGRP: + if (m == NULL || m->m_len < sizeof(struct lgrplctl)) + error = EINVAL; + else + error = add_lgrp(mtod(m, struct lgrplctl *)); + break; + + case DVMRP_DEL_LGRP: + if (m == NULL || m->m_len < sizeof(struct lgrplctl)) + error = EINVAL; + else + error = del_lgrp(mtod(m, struct lgrplctl *)); + break; + + case DVMRP_ADD_MRT: + if (m == NULL || m->m_len < sizeof(struct mrtctl)) + error = EINVAL; + else + error = add_mrt(mtod(m, struct mrtctl *)); + break; + + case DVMRP_DEL_MRT: + if (m == NULL || m->m_len < sizeof(struct in_addr)) + error = EINVAL; + else + error = del_mrt(mtod(m, struct in_addr *)); + break; + + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +/* + * Enable multicast routing + */ +static int +ip_mrouter_init(so) + register struct socket *so; +{ + if (so->so_type != SOCK_RAW || + so->so_proto->pr_protocol != IPPROTO_IGMP) + return (EOPNOTSUPP); + + if (ip_mrouter != NULL) + return (EADDRINUSE); + + ip_mrouter = so; + + return (0); +} + +/* + * Disable multicast routing + */ +int +ip_mrouter_done() +{ + register vifi_t vifi; + register int i; + register struct ifnet *ifp; + register int s; + struct ifreq ifr; + + s = splnet(); + + /* + * For each phyint in use, free its local group list and + * disable promiscuous reception of all IP multicasts. + */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_lcl_addr.s_addr != 0 && + !(viftable[vifi].v_flags & VIFF_TUNNEL)) { + if (viftable[vifi].v_lcl_grps) + free(viftable[vifi].v_lcl_grps, M_MRTABLE); + satosin(&ifr.ifr_addr)->sin_family = AF_INET; + satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; + ifp = viftable[vifi].v_ifp; + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); + } + } + bzero((caddr_t)viftable, sizeof(viftable)); + numvifs = 0; + + /* + * Free any multicast route entries. + */ + for (i = 0; i < MRTHASHSIZ; i++) + if (mrttable[i]) + free(mrttable[i], M_MRTABLE); + bzero((caddr_t)mrttable, sizeof(mrttable)); + bzero((caddr_t)mrtsrchash, sizeof(mrtsrchash)); + + ip_mrouter = NULL; + + splx(s); + return (0); +} + +/* + * Add a vif to the vif table + */ +static int +add_vif(vifcp) + register struct vifctl *vifcp; +{ + register struct vif *vifp = viftable + vifcp->vifc_vifi; + register struct ifaddr *ifa; + register struct ifnet *ifp; + struct ifreq ifr; + register int error, s; + static struct sockaddr_in sin = { sizeof(sin), AF_INET }; + + if (vifcp->vifc_vifi >= MAXVIFS) + return (EINVAL); + if (vifp->v_lcl_addr.s_addr != 0) + return (EADDRINUSE); + + /* Find the interface with an address in AF_INET family */ + sin.sin_addr = vifcp->vifc_lcl_addr; + ifa = ifa_ifwithaddr((struct sockaddr *)&sin); + if (ifa == 0) + return (EADDRNOTAVAIL); + ifp = ifa->ifa_ifp; + + if (vifcp->vifc_flags & VIFF_TUNNEL) { + if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { + /* + * An encapsulating tunnel is wanted. If we + * haven't done so already, put our decap routine + * in front of raw_input so we have a chance to + * decapsulate incoming packets. Then set the + * arrival 'interface' to be the decapsulator. + */ + if (encap_oldrawip == 0) { + register int pr = ip_protox[ENCAP_PROTO]; + + encap_oldrawip = inetsw[pr].pr_input; + inetsw[pr].pr_input = multiencap_decap; + for (s = 0; s < MAXVIFS; ++s) { + multicast_decap_if[s].if_name = + "mdecap"; + multicast_decap_if[s].if_unit = s; + } + } + ifp = &multicast_decap_if[vifcp->vifc_vifi]; + } else { + ifp = 0; + } + } else { + /* Make sure the interface supports multicast */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) + return EOPNOTSUPP; + + /* + * Enable promiscuous reception of all + * IP multicasts from the if + */ + ((struct sockaddr_in *)&ifr.ifr_addr)->sin_family = AF_INET; + ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr = + INADDR_ANY; + s = splnet(); + error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); + splx(s); + if (error) + return error; + } + + s = splnet(); + vifp->v_flags = vifcp->vifc_flags; + vifp->v_threshold = vifcp->vifc_threshold; + vifp->v_lcl_addr = vifcp->vifc_lcl_addr; + vifp->v_ifp = ifa->ifa_ifp; + vifp->v_rmt_addr = vifcp->vifc_rmt_addr; + splx(s); + + /* Adjust numvifs up if the vifi is higher than numvifs */ + if (numvifs <= vifcp->vifc_vifi) + numvifs = vifcp->vifc_vifi + 1; + + splx(s); + return (0); +} + +/* + * Delete a vif from the vif table + */ +static int +del_vif(vifip) + register vifi_t *vifip; +{ + register struct vif *vifp = viftable + *vifip; + register struct ifnet *ifp; + register int i, s; + struct ifreq ifr; + + if (*vifip >= numvifs) + return (EINVAL); + if (vifp->v_lcl_addr.s_addr == 0) + return (EADDRNOTAVAIL); + + s = splnet(); + + if (!(vifp->v_flags & VIFF_TUNNEL)) { + if (vifp->v_lcl_grps) + free(vifp->v_lcl_grps, M_MRTABLE); + satosin(&ifr.ifr_addr)->sin_family = AF_INET; + satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; + ifp = vifp->v_ifp; + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); + } + if (vifp == last_encap_vif) { + last_encap_vif = 0; + last_encap_src = 0; + } + bzero((caddr_t)vifp, sizeof (*vifp)); + + /* Adjust numvifs down */ + for (i = numvifs - 1; i >= 0; i--) + if (viftable[i].v_lcl_addr.s_addr != 0) + break; + numvifs = i + 1; + + splx(s); + return (0); +} + +/* + * Add the multicast group in the lgrpctl to the list of local multicast + * group memberships associated with the vif indexed by gcp->lgc_vifi. + */ +static int +add_lgrp(gcp) + register struct lgrplctl *gcp; +{ + register struct vif *vifp; + register int s; + + if (gcp->lgc_vifi >= numvifs) + return (EINVAL); + + vifp = viftable + gcp->lgc_vifi; + if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL)) + return (EADDRNOTAVAIL); + + /* If not enough space in existing list, allocate a larger one */ + s = splnet(); + if (vifp->v_lcl_grps_n + 1 >= vifp->v_lcl_grps_max) { + register int num; + register struct in_addr *ip; + + num = vifp->v_lcl_grps_max; + if (num <= 0) + num = 32; /* initial number */ + else + num += num; /* double last number */ + ip = (struct in_addr *)malloc(num * sizeof(*ip), + M_MRTABLE, M_NOWAIT); + if (ip == NULL) { + splx(s); + return (ENOBUFS); + } + + bzero((caddr_t)ip, num * sizeof(*ip)); /* XXX paranoid */ + bcopy((caddr_t)vifp->v_lcl_grps, (caddr_t)ip, + vifp->v_lcl_grps_n * sizeof(*ip)); + + vifp->v_lcl_grps_max = num; + if (vifp->v_lcl_grps) + free(vifp->v_lcl_grps, M_MRTABLE); + vifp->v_lcl_grps = ip; + } + + vifp->v_lcl_grps[vifp->v_lcl_grps_n++] = gcp->lgc_gaddr; + + if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group) + vifp->v_cached_result = 1; + + splx(s); + return (0); +} + +/* + * Delete the the local multicast group associated with the vif + * indexed by gcp->lgc_vifi. + */ + +static int +del_lgrp(gcp) + register struct lgrplctl *gcp; +{ + register struct vif *vifp; + register int i, error, s; + + if (gcp->lgc_vifi >= numvifs) + return (EINVAL); + vifp = viftable + gcp->lgc_vifi; + if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL)) + return (EADDRNOTAVAIL); + + s = splnet(); + + if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group) + vifp->v_cached_result = 0; + + error = EADDRNOTAVAIL; + for (i = 0; i < vifp->v_lcl_grps_n; ++i) + if (same(&gcp->lgc_gaddr, &vifp->v_lcl_grps[i])) { + error = 0; + --vifp->v_lcl_grps_n; + for (; i < vifp->v_lcl_grps_n; ++i) + vifp->v_lcl_grps[i] = vifp->v_lcl_grps[i + 1]; + error = 0; + break; + } + + splx(s); + return (error); +} + +/* + * Return 1 if gaddr is a member of the local group list for vifp. + */ +static int +grplst_member(vifp, gaddr) + register struct vif *vifp; + struct in_addr gaddr; +{ + register int i, s; + register u_long addr; + + mrtstat.mrts_grp_lookups++; + + addr = gaddr.s_addr; + if (addr == vifp->v_cached_group) + return (vifp->v_cached_result); + + mrtstat.mrts_grp_misses++; + + for (i = 0; i < vifp->v_lcl_grps_n; ++i) + if (addr == vifp->v_lcl_grps[i].s_addr) { + s = splnet(); + vifp->v_cached_group = addr; + vifp->v_cached_result = 1; + splx(s); + return (1); + } + s = splnet(); + vifp->v_cached_group = addr; + vifp->v_cached_result = 0; + splx(s); + return (0); +} + +/* + * Add an mrt entry + */ +static int +add_mrt(mrtcp) + register struct mrtctl *mrtcp; +{ + struct mrt *rt; + u_long hash; + int s; + + if (rt = mrtfind(mrtcp->mrtc_origin.s_addr)) { + /* Just update the route */ + s = splnet(); + rt->mrt_parent = mrtcp->mrtc_parent; + VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children); + VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves); + splx(s); + return (0); + } + + s = splnet(); + + rt = (struct mrt *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); + if (rt == NULL) { + splx(s); + return (ENOBUFS); + } + + /* + * insert new entry at head of hash chain + */ + rt->mrt_origin = mrtcp->mrtc_origin; + rt->mrt_originmask = mrtcp->mrtc_originmask; + rt->mrt_parent = mrtcp->mrtc_parent; + VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children); + VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves); + /* link into table */ + hash = nethash(mrtcp->mrtc_origin.s_addr); + rt->mrt_next = mrttable[hash]; + mrttable[hash] = rt; + + splx(s); + return (0); +} + +/* + * Delete an mrt entry + */ +static int +del_mrt(origin) + register struct in_addr *origin; +{ + register struct mrt *rt, *prev_rt; + register u_long hash = nethash(origin->s_addr); + register struct mrt **cmrt, **cmrtend; + register int s; + + for (prev_rt = rt = mrttable[hash]; rt; prev_rt = rt, rt = rt->mrt_next) + if (origin->s_addr == rt->mrt_origin.s_addr) + break; + if (!rt) + return (ESRCH); + + s = splnet(); + + cmrt = mrtsrchash; + cmrtend = cmrt + MSRCHASHSIZ; + for ( ; cmrt < cmrtend; ++cmrt) + if (*cmrt == rt) + *cmrt = 0; + + if (prev_rt == rt) + mrttable[hash] = rt->mrt_next; + else + prev_rt->mrt_next = rt->mrt_next; + free(rt, M_MRTABLE); + + splx(s); + return (0); +} + +/* + * IP multicast forwarding function. This function assumes that the packet + * pointed to by "ip" has arrived on (or is about to be sent to) the interface + * pointed to by "ifp", and the packet is to be relayed to other networks + * that have members of the packet's destination IP multicast group. + * + * The packet is returned unscathed to the caller, unless it is tunneled + * or erroneous, in which case a non-zero return value tells the caller to + * discard it. + */ + +#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ +#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ + +int +ip_mforward(ip, ifp, m) + register struct ip *ip; + register struct ifnet *ifp; + register struct mbuf *m; +{ + register struct mrt *rt; + register struct vif *vifp; + register int vifi; + register u_char *ipoptions; + u_long tunnel_src; + + if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || + (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { + /* + * Packet arrived via a physical interface. + */ + tunnel_src = 0; + } else { + /* + * Packet arrived through a tunnel. + * + * A tunneled packet has a single NOP option and a + * two-element loose-source-and-record-route (LSRR) + * option immediately following the fixed-size part of + * the IP header. At this point in processing, the IP + * header should contain the following IP addresses: + * + * original source - in the source address field + * destination group - in the destination address field + * remote tunnel end-point - in the first element of LSRR + * one of this host's addrs - in the second element of LSRR + * + * NOTE: RFC-1075 would have the original source and + * remote tunnel end-point addresses swapped. However, + * that could cause delivery of ICMP error messages to + * innocent applications on intermediate routing + * hosts! Therefore, we hereby change the spec. + */ + + /* + * Verify that the tunnel options are well-formed. + */ + if (ipoptions[0] != IPOPT_NOP || + ipoptions[2] != 11 || /* LSRR option length */ + ipoptions[3] != 12 || /* LSRR address pointer */ + (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { + mrtstat.mrts_bad_tunnel++; + return (1); + } + + /* + * Delete the tunnel options from the packet. + */ + ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, + (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); + m->m_len -= TUNNEL_LEN; + ip->ip_len -= TUNNEL_LEN; + ip->ip_hl -= TUNNEL_LEN >> 2; + } + + /* + * Don't forward a packet with time-to-live of zero or one, + * or a packet destined to a local-only group. + */ + if (ip->ip_ttl <= 1 || + ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) + return ((int)tunnel_src); + + /* + * Don't forward if we don't have a route for the packet's origin. + */ + MRTFIND(ip->ip_src.s_addr, rt) + if (rt == NULL) { + mrtstat.mrts_no_route++; + return ((int)tunnel_src); + } + + /* + * Don't forward if it didn't arrive from the + * parent vif for its origin. + * + * Notes: v_ifp is zero for src route tunnels, multicast_decap_if + * for encapsulated tunnels and a real ifnet for non-tunnels so + * the first part of the if catches wrong physical interface or + * tunnel type; v_rmt_addr is zero for non-tunneled packets so + * the 2nd part catches both packets that arrive via a tunnel + * that shouldn't and packets that arrive via the wrong tunnel. + */ + vifi = rt->mrt_parent; + if (viftable[vifi].v_ifp != ifp || + (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { + /* came in the wrong interface */ + ++mrtstat.mrts_wrong_if; + return (int)tunnel_src; + } + + /* + * For each vif, decide if a copy of the packet should be forwarded. + * Forward if: + * - the ttl exceeds the vif's threshold AND + * - the vif is a child in the origin's route AND + * - ( the vif is not a leaf in the origin's route OR + * the destination group has members on the vif ) + * + * (This might be speeded up with some sort of cache -- someday.) + */ + for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) { + if (ip->ip_ttl > vifp->v_threshold && + VIFM_ISSET(vifi, rt->mrt_children) && + (!VIFM_ISSET(vifi, rt->mrt_leaves) || + grplst_member(vifp, ip->ip_dst))) { + if (vifp->v_flags & VIFF_SRCRT) + srcrt_send(ip, vifp, m); + else if (vifp->v_flags & VIFF_TUNNEL) + encap_send(ip, vifp, m); + else + phyint_send(ip, vifp, m); + } + } + return ((int)tunnel_src); +} + +static void +phyint_send(ip, vifp, m) + register struct ip *ip; + register struct vif *vifp; + register struct mbuf *m; +{ + register struct mbuf *mb_copy; + register struct ip_moptions *imo; + register int error; + struct ip_moptions simo; + + mb_copy = m_copy(m, 0, M_COPYALL); + if (mb_copy == NULL) + return; + + imo = &simo; + imo->imo_multicast_ifp = vifp->v_ifp; + imo->imo_multicast_ttl = ip->ip_ttl - 1; + imo->imo_multicast_loop = 1; + + error = ip_output(mb_copy, NULL, NULL, + IP_FORWARDING|IP_MULTICASTOPTS, imo); +} + +static void +srcrt_send(ip, vifp, m) + register struct ip *ip; + register struct vif *vifp; + register struct mbuf *m; +{ + register struct mbuf *mb_copy, *mb_opts; + register struct ip *ip_copy; + register int error; + register u_char *cp; + + /* + * Make sure that adding the tunnel options won't exceed the + * maximum allowed number of option bytes. + */ + if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { + mrtstat.mrts_cant_tunnel++; + return; + } + + mb_copy = m_copy(m, 0, M_COPYALL); + if (mb_copy == NULL) + return; + ip_copy = mtod(mb_copy, struct ip *); + ip_copy->ip_ttl--; + ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ + /* + * Adjust the ip header length to account for the tunnel options. + */ + ip_copy->ip_hl += TUNNEL_LEN >> 2; + ip_copy->ip_len += TUNNEL_LEN; + MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); + if (mb_opts == NULL) { + m_freem(mb_copy); + return; + } + /* + * 'Delete' the base ip header from the mb_copy chain + */ + mb_copy->m_len -= IP_HDR_LEN; + mb_copy->m_data += IP_HDR_LEN; + /* + * Make mb_opts be the new head of the packet chain. + * Any options of the packet were left in the old packet chain head + */ + mb_opts->m_next = mb_copy; + mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; + mb_opts->m_data += MSIZE - mb_opts->m_len; + /* + * Copy the base ip header from the mb_copy chain to the new head mbuf + */ + bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN); + /* + * Add the NOP and LSRR after the base ip header + */ + cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; + *cp++ = IPOPT_NOP; + *cp++ = IPOPT_LSRR; + *cp++ = 11; /* LSRR option length */ + *cp++ = 8; /* LSSR pointer to second element */ + *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ + cp += 4; + *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ + + error = ip_output(mb_opts, NULL, NULL, IP_FORWARDING, NULL); +} + +static void +encap_send(ip, vifp, m) + register struct ip *ip; + register struct vif *vifp; + register struct mbuf *m; +{ + register struct mbuf *mb_copy; + register struct ip *ip_copy; + register int i, len = ip->ip_len; + + /* + * copy the old packet & pullup it's IP header into the + * new mbuf so we can modify it. Try to fill the new + * mbuf since if we don't the ethernet driver will. + */ + MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); + if (mb_copy == NULL) + return; + mb_copy->m_data += 16; + mb_copy->m_len = sizeof(multicast_encap_iphdr); + if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { + m_freem(mb_copy); + return; + } + i = MHLEN - 16; + if (i > len) + i = len; + mb_copy = m_pullup(mb_copy, i); + if (mb_copy == NULL) + return; + + /* + * fill in the encapsulating IP header. + */ + ip_copy = mtod(mb_copy, struct ip *); + *ip_copy = multicast_encap_iphdr; + ip_copy->ip_id = htons(ip_id++); + ip_copy->ip_len += len; + ip_copy->ip_src = vifp->v_lcl_addr; + ip_copy->ip_dst = vifp->v_rmt_addr; + + /* + * turn the encapsulated IP header back into a valid one. + */ + ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); + --ip->ip_ttl; + HTONS(ip->ip_len); + HTONS(ip->ip_off); + ip->ip_sum = 0; +#if defined(LBL) && !defined(ultrix) && !defined(i386) + ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); +#else + mb_copy->m_data += sizeof(multicast_encap_iphdr); + ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); + mb_copy->m_data -= sizeof(multicast_encap_iphdr); +#endif + ip_output(mb_copy, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, (struct mbuf *)0); +} + +/* + * De-encapsulate a packet and feed it back through ip input (this + * routine is called whenever IP gets a packet with proto type + * ENCAP_PROTO and a local destination address). + */ +static void +multiencap_decap(m, hlen) + register struct mbuf *m; + int hlen; +{ + struct ifnet *ifp; + register struct ip *ip = mtod(m, struct ip *); + register int s; + register struct ifqueue *ifq; + register struct vif *vifp; + + if (ip->ip_p != ENCAP_PROTO) { + (*encap_oldrawip)(m, hlen); + return; + } + /* + * dump the packet if it's not to a multicast destination or if + * we don't have an encapsulating tunnel with the source. + * Note: This code assumes that the remote site IP address + * uniquely identifies the tunnel (i.e., that this site has + * at most one tunnel with the remote site). + */ + if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { + ++mrtstat.mrts_bad_tunnel; + m_freem(m); + return; + } + if (ip->ip_src.s_addr != last_encap_src) { + register struct vif *vife; + + vifp = viftable; + vife = vifp + numvifs; + last_encap_src = ip->ip_src.s_addr; + last_encap_vif = 0; + for ( ; vifp < vife; ++vifp) + if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { + if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) + == VIFF_TUNNEL) + last_encap_vif = vifp; + break; + } + } + if ((vifp = last_encap_vif) == 0) { + mrtstat.mrts_cant_tunnel++; /*XXX*/ + m_freem(m); + return; + } + ifp = vifp->v_ifp; + m->m_data += hlen; + m->m_len -= hlen; + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len -= hlen; + ifq = &ipintrq; + s = splimp(); + if (IF_QFULL(ifq)) { + IF_DROP(ifq); + m_freem(m); + } else { + IF_ENQUEUE(ifq, m); + /* + * normally we would need a "schednetisr(NETISR_IP)" + * here but we were called by ip_input and it is going + * to loop back & try to dequeue the packet we just + * queued as soon as we return so we avoid the + * unnecessary software interrrupt. + */ + } + splx(s); +} +#endif |