diff options
| author | Julian Elischer <julian@FreeBSD.org> | 2008-05-09 23:03:00 +0000 |
|---|---|---|
| committer | Julian Elischer <julian@FreeBSD.org> | 2008-05-09 23:03:00 +0000 |
| commit | 8b07e49a008c89a15e1fc4a1e3db6d945f81fab4 (patch) | |
| tree | 1bc85679564ad62b5790f35580ebdcc21ca90f8b /sys/net | |
| parent | a15370c6aa962e0030c1ae024292d76c112d6ea2 (diff) | |
Notes
Diffstat (limited to 'sys/net')
| -rw-r--r-- | sys/net/if.c | 9 | ||||
| -rw-r--r-- | sys/net/if_atmsubr.c | 3 | ||||
| -rw-r--r-- | sys/net/if_fwsubr.c | 2 | ||||
| -rw-r--r-- | sys/net/if_gif.c | 3 | ||||
| -rw-r--r-- | sys/net/if_gif.h | 1 | ||||
| -rw-r--r-- | sys/net/if_gre.c | 7 | ||||
| -rw-r--r-- | sys/net/if_gre.h | 1 | ||||
| -rw-r--r-- | sys/net/if_iso88025subr.c | 3 | ||||
| -rw-r--r-- | sys/net/if_stf.c | 9 | ||||
| -rw-r--r-- | sys/net/if_var.h | 2 | ||||
| -rw-r--r-- | sys/net/radix_mpath.c | 4 | ||||
| -rw-r--r-- | sys/net/radix_mpath.h | 3 | ||||
| -rw-r--r-- | sys/net/route.c | 518 | ||||
| -rw-r--r-- | sys/net/route.h | 35 | ||||
| -rw-r--r-- | sys/net/rtsock.c | 14 |
15 files changed, 465 insertions, 149 deletions
diff --git a/sys/net/if.c b/sys/net/if.c index c3c367b065c5..85306a4fcfab 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -740,11 +740,14 @@ if_detach(struct ifnet *ifp) * to this interface...oh well... */ for (i = 1; i <= AF_MAX; i++) { - if ((rnh = rt_tables[i]) == NULL) + int j; + for (j = 0; j < rt_numfibs; j++) { + if ((rnh = rt_tables[j][i]) == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); RADIX_NODE_HEAD_UNLOCK(rnh); + } } /* Announce that the interface is gone. */ @@ -1010,9 +1013,9 @@ if_rtdel(struct radix_node *rn, void *arg) if ((rt->rt_flags & RTF_UP) == 0) return (0); - err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, - (struct rtentry **) NULL); + (struct rtentry **) NULL, rt->rt_fibnum); if (err) { log(LOG_WARNING, "if_rtdel: error %d\n", err); } diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c index 9d1a7faf5572..15647371d7b2 100644 --- a/sys/net/if_atmsubr.c +++ b/sys/net/if_atmsubr.c @@ -158,7 +158,8 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, * check route */ if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); + error = rt_check_fib(&rt, &rt0, + dst, rt0->rt_fibnum); if (error) goto bad; RT_UNLOCK(rt); diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c index e001c29d7562..65b2aff6e33e 100644 --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -103,7 +103,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, } if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); + error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum); if (error) goto bad; RT_UNLOCK(rt); diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 63f3c7d99011..831088163702 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -46,6 +46,7 @@ #include <sys/time.h> #include <sys/sysctl.h> #include <sys/syslog.h> +#include <sys/proc.h> #include <sys/protosw.h> #include <sys/conf.h> 
#include <machine/cpu.h> @@ -155,6 +156,7 @@ gif_clone_create(ifc, unit, params) struct gif_softc *sc; sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); + sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); if (GIF2IFP(sc) == NULL) { free(sc, M_GIF); @@ -441,6 +443,7 @@ gif_output(ifp, m, dst, rt) if (ifp->if_bridge) af = AF_LINK; + M_SETFIB(m, sc->gif_fibnum); /* inner AF-specific encapsulation */ /* XXX should we check if our outer source is legal? */ diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h index 8e9ceb172c9a..4e417fd16e26 100644 --- a/sys/net/if_gif.h +++ b/sys/net/if_gif.h @@ -67,6 +67,7 @@ struct gif_softc { #endif } gifsc_gifscr; int gif_flags; + u_int gif_fibnum; const struct encaptab *encap_cookie4; const struct encaptab *encap_cookie6; void *gif_netgraph; /* ng_gif(4) netgraph node info */ diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index b4b42b99cd3b..9045f06f6c84 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -58,6 +58,7 @@ #include <sys/module.h> #include <sys/mbuf.h> #include <sys/priv.h> +#include <sys/proc.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -201,6 +202,7 @@ gre_clone_create(ifc, unit, params) GRE2IFP(sc)->if_flags |= IFF_LINK0; sc->encap = NULL; sc->called = 0; + sc->gre_fibnum = curthread->td_proc->p_fibnum; sc->wccp_ver = WCCP_V1; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); @@ -395,6 +397,8 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, goto end; } + M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ + gh = mtod(m, struct greip *); if (sc->g_proto == IPPROTO_GRE) { /* we don't have any GRE flags for now */ @@ -754,6 +758,7 @@ gre_compute_route(struct gre_softc *sc) * toggle last bit, so our interface is not found, but a less * specific route. I'd rather like to specify a shorter mask, * but this is not possible. Should work though. 
XXX + * XXX MRT Use a different FIB for the tunnel to solve this problem. */ if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= @@ -765,7 +770,7 @@ gre_compute_route(struct gre_softc *sc) inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); #endif - rtalloc(ro); + rtalloc_fib(ro, sc->gre_fibnum); /* * check if this returned a route at all and this route is no diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index 6c8e8537c6d3..3c34beced186 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -59,6 +59,7 @@ struct gre_softc { LIST_ENTRY(gre_softc) sc_list; int gre_unit; int gre_flags; + u_int gre_fibnum; /* use this fib for envelopes */ struct in_addr g_src; /* source address of gre packets */ struct in_addr g_dst; /* destination address of gre packets */ struct route route; /* routing entry that determines, where a diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c index dd50923295b8..f56101e71cd6 100644 --- a/sys/net/if_iso88025subr.c +++ b/sys/net/if_iso88025subr.c @@ -259,7 +259,8 @@ iso88025_output(ifp, m, dst, rt0) /* Calculate routing info length based on arp table entry */ /* XXX any better way to do this ? 
*/ if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); +/* XXX MRT *//* Guess only */ + error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum); if (error) goto bad; RT_UNLOCK(rt); diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 8f70df6d6a56..f373eaac9176 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -87,6 +87,7 @@ #include <sys/kernel.h> #include <sys/module.h> #include <sys/protosw.h> +#include <sys/proc.h> #include <sys/queue.h> #include <machine/cpu.h> @@ -136,6 +137,7 @@ struct stf_softc { struct route_in6 __sc_ro6; /* just for safety */ } __sc_ro46; #define sc_ro __sc_ro46.__sc_ro4 + u_int sc_fibnum; const struct encaptab *encap_cookie; }; #define STF2IFP(sc) ((sc)->sc_ifp) @@ -219,6 +221,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) return (ENOSPC); } ifp->if_softc = sc; + sc->sc_fibnum = curthread->td_proc->p_fibnum; /* * Set the name manually rather then using if_initname because @@ -521,7 +524,7 @@ stf_output(ifp, m, dst, rt) } if (sc->sc_ro.ro_rt == NULL) { - rtalloc(&sc->sc_ro); + rtalloc_fib(&sc->sc_ro, sc->sc_fibnum); if (sc->sc_ro.ro_rt == NULL) { m_freem(m); ifp->if_oerrors++; @@ -529,6 +532,7 @@ stf_output(ifp, m, dst, rt) } } + M_SETFIB(m, sc->sc_fibnum); ifp->if_opackets++; return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); } @@ -599,7 +603,8 @@ stf_checkaddr4(sc, in, inifp) sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = *in; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, + 0UL, sc->sc_fibnum); if (!rt || rt->rt_ifp != inifp) { #if 0 log(LOG_WARNING, "%s: packet from 0x%x dropped " diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 8fbf7293a5b7..d738e3210d16 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -690,6 +690,8 @@ struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *); struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *); struct ifaddr *ifa_ifwithnet(struct sockaddr *); struct ifaddr 
*ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *); +struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int); + struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index d1db258043b4..b04b42a1bd1d 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -255,7 +255,7 @@ different: } void -rtalloc_mpath(struct route *ro, int hash) +rtalloc_mpath_fib(struct route *ro, int hash, u_int fibnum) { struct radix_node *rn0, *rn; int n; @@ -266,7 +266,7 @@ rtalloc_mpath(struct route *ro, int hash) */ if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) return; /* XXX */ - ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL); + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0UL, fibnum); /* if the route does not exist or it is not multipath, don't care */ if (ro->ro_rt == NULL) diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h index 661aaf394661..b9224c8ecb8e 100644 --- a/sys/net/radix_mpath.h +++ b/sys/net/radix_mpath.h @@ -50,7 +50,8 @@ int rn_mpath_count(struct radix_node *); struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *); int rt_mpath_conflict(struct radix_node_head *, struct rtentry *, struct sockaddr *); -void rtalloc_mpath(struct route *, int); +void rtalloc_mpath_fib(struct route *, int, u_int); +#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0) struct radix_node *rn_mpath_lookup(void *, void *, struct radix_node_head *); int rt_mpath_deldup(struct rtentry *, struct rtentry *); diff --git a/sys/net/route.c b/sys/net/route.c index d55c2f8ed4c0..3ae5dbcd9d19 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -29,8 +29,13 @@ * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ +/************************************************************************ + * Note: In this file a 'fib' is a "forwarding information base" * 
+ * Which is the new name for an in kernel routing (next hop) table. * + ***********************************************************************/ #include "opt_inet.h" +#include "opt_route.h" #include "opt_mrouting.h" #include "opt_mpath.h" @@ -39,6 +44,9 @@ #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/sysproto.h> +#include <sys/proc.h> #include <sys/domain.h> #include <sys/kernel.h> @@ -54,14 +62,45 @@ #include <vm/uma.h> +#ifndef ROUTETABLES + #define RT_NUMFIBS 1 + #define RT_MAXFIBS 1 +#else + /* while we use 4 bits in the mbuf flags, + * we are limited to 16 + */ + #if ROUTETABLES > RT_MAXFIBS + #define RT_NUMFIBS RT_MAXFIBS + #error "ROUTETABLES defined too big" + #else + #if ROUTETABLES == 0 + #define RT_NUMFIBS 1 + #else + #define RT_NUMFIBS ROUTETABLES + #endif + #endif +#endif + +u_int rt_numfibs = RT_NUMFIBS; +SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); +/* Eventually this will be a tunable */ +TUNABLE_INT("net.fibs", &rt_numfibs); + static struct rtstat rtstat; -struct radix_node_head *rt_tables[AF_MAX+1]; + +/* by default only the first 'row' of tables will be accessed. */ +/* + * XXXMRT When we fix netstat, and do this differnetly, + * we can allocate this dynamically. As long as we are keeping + * things backwards compaitble we need to allocate this + * statically. + */ +struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1]; static int rttrash; /* routes not in table but not freed */ static void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); -static void rtable_init(void **); /* compare two sockaddr structures */ #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) @@ -78,25 +117,83 @@ static void rtable_init(void **); */ #define RNTORT(p) ((struct rtentry *)(p)) -static void -rtable_init(void **table) +static uma_zone_t rtzone; /* Routing table UMA zone. 
*/ + +#if 0 +/* default fib for tunnels to use */ +u_int tunnel_fib = 0; +SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, ""); +#endif + +/* + * handler for net.my_fibnum + */ +static int +sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) { - struct domain *dom; - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_rtattach) - dom->dom_rtattach(&table[dom->dom_family], - dom->dom_rtoffset); + int fibnum; + int error; + + fibnum = curthread->td_proc->p_fibnum; + error = sysctl_handle_int(oidp, &fibnum, 0, req); + return (error); } -static uma_zone_t rtzone; /* Routing table UMA zone. */ +SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, + NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); static void route_init(void) { + int table; + struct domain *dom; + int fam; + + /* whack teh tunable ints into line. */ + if (rt_numfibs > RT_MAXFIBS) + rt_numfibs = RT_MAXFIBS; + if (rt_numfibs == 0) + rt_numfibs = 1; rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rn_init(); /* initialize all zeroes, all ones, mask table */ - rtable_init((void **)rt_tables); + + for (dom = domains; dom; dom = dom->dom_next) { + if (dom->dom_rtattach) { + for (table = 0; table < rt_numfibs; table++) { + if ( (fam = dom->dom_family) == AF_INET || + table == 0) { + /* for now only AF_INET has > 1 table */ + /* XXX MRT + * rtattach will be also called + * from vfs_export.c but the + * offset will be 0 + * (only for AF_INET and AF_INET6 + * which don't need it anyhow) + */ + dom->dom_rtattach( + (void **)&rt_tables[table][fam], + dom->dom_rtoffset); + } else { + break; + } + } + } + } +} + +#ifndef _SYS_SYSPROTO_H_ +struct setfib_args { + int fibnum; +}; +#endif +int +setfib(struct thread *td, struct setfib_args *uap) +{ + if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) + return EINVAL; + td->td_proc->p_fibnum = uap->fibnum; + return (0); } /* @@ -105,7 +202,13 @@ route_init(void) void rtalloc(struct route *ro) { 
- rtalloc_ign(ro, 0UL); + rtalloc_ign_fib(ro, 0UL, 0); +} + +void +rtalloc_fib(struct route *ro, u_int fibnum) +{ + rtalloc_ign_fib(ro, 0UL, fibnum); } void @@ -119,7 +222,23 @@ rtalloc_ign(struct route *ro, u_long ignore) RTFREE(rt); ro->ro_rt = NULL; } - ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore); + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0); + if (ro->ro_rt) + RT_UNLOCK(ro->ro_rt); +} + +void +rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) +{ + struct rtentry *rt; + + if ((rt = ro->ro_rt) != NULL) { + if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) + return; + RTFREE(rt); + ro->ro_rt = NULL; + } + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } @@ -133,7 +252,14 @@ rtalloc_ign(struct route *ro, u_long ignore) struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { - struct radix_node_head *rnh = rt_tables[dst->sa_family]; + return (rtalloc1_fib(dst, report, ignflags, 0)); +} + +struct rtentry * +rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, + u_int fibnum) +{ + struct radix_node_head *rnh; struct rtentry *rt; struct radix_node *rn; struct rtentry *newrt; @@ -141,6 +267,10 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags) u_long nflags; int err = 0, msgtype = RTM_MISS; + KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); + if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ + fibnum = 0; + rnh = rt_tables[fibnum][dst->sa_family]; newrt = NULL; /* * Look up the address in the table for that Address Family @@ -164,8 +294,8 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags) * If it requires that it be cloned, do so. * (This implies it wasn't a HOST route.) 
*/ - err = rtrequest(RTM_RESOLVE, dst, NULL, - NULL, 0, &newrt); + err = rtrequest_fib(RTM_RESOLVE, dst, NULL, + NULL, 0, &newrt, fibnum); if (err) { /* * If the cloning didn't succeed, maybe @@ -237,7 +367,7 @@ rtfree(struct rtentry *rt) struct radix_node_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); - rnh = rt_tables[rt_key(rt)->sa_family]; + rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); RT_LOCK_ASSERT(rt); @@ -323,6 +453,17 @@ rtredirect(struct sockaddr *dst, int flags, struct sockaddr *src) { + rtredirect_fib(dst, gateway, netmask, flags, src, 0); +} + +void +rtredirect_fib(struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct sockaddr *src, + u_int fibnum) +{ struct rtentry *rt, *rt0 = NULL; int error = 0; short *stat = NULL; @@ -334,7 +475,7 @@ rtredirect(struct sockaddr *dst, error = ENETUNREACH; goto out; } - rt = rtalloc1(dst, 0, 0UL); /* NB: rt is locked */ + rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, @@ -377,7 +518,7 @@ rtredirect(struct sockaddr *dst, info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; - error = rtrequest1(RTM_ADD, &info, &rt); + error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); @@ -423,11 +564,17 @@ out: rt_missmsg(RTM_REDIRECT, &info, flags, error); } +int +rtioctl(u_long req, caddr_t data) +{ + return (rtioctl_fib(req, data, 0)); +} + /* * Routing table ioctl interface. */ int -rtioctl(u_long req, caddr_t data) +rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* @@ -438,7 +585,7 @@ rtioctl(u_long req, caddr_t data) */ #ifdef INET /* Multicast goop, grrr... */ - return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP; + return mrt_ioctl ? 
mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ @@ -447,6 +594,13 @@ rtioctl(u_long req, caddr_t data) struct ifaddr * ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) { + return (ifa_ifwithroute_fib(flags, dst, gateway, 0)); +} + +struct ifaddr * +ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, + u_int fibnum) +{ register struct ifaddr *ifa; int not_found = 0; @@ -474,7 +628,7 @@ ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) if (ifa == NULL) ifa = ifa_ifwithnet(gateway); if (ifa == NULL) { - struct rtentry *rt = rtalloc1(gateway, 0, 0UL); + struct rtentry *rt = rtalloc1_fib(gateway, 0, 0UL, fibnum); if (rt == NULL) return (NULL); /* @@ -529,6 +683,18 @@ rtrequest(int req, int flags, struct rtentry **ret_nrt) { + return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0)); +} + +int +rtrequest_fib(int req, + struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct rtentry **ret_nrt, + u_int fibnum) +{ struct rt_addrinfo info; if (dst->sa_len == 0) @@ -539,7 +705,7 @@ rtrequest(int req, info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; - return rtrequest1(req, &info, ret_nrt); + return rtrequest1_fib(req, &info, ret_nrt, fibnum); } /* @@ -556,6 +722,12 @@ rtrequest(int req, int rt_getifa(struct rt_addrinfo *info) { + return (rt_getifa_fib(info, 0)); +} + +int +rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) +{ struct ifaddr *ifa; int error = 0; @@ -577,9 +749,11 @@ rt_getifa(struct rt_addrinfo *info) if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) - info->rti_ifa = ifa_ifwithroute(flags, dst, gateway); + info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, + fibnum); else if (sa != NULL) - info->rti_ifa = ifa_ifwithroute(flags, sa, 
sa); + info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, + fibnum); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) @@ -613,7 +787,7 @@ rtexpunge(struct rtentry *rt) /* * Find the correct routing tree to use for this Address Family */ - rnh = rt_tables[rt_key(rt)->sa_family]; + rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; if (rnh == NULL) return (EAFNOSUPPORT); @@ -680,6 +854,13 @@ bad: int rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) { + return (rtrequest1_fib(req, info, ret_nrt, 0)); +} + +int +rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, + u_int fibnum) +{ int error = 0; register struct rtentry *rt; register struct radix_node *rn; @@ -688,10 +869,13 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) struct sockaddr *ndst; #define senderr(x) { error = x ; goto bad; } + KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); + if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ + fibnum = 0; /* * Find the correct routing tree to use for this Address Family */ - rnh = rt_tables[dst->sa_family]; + rnh = rt_tables[fibnum][dst->sa_family]; if (rnh == NULL) return (EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK(rnh); @@ -848,7 +1032,7 @@ deldone: (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) senderr(EINVAL); - if (info->rti_ifa == NULL && (error = rt_getifa(info))) + if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum))) senderr(error); ifa = info->rti_ifa; @@ -858,6 +1042,7 @@ deldone: senderr(ENOBUFS); RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; + rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it * also add the rt_gwroute if possible. @@ -918,7 +1103,7 @@ deldone: * then we just blow it away and retry the insertion * of the new one. 
*/ - rt2 = rtalloc1(dst, 0, 0); + rt2 = rtalloc1_fib(dst, 0, 0, fibnum); if (rt2 && rt2->rt_parent) { rtexpunge(rt2); RT_UNLOCK(rt2); @@ -1034,8 +1219,8 @@ rt_fixdelete(struct radix_node *rn, void *vp) if (rt->rt_parent == rt0 && !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) { - return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), - rt->rt_flags, NULL); + return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), + rt->rt_flags, NULL, rt->rt_fibnum); } return 0; } @@ -1099,15 +1284,15 @@ rt_fixchange(struct radix_node *rn, void *vp) * changed/added under the node's mask. So, get rid of it. */ delete_rt: - return rtrequest(RTM_DELETE, rt_key(rt), NULL, - rt_mask(rt), rt->rt_flags, NULL); + return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, + rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum); } int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ - struct radix_node_head *rnh = rt_tables[dst->sa_family]; + struct radix_node_head *rnh = rt_tables[rt->rt_fibnum][dst->sa_family]; int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); again: @@ -1138,7 +1323,7 @@ again: struct rtentry *gwrt; RT_UNLOCK(rt); /* XXX workaround LOR */ - gwrt = rtalloc1(gate, 1, 0); + gwrt = rtalloc1_fib(gate, 1, 0, rt->rt_fibnum); if (gwrt == rt) { RT_REMREF(rt); return (EADDRINUSE); /* failure */ @@ -1243,15 +1428,19 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma * Set up a routing table entry, normally * for an interface. */ -int -rtinit(struct ifaddr *ifa, int cmd, int flags) +#define _SOCKADDR_TMPSIZE 128 /* Not too big.. 
kernel stack size is limited */ +static inline int +rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { struct sockaddr *dst; struct sockaddr *netmask; - struct mbuf *m = NULL; struct rtentry *rt = NULL; struct rt_addrinfo info; - int error=0; + int error = 0; + int startfib, endfib; + char tempbuf[_SOCKADDR_TMPSIZE]; + int didwork = 0; + int a_failure = 0; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1260,126 +1449,190 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } + if ( dst->sa_family != AF_INET) + fibnum = 0; + if (fibnum == -1) { + startfib = 0; + endfib = rt_numfibs - 1; + } else { + KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); + startfib = fibnum; + endfib = fibnum; + } if (dst->sa_len == 0) return(EINVAL); /* - * If it's a delete, check that if it exists, it's on the correct - * interface or we might scrub a route to another ifa which would + * If it's a delete, check that if it exists, + * it's on the correct interface or we might scrub + * a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { - struct sockaddr *deldst; - struct radix_node_head *rnh; - struct radix_node *rn; - /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) + * XXX this is kinda inet specific.. */ if (netmask != NULL) { - m = m_get(M_DONTWAIT, MT_SONAME); - if (m == NULL) - return(ENOBUFS); - deldst = mtod(m, struct sockaddr *); - rt_maskedcopy(dst, deldst, netmask); - dst = deldst; + rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); + dst = (struct sockaddr *)tempbuf; } - /* - * Look up an rtentry that is in the routing tree and - * contains the correct info. - */ - if ((rnh = rt_tables[dst->sa_family]) == NULL) - goto bad; - RADIX_NODE_HEAD_LOCK(rnh); + } + /* + * Now go through all the requested tables (fibs) and do the + * requested action. 
Realistically, this will either be fib 0 + * for protocols that don't do multiple tables or all the + * tables for those that do. XXX For this version only AF_INET. + * When that changes code should be refactored to protocol + * independent parts and protocol dependent parts. + */ + for ( fibnum = startfib; fibnum <= endfib; fibnum++) { + if (cmd == RTM_DELETE) { + struct radix_node_head *rnh; + struct radix_node *rn; + /* + * Look up an rtentry that is in the routing tree and + * contains the correct info. + */ + if ((rnh = rt_tables[fibnum][dst->sa_family]) == NULL) + /* this table doesn't exist but others might */ + continue; + RADIX_NODE_HEAD_LOCK(rnh); #ifdef RADIX_MPATH - if (rn_mpath_capable(rnh)) { + if (rn_mpath_capable(rnh)) { - rn = rnh->rnh_matchaddr(dst, rnh); - if (rn == NULL) - error = ESRCH; - else { - rt = RNTORT(rn); - /* - * for interface route the rt->rt_gateway is - * sockaddr_intf for cloning ARP entries, so - * rt_mpath_matchgate must use the interface - * address - */ - rt = rt_mpath_matchgate(rt, ifa->ifa_addr); - if (!rt) + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) error = ESRCH; + else { + rt = RNTORT(rn); + /* + * for interface route the + * rt->rt_gateway is sockaddr_intf + * for cloning ARP entries, so + * rt_mpath_matchgate must use the + * interface address + */ + rt = rt_mpath_matchgate(rt, + ifa->ifa_addr); + if (!rt) + error = ESRCH; + } } - } - else + else #endif - error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL || - (rn->rn_flags & RNF_ROOT) || - RNTORT(rn)->rt_ifa != ifa || - !sa_equal((struct sockaddr *)rn->rn_key, dst)); - - RADIX_NODE_HEAD_UNLOCK(rnh); - if (error) { -bad: - if (m) - (void) m_free(m); - return (flags & RTF_HOST ? 
EHOSTUNREACH : ENETUNREACH); + rn = rnh->rnh_lookup(dst, netmask, rnh); + error = (rn == NULL || + (rn->rn_flags & RNF_ROOT) || + RNTORT(rn)->rt_ifa != ifa || + !sa_equal((struct sockaddr *)rn->rn_key, dst)); + RADIX_NODE_HEAD_UNLOCK(rnh); + if (error) { + /* this is only an error if bad on ALL tables */ + continue; + } } - } - /* - * Do the actual request - */ - bzero((caddr_t)&info, sizeof(info)); - info.rti_ifa = ifa; - info.rti_flags = flags | ifa->ifa_flags; - info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; - info.rti_info[RTAX_NETMASK] = netmask; - error = rtrequest1(cmd, &info, &rt); - if (error == 0 && rt != NULL) { /* - * notify any listening routing agents of the change + * Do the actual request */ - RT_LOCK(rt); + bzero((caddr_t)&info, sizeof(info)); + info.rti_ifa = ifa; + info.rti_flags = flags | ifa->ifa_flags; + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + info.rti_info[RTAX_NETMASK] = netmask; + error = rtrequest1_fib(cmd, &info, &rt, fibnum); + if (error == 0 && rt != NULL) { + /* + * notify any listening routing agents of the change + */ + RT_LOCK(rt); #ifdef RADIX_MPATH - /* - * in case address alias finds the first address - * e.g. ifconfig bge0 192.103.54.246/24 - * e.g. ifconfig bge0 192.103.54.247/24 - * the address set in the route is 192.103.54.246 - * so we need to replace it with 192.103.54.247 - */ - if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { - IFAFREE(rt->rt_ifa); - IFAREF(ifa); - rt->rt_ifp = ifa->ifa_ifp; - rt->rt_ifa = ifa; - } -#endif - rt_newaddrmsg(cmd, ifa, error, rt); - if (cmd == RTM_DELETE) { /* - * If we are deleting, and we found an entry, then - * it's been removed from the tree.. now throw it away. + * in case address alias finds the first address + * e.g. ifconfig bge0 192.103.54.246/24 + * e.g. 
ifconfig bge0 192.103.54.247/24 + * the address set in the route is 192.103.54.246 + * so we need to replace it with 192.103.54.247 */ - RTFREE_LOCKED(rt); - } else { - if (cmd == RTM_ADD) { + if (memcmp(rt->rt_ifa->ifa_addr, + ifa->ifa_addr, ifa->ifa_addr->sa_len)) { + IFAFREE(rt->rt_ifa); + IFAREF(ifa); + rt->rt_ifp = ifa->ifa_ifp; + rt->rt_ifa = ifa; + } +#endif + rt_newaddrmsg(cmd, ifa, error, rt); + if (cmd == RTM_DELETE) { /* - * We just wanted to add it.. we don't actually - * need a reference. + * If we are deleting, and we found an entry, + * then it's been removed from the tree.. + * now throw it away. */ - RT_REMREF(rt); + RTFREE_LOCKED(rt); + } else { + if (cmd == RTM_ADD) { + /* + * We just wanted to add it.. + * we don't actually need a reference. + */ + RT_REMREF(rt); + } + RT_UNLOCK(rt); } - RT_UNLOCK(rt); + didwork = 1; + } + if (error) + a_failure = error; + } + if (cmd == RTM_DELETE) { + if (didwork) { + error = 0; + } else { + /* we only give an error if it wasn't in any table */ + error = ((flags & RTF_HOST) ? + EHOSTUNREACH : ENETUNREACH); + } + } else { + if (a_failure) { + /* return an error if any of them failed */ + error = a_failure; } } - if (m) - (void) m_free(m); return (error); } +/* special one for inet internal use. may not use. */ +int +rtinit_fib(struct ifaddr *ifa, int cmd, int flags) +{ + return (rtinit1(ifa, cmd, flags, -1)); +} + +/* + * Set up a routing table entry, normally + * for an interface. + */ +int +rtinit(struct ifaddr *ifa, int cmd, int flags) +{ + struct sockaddr *dst; + int fib = 0; + + if (flags & RTF_HOST) { + dst = ifa->ifa_dstaddr; + } else { + dst = ifa->ifa_addr; + } + + if (dst->sa_family == AF_INET) + fib = -1; + return (rtinit1(ifa, cmd, flags, fib)); +} + /* * rt_check() is invoked on each layer 2 output path, prior to * encapsulating outbound packets. 
@@ -1399,6 +1652,7 @@ bad: * final destination if directly reachable); * *lrt0 points to the cached route to the final destination; * *lrt is not meaningful; + * fibnum is the index to the correct network fib for this packet * * === Operation === * If the route is marked down try to find a new route. If the route @@ -1415,6 +1669,13 @@ bad: int rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) { + return (rt_check_fib(lrt, lrt0, dst, 0)); +} + +int +rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst, + u_int fibnum) +{ struct rtentry *rt; struct rtentry *rt0; int error; @@ -1426,7 +1687,7 @@ rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) RT_LOCK(rt); if ((rt->rt_flags & RTF_UP) == 0) { RT_UNLOCK(rt); - rt = rtalloc1(dst, 1, 0UL); + rt = rtalloc1_fib(dst, 1, 0UL, fibnum); if (rt != NULL) { RT_REMREF(rt); /* XXX what about if change? */ @@ -1446,7 +1707,8 @@ rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) rt0->rt_gwroute = NULL; lookup: RT_UNLOCK(rt0); - rt = rtalloc1(rt->rt_gateway, 1, 0UL); +/* XXX MRT link level looked up in table 0 */ + rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0); if (rt == rt0) { RT_REMREF(rt0); RT_UNLOCK(rt0); diff --git a/sys/net/route.h b/sys/net/route.h index e9f4980090f1..7b8c4606613a 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -82,6 +82,10 @@ struct rt_metrics { #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) +#define RT_MAXFIBS 16 +extern u_int rt_numfibs; /* number of usable routing tables */ +extern u_int tunnel_fib; /* tunnels use these */ +extern u_int fwd_fib; /* packets being forwarded use these routes */ /* * XXX kernel function pointer `rt_output' is visible to applications.
*/ @@ -120,6 +124,7 @@ struct rtentry { caddr_t rt_llinfo; /* pointer to link level info cache */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ struct rtentry *rt_parent; /* cloning parent of this route */ + u_int rt_fibnum; /* which FIB */ #ifdef _KERNEL /* XXX ugly, user apps use this definition but don't have a mtx def */ struct mtx rt_mtx; /* mutex for routing entry */ @@ -325,11 +330,10 @@ struct rt_addrinfo { RTFREE_LOCKED(_rt); \ } while (0) -extern struct radix_node_head *rt_tables[AF_MAX+1]; +extern struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1]; struct ifmultiaddr; -int rt_getifa(struct rt_addrinfo *); void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); @@ -350,11 +354,15 @@ int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); * RTFREE() uses an unlocked entry. */ +int rtexpunge(struct rtentry *); +void rtfree(struct rtentry *); + +/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ +/* These are used by old code not yet converted to use multiple FIBS */ +int rt_getifa(struct rt_addrinfo *); void rtalloc_ign(struct route *ro, u_long ignflags); void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); -int rtexpunge(struct rtentry *); -void rtfree(struct rtentry *); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, @@ -364,6 +372,25 @@ int rtrequest(int, struct sockaddr *, int rtrequest1(int, struct rt_addrinfo *, struct rtentry **); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); +/* defaults to "all" FIBs */ +int rtinit_fib(struct ifaddr *, int, int); + +/* XXX MRT NEW VERSIONS THAT USE FIBs + * For now the protocol independent versions are the same as the AF_INET ones + * but this will change..
+ */ +int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); +void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); +void rtalloc_fib(struct route *ro, u_int fibnum); +struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); +int rtioctl_fib(u_long, caddr_t, u_int); +void rtredirect_fib(struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct sockaddr *, u_int); +int rtrequest_fib(int, struct sockaddr *, + struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); +int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); +int rt_check_fib(struct rtentry **, struct rtentry **, struct sockaddr *, u_int); + #include <sys/eventhandler.h> typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *); typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 5ea93d37aaba..9511035acab7 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -182,6 +182,7 @@ rts_attach(struct socket *so, int proto, struct thread *td) */ s = splnet(); so->so_pcb = (caddr_t)rp; + so->so_fibnum = td->td_proc->p_fibnum; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { @@ -387,7 +388,8 @@ route_output(struct mbuf *m, struct socket *so) if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; - error = rtrequest1(RTM_ADD, &info, &saved_nrt); + error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, + so->so_fibnum); if (error == 0 && saved_nrt) { RT_LOCK(saved_nrt); rt_setmetrics(rtm->rtm_inits, @@ -401,7 +403,8 @@ route_output(struct mbuf *m, struct socket *so) case RTM_DELETE: saved_nrt = NULL; - error = rtrequest1(RTM_DELETE, &info, &saved_nrt); + error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, + so->so_fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; @@ -412,7 +415,7 @@ route_output(struct mbuf *m, struct socket *so) case RTM_GET: case RTM_CHANGE: 
case RTM_LOCK: - rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]; + rnh = rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family]; if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK(rnh); @@ -530,7 +533,8 @@ route_output(struct mbuf *m, struct socket *so) !sa_equal(info.rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { RT_UNLOCK(rt); - if ((error = rt_getifa(&info)) != 0) + if ((error = rt_getifa_fib(&info, + rt->rt_fibnum)) != 0) senderr(error); RT_LOCK(rt); } @@ -1278,7 +1282,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) } else /* dump only one table */ i = lim = af; for (error = 0; error == 0 && i <= lim; i++) - if ((rnh = rt_tables[i]) != NULL) { + if ((rnh = rt_tables[curthread->td_proc->p_fibnum][i]) != NULL) { RADIX_NODE_HEAD_LOCK(rnh); error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); |
