diff options
Diffstat (limited to 'sys/netinet6')
| -rw-r--r-- | sys/netinet6/icmp6.c | 21 | ||||
| -rw-r--r-- | sys/netinet6/in6.c | 446 | ||||
| -rw-r--r-- | sys/netinet6/in6_rmx.c | 38 | ||||
| -rw-r--r-- | sys/netinet6/in6_src.c | 65 | ||||
| -rw-r--r-- | sys/netinet6/in6_var.h | 4 | ||||
| -rw-r--r-- | sys/netinet6/ip6_input.c | 23 | ||||
| -rw-r--r-- | sys/netinet6/ip6_output.c | 2 | ||||
| -rw-r--r-- | sys/netinet6/ip6_var.h | 2 | ||||
| -rw-r--r-- | sys/netinet6/nd6.c | 1041 | ||||
| -rw-r--r-- | sys/netinet6/nd6.h | 38 | ||||
| -rw-r--r-- | sys/netinet6/nd6_nbr.c | 98 | ||||
| -rw-r--r-- | sys/netinet6/nd6_rtr.c | 67 | ||||
| -rw-r--r-- | sys/netinet6/vinet6.h | 1 |
13 files changed, 846 insertions, 1000 deletions
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 59bc95acdd19..229aaa2e8b40 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -85,6 +85,7 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_dl.h> +#include <net/if_llatbl.h> #include <net/if_types.h> #include <net/route.h> #include <net/vnet.h> @@ -2573,32 +2574,32 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { /* target lladdr option */ - struct rtentry *rt_router = NULL; int len; - struct sockaddr_dl *sdl; + struct llentry *ln; struct nd_opt_hdr *nd_opt; char *lladdr; - rt_router = nd6_lookup(router_ll6, 0, ifp); - if (!rt_router) + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(router_ll6, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + if (!ln) goto nolladdropt; + len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; - if (!(rt_router->rt_flags & RTF_GATEWAY) && - (rt_router->rt_flags & RTF_LLINFO) && - (rt_router->rt_gateway->sa_family == AF_LINK) && - (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) && - sdl->sdl_alen) { + + if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); - bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen); + bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } + LLE_RUNLOCK(ln); } nolladdropt:; diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index c78484542037..81272811ad8c 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> +#include <net/if_llatbl.h> #include <netinet/if_ether.h> #include <netinet/in_systm.h> #include <netinet/ip.h> @@ -135,152 +136,7 @@ static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); struct in6_multihead in6_multihead; /* XXX BSS initialization */ int (*faithprefix_p)(struct in6_addr *); -/* - * Subroutine for in6_ifaddloop() and in6_ifremloop(). - * This routine does actual work. - */ -static void -in6_ifloop_request(int cmd, struct ifaddr *ifa) -{ - struct sockaddr_in6 all1_sa; - struct rtentry *nrt = NULL; - int e; - char ip6buf[INET6_ADDRSTRLEN]; - - bzero(&all1_sa, sizeof(all1_sa)); - all1_sa.sin6_family = AF_INET6; - all1_sa.sin6_len = sizeof(struct sockaddr_in6); - all1_sa.sin6_addr = in6mask128; - - /* - * We specify the address itself as the gateway, and set the - * RTF_LLINFO flag, so that the corresponding host route would have - * the flag, and thus applications that assume traditional behavior - * would be happy. Note that we assume the caller of the function - * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest, - * which changes the outgoing interface to the loopback interface. - */ - e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr, - (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); - if (e != 0) { - /* XXX need more descriptive message */ - - log(LOG_ERR, "in6_ifloop_request: " - "%s operation failed for %s (errno=%d)\n", - cmd == RTM_ADD ? "ADD" : "DELETE", - ip6_sprintf(ip6buf, - &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e); - } - - /* - * Report the addition/removal of the address to the routing socket. - * XXX: since we called rtinit for a p2p interface with a destination, - * we end up reporting twice in such a case. Should we rather - * omit the second report? - */ - if (nrt) { - RT_LOCK(nrt); - /* - * Make sure rt_ifa be equal to IFA, the second argument of - * the function. We need this because when we refer to - * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa - * points to the address instead of the loopback address. - */ - if (cmd == RTM_ADD && ifa != nrt->rt_ifa) { - IFAFREE(nrt->rt_ifa); - IFAREF(ifa); - nrt->rt_ifa = ifa; - } - - rt_newaddrmsg(cmd, ifa, e, nrt); - if (cmd == RTM_DELETE) - RTFREE_LOCKED(nrt); - else { - /* the cmd must be RTM_ADD here */ - RT_REMREF(nrt); - RT_UNLOCK(nrt); - } - } -} - -/* - * Add ownaddr as loopback rtentry. We previously add the route only if - * necessary (ex. on a p2p link). However, since we now manage addresses - * separately from prefixes, we should always add the route. We can't - * rely on the cloning mechanism from the corresponding interface route - * any more. - */ -void -in6_ifaddloop(struct ifaddr *ifa) -{ - struct rtentry *rt; - int need_loop; - - /* If there is no loopback entry, allocate one. */ - rt = rtalloc1(ifa->ifa_addr, 0, 0); - need_loop = (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || - (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0); - if (rt) - RTFREE_LOCKED(rt); - if (need_loop) - in6_ifloop_request(RTM_ADD, ifa); -} - -/* - * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(), - * if it exists. - */ -void -in6_ifremloop(struct ifaddr *ifa) -{ - INIT_VNET_INET6(curvnet); - struct in6_ifaddr *ia; - struct rtentry *rt; - int ia_count = 0; - - /* - * Some of BSD variants do not remove cloned routes - * from an interface direct route, when removing the direct route - * (see comments in net/net_osdep.h). Even for variants that do remove - * cloned routes, they could fail to remove the cloned routes when - * we handle multple addresses that share a common prefix. - * So, we should remove the route corresponding to the deleted address. - */ - /* - * Delete the entry only if exact one ifa exists. More than one ifa - * can exist if we assign a same single address to multiple - * (probably p2p) interfaces. - * XXX: we should avoid such a configuration in IPv6... - */ - for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) { - if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) { - ia_count++; - if (ia_count > 1) - break; - } - } - - if (ia_count == 1) { - /* - * Before deleting, check if a corresponding loopbacked host - * route surely exists. With this check, we can avoid to - * delete an interface direct route whose destination is same - * as the address being removed. This can happen when removing - * a subnet-router anycast address on an interface attahced - * to a shared medium. - */ - rt = rtalloc1(ifa->ifa_addr, 0, 0); - if (rt != NULL) { - if ((rt->rt_flags & RTF_HOST) != 0 && - (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { - RTFREE_LOCKED(rt); - in6_ifloop_request(RTM_DELETE, ifa); - } else - RT_UNLOCK(rt); - } - } -} int in6_mask2len(struct in6_addr *mask, u_char *lim0) @@ -1131,10 +987,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, } } if (!rt) { - /* XXX: we need RTF_CLONING to fake nd6_rtrequest */ error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, + (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0); if (error) goto cleanup; @@ -1208,7 +1063,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, if (!rt) { error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, + (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0); if (error) goto cleanup; @@ -1287,34 +1142,16 @@ in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; - char ip6buf[INET6_ADDRSTRLEN]; struct in6_multi_mship *imm; /* stop DAD processing */ nd6_dad_stop(ifa); - /* - * delete route to the destination of the address being purged. - * The interface must be p2p or loopback in this case. - */ - if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) { - int e; - - if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) - != 0) { - log(LOG_ERR, "in6_purgeaddr: failed to remove " - "a route to the p2p destination: %s on %s, " - "errno=%d\n", - ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), - if_name(ifp), e); - /* proceed anyway... */ - } else - ia->ia_flags &= ~IFA_ROUTE; - } - - /* Remove ownaddr's loopback rtentry, if it exists. */ - in6_ifremloop(&(ia->ia_ifa)); - + IF_AFDATA_LOCK(ifp); + lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + /* * leave from multicast groups we have joined for the interface */ @@ -1688,26 +1525,15 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, /* we could do in(6)_socktrim here, but just omit it at this moment. */ - if (newhost) { - /* - * set the rtrequest function to create llinfo. It also - * adjust outgoing interface of the route for the local - * address when called via in6_ifaddloop() below. - */ - ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; - } - /* * Special case: * If a new destination address is specified for a point-to-point * interface, install a route to the destination as an interface - * direct route. In addition, if the link is expected to have neighbor - * cache entries, specify RTF_LLINFO so that a cache entry for the - * destination address will be created. - * created + * direct route. * XXX: the logic below rejects assigning multiple addresses on a p2p * interface that share the same destination. */ +#if 0 /* QL - verify */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { @@ -1715,7 +1541,6 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, struct rtentry *rt = NULL, **rtp = NULL; if (nd6_need_cache(ifp) != 0) { - rtflags |= RTF_LLINFO; rtp = &rt; } @@ -1744,16 +1569,36 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, } ia->ia_flags |= IFA_ROUTE; } - if (plen < 128) { - /* - * The RTF_CLONING flag is necessary for in6_is_ifloop_auto(). - */ - ia->ia_ifa.ifa_flags |= RTF_CLONING; +#else + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ + if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && + ia->ia_dstaddr.sin6_family == AF_INET6) { + if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, + RTF_UP | RTF_HOST)) != 0) + return (error); + ia->ia_flags |= IFA_ROUTE; } +#endif /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */ - if (newhost) - in6_ifaddloop(&(ia->ia_ifa)); + if (newhost) { + struct llentry *ln; + + IF_AFDATA_LOCK(ifp); + ia->ia_ifa.ifa_rtrequest = NULL; + + /* XXX QL + * we need to report rt_newaddrmsg + */ + ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + if (ln) { + ln->la_expire = 0; /* for IPv6 this means permanent */ + ln->ln_state = ND6_LLINFO_REACHABLE; + LLE_WUNLOCK(ln); + } + } return (error); } @@ -2237,6 +2082,214 @@ in6_if2idlen(struct ifnet *ifp) } } +#include <sys/sysctl.h> + +struct in6_llentry { + struct llentry base; + struct sockaddr_in6 l3_addr6; +}; + +static struct llentry * +in6_lltable_new(const struct sockaddr *l3addr, u_int flags) +{ + struct in6_llentry *lle; + + lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, + M_DONTWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE); + lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; + lle->base.lle_refcnt = 1; + LLE_LOCK_INIT(&lle->base); + return &lle->base; +} + +/* + * Deletes an address from the address table. + * This function is called by the timer functions + * such as arptimer() and nd6_llinfo_timer(), and + * the caller does the locking. + */ +static void +in6_lltable_free(struct lltable *llt, struct llentry *lle) +{ + free(lle, M_LLTABLE); +} + +static int +in6_lltable_rtcheck(struct ifnet *ifp, const struct sockaddr *l3addr) +{ + struct rtentry *rt; + char ip6buf[INET6_ADDRSTRLEN]; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* XXX rtalloc1 should take a const param */ + rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + struct ifaddr *ifa; + /* + * Create an ND6 cache for an IPv6 neighbor + * that is not covered by our own prefix. + */ + /* XXX ifaof_ifpforaddr should take a const param */ + ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); + if (ifa != NULL) { + if (rt != NULL) + rtfree(rt); + return 0; + } + log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", + ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); + if (rt != NULL) + rtfree(rt); + return EINVAL; + } + rtfree(rt); + return 0; +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + struct llentries *lleh; + u_int hashkey; + + IF_AFDATA_LOCK_ASSERT(ifp); + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + hashkey = sin6->sin6_addr.s6_addr32[3]; + lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (bcmp(L3_ADDR(lle), l3addr, l3addr->sa_len) == 0) + break; + } + + if (lle == NULL) { + if (!(flags & LLE_CREATE)) + return (NULL); + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, l3addr) != 0) + return NULL; + + lle = in6_lltable_new(l3addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return NULL; + } + lle->la_flags = flags & ~LLE_CREATE; + if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { + bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lle->lle_tbl = llt; + lle->lle_head = lleh; + LIST_INSERT_HEAD(lleh, lle, lle_next); + } else if (flags & LLE_DELETE) { + LLE_WLOCK(lle); + lle->la_flags = LLE_DELETED; + LLE_WUNLOCK(lle); +#ifdef DIAGNOSTICS + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + lle = (void *)-1; + } + if (LLE_IS_VALID(lle)) { + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + } + return (lle); +} + +static int +in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) +{ + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_in6 sin6; + /* + * ndp.c assumes that sdl is word aligned + */ +#ifdef __LP64__ + uint32_t pad; +#endif + struct sockaddr_dl sdl; + } ndpc; + int i, error; + + /* XXXXX + * current IFNET_RLOCK() is mapped to IFNET_WLOCK() + * so it is okay to use this ASSERT, change it when + * IFNET lock is finalized + */ + IFNET_WLOCK_ASSERT(); + + error = 0; + for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { + LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + struct sockaddr_dl *sdl; + + /* skip deleted or invalid entries */ + if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) + continue; + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_in6 (IPv6) + * struct sockaddr_dl; + */ + bzero(&ndpc, sizeof(ndpc)); + ndpc.rtm.rtm_msglen = sizeof(ndpc); + ndpc.sin6.sin6_family = AF_INET6; + ndpc.sin6.sin6_len = sizeof(ndpc.sin6); + bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); + + /* publish */ + if (lle->la_flags & LLE_PUB) + ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; + + sdl = &ndpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_alen = ifp->if_addrlen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + ndpc.rtm.rtm_rmx.rmx_expire = + lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + ndpc.rtm.rtm_flags |= RTF_HOST; + if (lle->la_flags & LLE_STATIC) + ndpc.rtm.rtm_flags |= RTF_STATIC; + ndpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); + if (error) + break; + } + } + return error; +} + void * in6_domifattach(struct ifnet *ifp) { @@ -2256,6 +2309,14 @@ in6_domifattach(struct ifnet *ifp) ext->nd_ifinfo = nd6_ifattach(ifp); ext->scope6_id = scope6_ifattach(ifp); + ext->lltable = lltable_init(ifp, AF_INET6); + if (ext->lltable != NULL) { + ext->lltable->llt_new = in6_lltable_new; + ext->lltable->llt_free = in6_lltable_free; + ext->lltable->llt_rtcheck = in6_lltable_rtcheck; + ext->lltable->llt_lookup = in6_lltable_lookup; + ext->lltable->llt_dump = in6_lltable_dump; + } return ext; } @@ -2266,6 +2327,7 @@ in6_domifdetach(struct ifnet *ifp, void *aux) scope6_ifdetach(ext->scope6_id); nd6_ifdetach(ext->nd_ifinfo); + lltable_free(ext->lltable); free(ext->in6_ifstat, M_IFADDR); free(ext->icmp6_ifstat, M_IFADDR); free(ext, M_IFADDR); diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index bb5434186f83..42980f7b999f 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -124,6 +124,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); struct radix_node *ret; + RADIX_NODE_HEAD_WLOCK_ASSERT(head); if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) rt->rt_flags |= RTF_MULTICAST; @@ -153,27 +154,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp); ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL && rt->rt_flags & RTF_HOST) { - struct rtentry *rt2; - /* - * We are trying to add a host route, but can't. - * Find out if it is because of an - * ARP entry and delete it if so. - */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED|RTF_CLONING); - if (rt2) { - if (rt2->rt_flags & RTF_LLINFO && - rt2->rt_flags & RTF_HOST && - rt2->rt_gateway && - rt2->rt_gateway->sa_family == AF_LINK) { - rtexpunge(rt2); - RTFREE_LOCKED(rt2); - ret = rn_addroute(v_arg, n_arg, head, - treenodes); - } else - RTFREE_LOCKED(rt2); - } - } else if (ret == NULL && rt->rt_flags & RTF_CLONING) { + if (ret == NULL) { struct rtentry *rt2; /* * We are trying to add a net route, but can't. @@ -187,10 +168,9 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * net route entry, 3ffe:0501:: -> if0. * This case should not raise an error. */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED|RTF_CLONING); + rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED); if (rt2) { - if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY)) - == RTF_CLONING + if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK && rt2->rt_ifp == rt->rt_ifp) { @@ -199,7 +179,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, RTFREE_LOCKED(rt2); } } - return ret; + return (ret); } /* @@ -255,12 +235,6 @@ in6_clsroute(struct radix_node *rn, struct radix_node_head *head) if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ - if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) - return; - - if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) - return; - /* * As requested by David Greenman: * If rtq_reallyold6 is 0, just delete the route without @@ -307,7 +281,7 @@ in6_rtqkill(struct radix_node *rn, void *rock) err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); + rt->rt_flags|RTF_RNH_LOCKED, 0); if (err) { log(LOG_WARNING, "in6_rtqkill: error %d", err); } else { diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 4d7723aa77e3..c343634887e1 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/route.h> +#include <net/if_llatbl.h> #ifdef RADIX_MPATH #include <net/radix_mpath.h> #endif @@ -131,7 +132,7 @@ int ip6_prefer_tempaddr; static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int, int)); + struct rtentry **, int)); static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); @@ -479,8 +480,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int clone, - int norouteok) + struct ifnet **retifp, struct rtentry **retrt, int norouteok) { INIT_VNET_INET6(curvnet); int error = 0; @@ -536,9 +536,10 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (opts && opts->ip6po_nexthop) { struct route_in6 *ron; - + struct llentry *la; + sin6_next = satosin6(opts->ip6po_nexthop); - + /* at this moment, we only support AF_INET6 next hops */ if (sin6_next->sin6_family != AF_INET6) { error = EAFNOSUPPORT; /* or should we proceed? */ @@ -550,6 +551,36 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * by that address must be a neighbor of the sending host. */ ron = &opts->ip6po_nextroute; + /* + * XXX what do we do here? + * PLZ to be fixing + */ + + + if (ron->ro_rt == NULL) { + rtalloc((struct route *)ron); /* multi path case? */ + if (ron->ro_rt == NULL) { + if (ron->ro_rt) { + RTFREE(ron->ro_rt); + ron->ro_rt = NULL; + } + error = EHOSTUNREACH; + goto done; + } + } + + rt = ron->ro_rt; + ifp = rt->rt_ifp; + IF_AFDATA_LOCK(ifp); + la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr); + IF_AFDATA_UNLOCK(ifp); + if (la) + LLE_RUNLOCK(la); + else { + error = EHOSTUNREACH; + goto done; + } +#if 0 if ((ron->ro_rt && (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != (RTF_UP | RTF_LLINFO)) || @@ -573,16 +604,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, goto done; } } - rt = ron->ro_rt; - ifp = rt->rt_ifp; +#endif /* * When cloning is required, try to allocate a route to the * destination so that the caller can store path MTU * information. */ - if (!clone) - goto done; + goto done; } /* @@ -608,21 +637,17 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, *sa6 = *dstsock; sa6->sin6_scope_id = 0; - if (clone) { #ifdef RADIX_MPATH rtalloc_mpath((struct route *)ro, ntohl(sa6->sin6_addr.s6_addr32[3])); -#else - rtalloc((struct route *)ro); -#endif - } else { +#else ro->ro_rt = rtalloc1(&((struct route *)ro) - ->ro_dst, 0, 0UL); + ->ro_dst, 0, 0UL); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); - } +#endif } - + /* * do not care about the result if we have the nexthop * explicitly specified. @@ -693,7 +718,7 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 0, 1)) != 0) { + &rt, 1)) != 0) { if (ro == &sro && rt && rt == sro.ro_rt) RTFREE(rt); return (error); @@ -745,11 +770,11 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int clone) + struct ifnet **retifp, struct rtentry **retrt) { return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, clone, 0)); + retrt, 0)); } /* diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index 957155a3777f..b5dba5e57ece 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -88,13 +88,17 @@ struct in6_addrlifetime { struct nd_ifinfo; struct scope6_id; +struct lltable; struct in6_ifextra { struct in6_ifstat *in6_ifstat; struct icmp6_ifstat *icmp6_ifstat; struct nd_ifinfo *nd_ifinfo; struct scope6_id *scope6_id; + struct lltable *lltable; }; +#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable) + struct in6_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 0c259e3e06e0..475888f8147b 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_systm.h> +#include <net/if_llatbl.h> #ifdef INET #include <netinet/ip.h> #include <netinet/ip_icmp.h> @@ -311,9 +312,11 @@ ip6_input(struct mbuf *m) u_int32_t plen; u_int32_t rtalert = ~0; int nxt, ours = 0; - struct ifnet *deliverifp = NULL; + struct ifnet *deliverifp = NULL, *ifp = NULL; struct in6_addr odst; int srcrt = 0; + struct llentry *lle = NULL; + struct sockaddr_in6 dst6; #ifdef IPSEC /* @@ -548,6 +551,24 @@ passin: /* * Unicast check */ + + bzero(&dst6, sizeof(dst6)); + dst6.sin6_family = AF_INET6; + dst6.sin6_len = sizeof(struct sockaddr_in6); + dst6.sin6_addr = ip6->ip6_dst; + ifp = m->m_pkthdr.rcvif; + IF_AFDATA_LOCK(ifp); + lle = lla_lookup(LLTABLE6(ifp), 0, + (struct sockaddr *)&dst6); + IF_AFDATA_UNLOCK(ifp); + if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { + ours = 1; + deliverifp = ifp; + LLE_RUNLOCK(lle); + goto hbhcheck; + } + LLE_RUNLOCK(lle); + if (V_ip6_forward_rt.ro_rt != NULL && (V_ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 6e5d0d03867c..c0f614f554d8 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -615,7 +615,7 @@ again: dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if ((error = in6_selectroute(&dst_sa, opt, im6o, ro, - &ifp, &rt, 0)) != 0) { + &ifp, &rt)) != 0) { switch (error) { case EHOSTUNREACH: V_ip6stat.ip6s_noroute++; diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 5975cc93d5d6..55bc5dbbbe6f 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -398,7 +398,7 @@ struct in6_addr *in6_selectsrc __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ifnet **, int *)); int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int)); + struct rtentry **)); u_int32_t ip6_randomid __P((void)); u_int32_t ip6_randomflowlabel __P((void)); #endif /* _KERNEL */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 18e81c305f48..5095d2351e06 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$"); #include <sys/protosw.h> #include <sys/errno.h> #include <sys/syslog.h> +#include <sys/lock.h> +#include <sys/rwlock.h> #include <sys/queue.h> #include <sys/sysctl.h> @@ -61,6 +63,8 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> #include <netinet/in.h> +#include <net/if_llatbl.h> +#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include <netinet/if_ether.h> #include <netinet6/in6_var.h> #include <netinet/ip6.h> @@ -98,8 +102,9 @@ int nd6_maxqueuelen; int nd6_debug; /* for debugging? */ +#if 0 static int nd6_inuse, nd6_allocated; -struct llinfo_nd6 llinfo_nd6; +#endif struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix; @@ -114,9 +119,9 @@ static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *, static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); -static struct llinfo_nd6 *nd6_free(struct rtentry *, int); +static struct llentry *nd6_free(struct llentry *, int); static void nd6_llinfo_timer(void *); -static void clear_llinfo_pqueue(struct llinfo_nd6 *); +static void clear_llinfo_pqueue(struct llentry *); #ifdef VIMAGE_GLOBALS struct callout nd6_slowtimo_ch; @@ -162,8 +167,13 @@ nd6_init(void) V_dad_ignore_ns = 0; /* ignore NS in DAD - specwise incorrect*/ V_dad_maxtry = 15; /* max # of *tries* to transmit DAD packet */ + /* + * XXX just to get this to compile KMM + */ +#ifdef notyet V_llinfo_nd6.ln_next = &V_llinfo_nd6; V_llinfo_nd6.ln_prev = &V_llinfo_nd6; +#endif LIST_INIT(&V_nd_prefix); V_ip6_use_tempaddr = 0; @@ -424,14 +434,23 @@ skip1: * ND6 timer routine to handle ND6 entries */ void -nd6_llinfo_settimer(struct llinfo_nd6 *ln, long tick) +nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { if (tick < 0) { - ln->ln_expire = 0; + ln->la_expire = 0; ln->ln_ntick = 0; callout_stop(&ln->ln_timer_ch); + /* + * XXX - do we know that there is + * callout installed? i.e. are we + * guaranteed that we're not dropping + * a reference that we did not add? + * KMM + */ + LLE_REMREF(ln); } else { - ln->ln_expire = time_second + tick / hz; + ln->la_expire = time_second + tick / hz; + LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; callout_reset(&ln->ln_timer_ch, INT_MAX, @@ -444,16 +463,34 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, long tick) } } +void +nd6_llinfo_settimer(struct llentry *ln, long tick) +{ + + LLE_WLOCK(ln); + nd6_llinfo_settimer_locked(ln, tick); + LLE_WUNLOCK(ln); +} + static void nd6_llinfo_timer(void *arg) { - struct llinfo_nd6 *ln; - struct rtentry *rt; + struct llentry *ln; struct in6_addr *dst; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; - ln = (struct llinfo_nd6 *)arg; + ln = (struct llentry *)arg; + if (ln == NULL) { + panic("%s: NULL entry!\n", __func__); + return; + } + + if ((ifp = ((ln->lle_tbl != NULL) ? ln->lle_tbl->llt_ifp : NULL)) == NULL) + panic("ln ifp == NULL"); + + CURVNET_SET(ifp->if_vnet); + INIT_VNET_INET6(curvnet); if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { @@ -463,52 +500,44 @@ nd6_llinfo_timer(void *arg) ln->ln_ntick = 0; nd6_llinfo_settimer(ln, ln->ln_ntick); } - return; + goto done; } - if ((rt = ln->ln_rt) == NULL) - panic("ln->ln_rt == NULL"); - if ((ifp = rt->rt_ifp) == NULL) - panic("ln->ln_rt->rt_ifp == NULL"); ndi = ND_IFINFO(ifp); + dst = &L3_ADDR_SIN6(ln)->sin6_addr; + if ((ln->la_flags & LLE_STATIC) || (ln->la_expire > time_second)) { + goto done; + } - CURVNET_SET(ifp->if_vnet); - INIT_VNET_INET6(curvnet); - - /* sanity check */ - if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln) - panic("rt_llinfo(%p) is not equal to ln(%p)", - rt->rt_llinfo, ln); - if (rt_key(rt) == NULL) - panic("rt key is NULL in nd6_timer(ln=%p)", ln); - - dst = &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + if (ln->la_flags & LLE_DELETED) { + (void)nd6_free(ln, 0); + goto done; + } switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: - if (ln->ln_asked < V_nd6_mmaxtries) { - ln->ln_asked++; + if (ln->la_asked < V_nd6_mmaxtries) { + ln->la_asked++; nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); nd6_ns_output(ifp, NULL, dst, ln, 0); } else { - struct mbuf *m = ln->ln_hold; + struct mbuf *m = ln->la_hold; if (m) { struct mbuf *m0; /* - * assuming every packet in ln_hold has the + * assuming every packet in la_hold has the * same IP header */ m0 = m->m_nextpkt; m->m_nextpkt = NULL; icmp6_error2(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0, rt->rt_ifp); + ICMP6_DST_UNREACH_ADDR, 0, ifp); - ln->ln_hold = m0; + ln->la_hold = m0; clear_llinfo_pqueue(ln); } - if (rt && rt->rt_llinfo) - (void)nd6_free(rt, 0); + (void)nd6_free(ln, 0); ln = NULL; } break; @@ -522,8 +551,7 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_STALE: /* Garbage Collection(RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { - if (rt && rt->rt_llinfo) - (void)nd6_free(rt, 1); + (void)nd6_free(ln, 1); ln = NULL; } break; @@ -531,7 +559,7 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ - ln->ln_asked = 1; + ln->la_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); nd6_ns_output(ifp, dst, dst, ln, 0); @@ -541,31 +569,20 @@ nd6_llinfo_timer(void *arg) } break; case ND6_LLINFO_PROBE: - if (ln->ln_asked < V_nd6_umaxtries) { - ln->ln_asked++; + if (ln->la_asked < V_nd6_umaxtries) { + ln->la_asked++; nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); nd6_ns_output(ifp, dst, dst, ln, 0); - } else if (rt->rt_ifa != NULL && - rt->rt_ifa->ifa_addr->sa_family == AF_INET6 && - (((struct in6_ifaddr *)rt->rt_ifa)->ia_flags & IFA_ROUTE)) { - /* - * This is an unreachable neighbor whose address is - * specified as the destination of a p2p interface - * (see in6_ifinit()). We should not free the entry - * since this is sort of a "static" entry generated - * via interface address configuration. - */ - ln->ln_asked = 0; - ln->ln_expire = 0; /* make it permanent */ - ln->ln_state = ND6_LLINFO_STALE; } else { - if (rt && rt->rt_llinfo) - (void)nd6_free(rt, 0); + (void)nd6_free(ln, 0); ln = NULL; } break; } CURVNET_RESTORE(); +done: + if (ln != NULL) + LLE_FREE(ln); } @@ -772,7 +789,6 @@ void nd6_purge(struct ifnet *ifp) { INIT_VNET_INET6(ifp->if_vnet); - struct llinfo_nd6 *ln, *nln; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; @@ -829,132 +845,54 @@ nd6_purge(struct ifnet *ifp) nd6_setdefaultiface(0); if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */ - /* refresh default router list */ + /* refresh default router list + * + * + */ defrouter_select(); + } - /* - * Nuke neighbor cache entries for the ifp. - * Note that rt->rt_ifp may not be the same as ifp, - * due to KAME goto ours hack. See RTM_RESOLVE case in - * nd6_rtrequest(), and ip6_input(). + /* XXXXX + * We do not nuke the neighbor cache entries here any more + * because the neighbor cache is kept in if_afdata[AF_INET6]. + * nd6_purge() is invoked by in6_ifdetach() which is called + * from if_detach() where everything gets purged. So let + * in6_domifdetach() do the actual L2 table purging work. */ - ln = V_llinfo_nd6.ln_next; - while (ln && ln != &V_llinfo_nd6) { - struct rtentry *rt; - struct sockaddr_dl *sdl; - - nln = ln->ln_next; - rt = ln->ln_rt; - if (rt && rt->rt_gateway && - rt->rt_gateway->sa_family == AF_LINK) { - sdl = (struct sockaddr_dl *)rt->rt_gateway; - if (sdl->sdl_index == ifp->if_index) - nln = nd6_free(rt, 0); - } - ln = nln; - } } -struct rtentry * -nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp) +/* + * the caller acquires and releases the lock on the lltbls + * Returns the llentry locked + */ +struct llentry * +nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) { INIT_VNET_INET6(curvnet); - struct rtentry *rt; struct sockaddr_in6 sin6; - char ip6buf[INET6_ADDRSTRLEN]; - + struct llentry *ln; + int llflags = 0; + bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; - rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL); - if (rt) { - if ((rt->rt_flags & RTF_LLINFO) == 0 && create) { - /* - * This is the case for the default route. - * If we want to create a neighbor cache for the - * address, we should free the route for the - * destination and allocate an interface route. - */ - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (rt == NULL) { - if (create && ifp) { - int e; - /* - * If no route is available and create is set, - * we allocate a host route for the destination - * and treat it like an interface route. - * This hack is necessary for a neighbor which can't - * be covered by our own prefix. - */ - struct ifaddr *ifa = - ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); - if (ifa == NULL) - return (NULL); + IF_AFDATA_LOCK_ASSERT(ifp); - /* - * Create a new route. RTF_LLINFO is necessary - * to create a Neighbor Cache entry for the - * destination in nd6_rtrequest which will be - * called in rtrequest via ifa->ifa_rtrequest. - */ - if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6, - ifa->ifa_addr, (struct sockaddr *)&all1_sa, - (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) & - ~RTF_CLONING, &rt)) != 0) { - log(LOG_ERR, - "nd6_lookup: failed to add route for a " - "neighbor(%s), errno=%d\n", - ip6_sprintf(ip6buf, addr6), e); - } - if (rt == NULL) - return (NULL); - RT_LOCK(rt); - if (rt->rt_llinfo) { - struct llinfo_nd6 *ln = - (struct llinfo_nd6 *)rt->rt_llinfo; - ln->ln_state = ND6_LLINFO_NOSTATE; - } - } else - return (NULL); - } - RT_LOCK_ASSERT(rt); - RT_REMREF(rt); - /* - * Validation for the entry. - * Note that the check for rt_llinfo is necessary because a cloned - * route from a parent route that has the L flag (e.g. the default - * route to a p2p interface) may have the flag, too, while the - * destination is not actually a neighbor. - * XXX: we can't use rt->rt_ifp to check for the interface, since - * it might be the loopback interface if the entry is for our - * own address on a non-loopback interface. Instead, we should - * use rt->rt_ifa->ifa_ifp, which would specify the REAL - * interface. - * Note also that ifa_ifp and ifp may differ when we connect two - * interfaces to a same link, install a link prefix to an interface, - * and try to install a neighbor cache on an interface that does not - * have a route to the prefix. - */ - if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || - rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || - (ifp && rt->rt_ifa->ifa_ifp != ifp)) { - if (create) { - nd6log((LOG_DEBUG, - "nd6_lookup: failed to lookup %s (if = %s)\n", - ip6_sprintf(ip6buf, addr6), - ifp ? if_name(ifp) : "unspec")); - } - RT_UNLOCK(rt); - return (NULL); + if (flags & ND6_CREATE) + llflags |= LLE_CREATE; + if (flags & ND6_EXCLUSIVE) + llflags |= LLE_EXCLUSIVE; + + ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); + if ((ln != NULL) && (flags & LLE_CREATE)) { + ln->ln_state = ND6_LLINFO_NOSTATE; + callout_init(&ln->ln_timer_ch, 0); } - RT_UNLOCK(rt); /* XXX not ready to return rt locked */ - return (rt); + + return (ln); } /* @@ -1040,7 +978,10 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) int nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) { + struct llentry *lle; + int rc = 0; + IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); @@ -1048,10 +989,13 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ - if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) - return (1); - - return (0); + IF_AFDATA_LOCK(ifp); + if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { + LLE_RUNLOCK(lle); + rc = 1; + } + IF_AFDATA_UNLOCK(ifp); + return (rc); } /* @@ -1060,13 +1004,13 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. */ -static struct llinfo_nd6 * -nd6_free(struct rtentry *rt, int gc) +static struct llentry * +nd6_free(struct llentry *ln, int gc) { INIT_VNET_INET6(curvnet); - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next; - struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + struct llentry *next; struct nd_defrouter *dr; + struct ifnet *ifp=NULL; /* * we used to have pfctlinput(PRC_HOSTDEAD) here. @@ -1079,8 +1023,7 @@ nd6_free(struct rtentry *rt, int gc) if (!V_ip6_forwarding) { int s; s = splnet(); - dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - rt->rt_ifp); + dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp); if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { @@ -1102,7 +1045,7 @@ nd6_free(struct rtentry *rt, int gc) else nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); splx(s); - return (ln->ln_next); + return (LIST_NEXT(ln, lle_next)); } if (ln->ln_router || dr) { @@ -1111,7 +1054,7 @@ nd6_free(struct rtentry *rt, int gc) * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ - rt6_flush(&in6, rt->rt_ifp); + rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp); } if (dr) { @@ -1152,15 +1095,13 @@ nd6_free(struct rtentry *rt, int gc) * might have freed other entries (particularly the old next entry) as * a side effect (XXX). */ - next = ln->ln_next; + next = LIST_NEXT(ln, lle_next); - /* - * Detach the route from the routing tree and the list of neighbor - * caches, and disable the route entry not to be used in already - * cached routes. - */ - rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, - rt_mask(rt), 0, (struct rtentry **)0); + ifp = ln->lle_tbl->llt_ifp; + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(ln); + llentry_free(ln); + IF_AFDATA_UNLOCK(ifp); return (next); } @@ -1174,297 +1115,42 @@ void nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) { INIT_VNET_INET6(curvnet); - struct llinfo_nd6 *ln; + struct llentry *ln; + struct ifnet *ifp; - /* - * If the caller specified "rt", use that. Otherwise, resolve the - * routing table by supplied "dst6". - */ - if (rt == NULL) { - if (dst6 == NULL) - return; - if ((rt = nd6_lookup(dst6, 0, NULL)) == NULL) - return; - } - - if ((rt->rt_flags & RTF_GATEWAY) != 0 || - (rt->rt_flags & RTF_LLINFO) == 0 || - rt->rt_llinfo == NULL || rt->rt_gateway == NULL || - rt->rt_gateway->sa_family != AF_LINK) { - /* This is not a host route. */ + if ((dst6 == NULL) || (rt == NULL)) return; - } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - if (ln->ln_state < ND6_LLINFO_REACHABLE) + ifp = rt->rt_ifp; + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) return; + if (ln->ln_state < ND6_LLINFO_REACHABLE) + goto done; + /* * if we get upper-layer reachability confirmation many times, * it is possible we have false information. */ if (!force) { ln->ln_byhint++; - if (ln->ln_byhint > V_nd6_maxnudhint) - return; + if (ln->ln_byhint > V_nd6_maxnudhint) { + goto done; + } } - ln->ln_state = ND6_LLINFO_REACHABLE; + ln->ln_state = ND6_LLINFO_REACHABLE; if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer(ln, (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); } +done: + LLE_WUNLOCK(ln); } -/* - * info - XXX unused - */ -void -nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) -{ - struct sockaddr *gate = rt->rt_gateway; - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - struct ifnet *ifp = rt->rt_ifp; - struct ifaddr *ifa; - INIT_VNET_NET(ifp->if_vnet); - INIT_VNET_INET6(ifp->if_vnet); - - RT_LOCK_ASSERT(rt); - - if ((rt->rt_flags & RTF_GATEWAY) != 0) - return; - - if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) { - /* - * This is probably an interface direct route for a link - * which does not need neighbor caches (e.g. fe80::%lo0/64). - * We do not need special treatment below for such a route. - * Moreover, the RTF_LLINFO flag which would be set below - * would annoy the ndp(8) command. - */ - return; - } - - if (req == RTM_RESOLVE && - (nd6_need_cache(ifp) == 0 || /* stf case */ - !nd6_is_new_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), - ifp))) { - /* - * FreeBSD and BSD/OS often make a cloned host route based - * on a less-specific route (e.g. the default route). - * If the less specific route does not have a "gateway" - * (this is the case when the route just goes to a p2p or an - * stf interface), we'll mistakenly make a neighbor cache for - * the host route, and will see strange neighbor solicitation - * for the corresponding destination. In order to avoid the - * confusion, we check if the destination of the route is - * a neighbor in terms of neighbor discovery, and stop the - * process if not. Additionally, we remove the LLINFO flag - * so that ndp(8) will not try to get the neighbor information - * of the destination. - */ - rt->rt_flags &= ~RTF_LLINFO; - return; - } - - switch (req) { - case RTM_ADD: - /* - * There is no backward compatibility :) - * - * if ((rt->rt_flags & RTF_HOST) == 0 && - * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) - * rt->rt_flags |= RTF_CLONING; - */ - if ((rt->rt_flags & RTF_CLONING) || - ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) { - /* - * Case 1: This route should come from a route to - * interface (RTF_CLONING case) or the route should be - * treated as on-link but is currently not - * (RTF_LLINFO && ln == NULL case). - */ - rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - if (ln) - nd6_llinfo_settimer(ln, 0); - if ((rt->rt_flags & RTF_CLONING) != 0) - break; - } - /* - * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here. - * We don't do that here since llinfo is not ready yet. - * - * There are also couple of other things to be discussed: - * - unsolicited NA code needs improvement beforehand - * - RFC2461 says we MAY send multicast unsolicited NA - * (7.2.6 paragraph 4), however, it also says that we - * SHOULD provide a mechanism to prevent multicast NA storm. - * we don't have anything like it right now. - * note that the mechanism needs a mutual agreement - * between proxies, which means that we need to implement - * a new protocol, or a new kludge. - * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA. - * we need to check ip6forwarding before sending it. - * (or should we allow proxy ND configuration only for - * routers? there's no mention about proxy ND from hosts) - */ - /* FALLTHROUGH */ - case RTM_RESOLVE: - if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) { - /* - * Address resolution isn't necessary for a point to - * point link, so we can skip this test for a p2p link. - */ - if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { - log(LOG_DEBUG, - "nd6_rtrequest: bad gateway value: %s\n", - if_name(ifp)); - break; - } - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - } - if (ln != NULL) - break; /* This happens on a route change */ - /* - * Case 2: This route may come from cloning, or a manual route - * add with a LL address. - */ - R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln)); - rt->rt_llinfo = (caddr_t)ln; - if (ln == NULL) { - log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n"); - break; - } - V_nd6_inuse++; - V_nd6_allocated++; - bzero(ln, sizeof(*ln)); - RT_ADDREF(rt); - ln->ln_rt = rt; - callout_init(&ln->ln_timer_ch, 0); - - /* this is required for "ndp" command. - shin */ - if (req == RTM_ADD) { - /* - * gate should have some valid AF_LINK entry, - * and ln->ln_expire should have some lifetime - * which is specified by ndp command. - */ - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - } else { - /* - * When req == RTM_RESOLVE, rt is created and - * initialized in rtrequest(), so rt_expire is 0. - */ - ln->ln_state = ND6_LLINFO_NOSTATE; - nd6_llinfo_settimer(ln, 0); - } - rt->rt_flags |= RTF_LLINFO; - ln->ln_next = V_llinfo_nd6.ln_next; - V_llinfo_nd6.ln_next = ln; - ln->ln_prev = &V_llinfo_nd6; - ln->ln_next->ln_prev = ln; - - /* - * check if rt_key(rt) is one of my address assigned - * to the interface. - */ - ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp, - &SIN6(rt_key(rt))->sin6_addr); - if (ifa) { - caddr_t macp = nd6_ifptomac(ifp); - nd6_llinfo_settimer(ln, -1); - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (macp) { - bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen); - SDL(gate)->sdl_alen = ifp->if_addrlen; - } - if (V_nd6_useloopback) { - rt->rt_ifp = &V_loif[0]; /* XXX */ - /* - * Make sure rt_ifa be equal to the ifaddr - * corresponding to the address. - * We need this because when we refer - * rt_ifa->ia6_flags in ip6_input, we assume - * that the rt_ifa points to the address instead - * of the loopback address. - */ - if (ifa != rt->rt_ifa) { - IFAFREE(rt->rt_ifa); - IFAREF(ifa); - rt->rt_ifa = ifa; - } - } - } else if (rt->rt_flags & RTF_ANNOUNCE) { - nd6_llinfo_settimer(ln, -1); - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - - /* join solicited node multicast for proxy ND */ - if (ifp->if_flags & IFF_MULTICAST) { - struct in6_addr llsol; - int error; - - llsol = SIN6(rt_key(rt))->sin6_addr; - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr8[12] = 0xff; - if (in6_setscope(&llsol, ifp, NULL)) - break; - if (in6_addmulti(&llsol, ifp, - &error, 0) == NULL) { - char ip6buf[INET6_ADDRSTRLEN]; - nd6log((LOG_ERR, "%s: failed to join " - "%s (errno=%d)\n", if_name(ifp), - ip6_sprintf(ip6buf, &llsol), - error)); - } - } - } - break; - - case RTM_DELETE: - if (ln == NULL) - break; - /* leave from solicited node multicast for proxy ND */ - if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && - (ifp->if_flags & IFF_MULTICAST) != 0) { - struct in6_addr llsol; - struct in6_multi *in6m; - - llsol = SIN6(rt_key(rt))->sin6_addr; - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr8[12] = 0xff; - if (in6_setscope(&llsol, ifp, NULL) == 0) { - IN6_LOOKUP_MULTI(llsol, ifp, in6m); - if (in6m) - in6_delmulti(in6m); - } else - ; /* XXX: should not happen. bark here? */ - } - V_nd6_inuse--; - ln->ln_next->ln_prev = ln->ln_prev; - ln->ln_prev->ln_next = ln->ln_next; - ln->ln_prev = NULL; - nd6_llinfo_settimer(ln, -1); - RT_REMREF(rt); - rt->rt_llinfo = 0; - rt->rt_flags &= ~RTF_LLINFO; - clear_llinfo_pqueue(ln); - Free((caddr_t)ln); - } -} int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) @@ -1477,7 +1163,6 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; struct nd_defrouter *dr; struct nd_prefix *pr; - struct rtentry *rt; int i = 0, error = 0; int s; @@ -1667,25 +1352,25 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) } case SIOCGNBRINFO_IN6: { - struct llinfo_nd6 *ln; + struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); - s = splnet(); - if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) { + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&nb_addr, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + + if (ln == NULL) { error = EINVAL; - splx(s); break; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; nbi->state = ln->ln_state; - nbi->asked = ln->ln_asked; + nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; - nbi->expire = ln->ln_expire; - splx(s); - + nbi->expire = ln->la_expire; + LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ @@ -1703,20 +1388,27 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * * type - ICMP6 type * code - type dependent information + * + * XXXXX + * The caller of this function already acquired the ndp + * cache table lock because the cache entry is returned. */ -struct rtentry * +struct llentry * nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { INIT_VNET_INET6(curvnet); - struct rtentry *rt = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; int is_newentry; - struct sockaddr_dl *sdl = NULL; int do_update; int olladdr; int llchange; + int flags = 0; int newstate = 0; + struct sockaddr_in6 sin6; + struct mbuf *chain = NULL; + + IF_AFDATA_UNLOCK_ASSERT(ifp); if (ifp == NULL) panic("ifp == NULL in nd6_cache_lladdr"); @@ -1736,40 +1428,29 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ - - rt = nd6_lookup(from, 0, ifp); - if (rt == NULL) { - rt = nd6_lookup(from, 1, ifp); + flags |= lladdr ? ND6_EXCLUSIVE : 0; + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(from, flags, ifp); + if (ln) + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) { + flags |= LLE_EXCLUSIVE; + ln = nd6_lookup(from, flags |ND6_CREATE, ifp); + IF_AFDATA_UNLOCK(ifp); is_newentry = 1; } else { /* do nothing if static ndp is set */ - if (rt->rt_flags & RTF_STATIC) - return NULL; + if (ln->la_flags & LLE_STATIC) + goto done; is_newentry = 0; } - - if (rt == NULL) - return NULL; - if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) { -fail: - (void)nd6_free(rt, 0); - return NULL; - } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; if (ln == NULL) - goto fail; - if (rt->rt_gateway == NULL) - goto fail; - if (rt->rt_gateway->sa_family != AF_LINK) - goto fail; - sdl = SDL(rt->rt_gateway); - - olladdr = (sdl->sdl_alen) ? 1 : 0; + return (NULL); + + olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { - if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen)) - llchange = 1; - else - llchange = 0; + llchange = bcmp(lladdr, &ln->ll_addr, + ifp->if_addrlen); } else llchange = 0; @@ -1789,8 +1470,8 @@ fail: * Record source link-layer address * XXX is it dependent to ifp->if_type? */ - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; } if (!is_newentry) { @@ -1821,17 +1502,17 @@ fail: * we must set the timer now, although it is actually * meaningless. */ - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); - if (ln->ln_hold) { + if (ln->la_hold) { struct mbuf *m_hold, *m_hold_next; /* - * reset the ln_hold in advance, to explicitly - * prevent a ln_hold lookup in nd6_output() + * reset the la_hold in advance, to explicitly + * prevent a la_hold lookup in nd6_output() * (wouldn't happen, though...) */ - for (m_hold = ln->ln_hold, ln->ln_hold = NULL; + for (m_hold = ln->la_hold, ln->la_hold = NULL; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; @@ -1841,14 +1522,14 @@ fail: * just set the 2nd argument as the * 1st one. */ - nd6_output(ifp, ifp, m_hold, - (struct sockaddr_in6 *)rt_key(rt), - rt); + nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); } + if (chain) + memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ - nd6_llinfo_settimer((void *)ln, 0); + nd6_llinfo_settimer_locked((void *)ln, 0); } } @@ -1917,6 +1598,17 @@ fail: break; } + if (ln) { + if (flags & ND6_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + if (ln->la_flags & LLE_STATIC) + ln = NULL; + } + if (chain) + nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly @@ -1932,10 +1624,24 @@ fail: * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. */ - if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv) + if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv) { + /* + * guaranteed recursion + */ defrouter_select(); - - return rt; + } + + return (ln); +done: + if (ln) { + if (flags & ND6_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + if (ln->la_flags & LLE_STATIC) + ln = NULL; + } + return (ln); } static void @@ -1969,18 +1675,45 @@ nd6_slowtimo(void *arg) CURVNET_RESTORE(); } -#define senderr(e) { error = (e); goto bad;} int nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct sockaddr_in6 *dst, struct rtentry *rt0) { + + return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL)); +} + + +/* + * Note that I'm not enforcing any global serialization + * lle state or asked changes here as the logic is too + * complicated to avoid having to always acquire an exclusive + * lock + * KMM + * + */ +#define senderr(e) { error = (e); goto bad;} + +int +nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, + struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle, + struct mbuf **tail) +{ INIT_VNET_INET6(curvnet); struct mbuf *m = m0; struct rtentry *rt = rt0; - struct sockaddr_in6 *gw6 = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = lle; int error = 0; + int flags = 0; + +#ifdef INVARIANTS + if (lle) { + + LLE_WLOCK_ASSERT(lle); + KASSERT(tail != NULL, (" lle locked but no tail pointer passed")); + } +#endif if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) goto sendpkt; @@ -1990,81 +1723,6 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, /* * next hop determination. This routine is derived from ether_output. */ - /* NB: the locking here is tortuous... */ - if (rt != NULL) - RT_LOCK(rt); -again: - if (rt != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - RT_UNLOCK(rt); - rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL); - if (rt != NULL) { - RT_REMREF(rt); - if (rt->rt_ifp != ifp) - /* - * XXX maybe we should update ifp too, - * but the original code didn't and I - * don't know what is correct here. - */ - goto again; - } else - senderr(EHOSTUNREACH); - } - - if (rt->rt_flags & RTF_GATEWAY) { - gw6 = (struct sockaddr_in6 *)rt->rt_gateway; - - /* - * We skip link-layer address resolution and NUD - * if the gateway is not a neighbor from ND point - * of view, regardless of the value of nd_ifinfo.flags. - * The second condition is a bit tricky; we skip - * if the gateway is our own address, which is - * sometimes used to install a route to a p2p link. - */ - if (!nd6_is_addr_neighbor(gw6, ifp) || - in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { - RT_UNLOCK(rt); - /* - * We allow this kind of tricky route only - * when the outgoing interface is p2p. - * XXX: we may need a more generic rule here. - */ - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) - senderr(EHOSTUNREACH); - - goto sendpkt; - } - - if (rt->rt_gwroute == NULL) - goto lookup; - rt = rt->rt_gwroute; - RT_LOCK(rt); /* NB: gwroute */ - if ((rt->rt_flags & RTF_UP) == 0) { - RTFREE_LOCKED(rt); /* unlock gwroute */ - rt = rt0; - rt0->rt_gwroute = NULL; - lookup: - RT_UNLOCK(rt0); - rt = rtalloc1(rt->rt_gateway, 1, 0UL); - if (rt == rt0) { - RT_REMREF(rt0); - RT_UNLOCK(rt0); - senderr(EHOSTUNREACH); - } - RT_LOCK(rt0); - if (rt0->rt_gwroute != NULL) - RTFREE(rt0->rt_gwroute); - rt0->rt_gwroute = rt; - if (rt == NULL) { - RT_UNLOCK(rt0); - senderr(EHOSTUNREACH); - } - } - RT_UNLOCK(rt0); - } - RT_UNLOCK(rt); - } /* * Address resolution or Neighbor Unreachability Detection @@ -2073,20 +1731,25 @@ again: * or an anycast address(i.e. not a multicast). */ - /* Look up the neighbor cache for the nexthop */ - if (rt && (rt->rt_flags & RTF_LLINFO) != 0) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - else { - /* - * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), - * the condition below is not very efficient. But we believe - * it is tolerable, because this should be a rare case. - */ - if (nd6_is_addr_neighbor(dst, ifp) && - (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - } - if (ln == NULL || rt == NULL) { + flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0; + if (ln == NULL) { + retry: + IF_AFDATA_LOCK(rt->rt_ifp); + ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst); + IF_AFDATA_UNLOCK(rt->rt_ifp); + if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) { + /* + * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), + * the condition below is not very efficient. But we believe + * it is tolerable, because this should be a rare case. + */ + flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0); + IF_AFDATA_LOCK(rt->rt_ifp); + ln = nd6_lookup(&dst->sin6_addr, flags, ifp); + IF_AFDATA_UNLOCK(rt->rt_ifp); + } + } + if (ln == NULL) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { char ip6buf[INET6_ADDRSTRLEN]; @@ -2096,15 +1759,18 @@ again: ip6_sprintf(ip6buf, &dst->sin6_addr), ln, rt); senderr(EIO); /* XXX: good error? */ } - goto sendpkt; /* send anyway */ } /* We don't have to do link-layer address resolution on a p2p link. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + goto retry; + } ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } /* @@ -2115,9 +1781,14 @@ again: * (RFC 2461 7.3.3) */ if (ln->ln_state == ND6_LLINFO_STALE) { - ln->ln_asked = 0; + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(ln); + goto retry; + } + ln->la_asked = 0; ln->ln_state = ND6_LLINFO_DELAY; - nd6_llinfo_settimer(ln, (long)V_nd6_delay * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz); } /* @@ -2137,12 +1808,18 @@ again: */ if (ln->ln_state == ND6_LLINFO_NOSTATE) ln->ln_state = ND6_LLINFO_INCOMPLETE; - if (ln->ln_hold) { + + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(ln); + goto retry; + } + if (ln->la_hold) { struct mbuf *m_hold; int i; - + i = 0; - for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) { + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) { i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; @@ -2150,21 +1827,32 @@ again: } } while (i >= V_nd6_maxqueuelen) { - m_hold = ln->ln_hold; - ln->ln_hold = ln->ln_hold->m_nextpkt; + m_hold = ln->la_hold; + ln->la_hold = ln->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { - ln->ln_hold = m; + ln->la_hold = m; } - + /* + * We did the lookup (no lle arg) so we + * need to do the unlock here + */ + if (lle == NULL) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } + /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. */ - if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) { - ln->ln_asked++; + if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) { + ln->la_asked++; + nd6_llinfo_settimer(ln, (long)ND_IFINFO(ifp)->retrans * hz / 1000); nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); @@ -2177,23 +1865,82 @@ again: error = ENETDOWN; /* better error? */ goto bad; } + /* + * ln is valid and the caller did not pass in + * an llentry + */ + if (ln && (lle == NULL)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif + if (lle != NULL) { + if (*tail == NULL) + *tail = m; + else + (*tail)->m_nextpkt = m; + return (error); + } if ((ifp->if_flags & IFF_LOOPBACK) != 0) { return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, rt)); } - return ((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt)); + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt); + return (error); bad: + /* + * ln is valid and the caller did not pass in + * an llentry + */ + if (ln && (lle == NULL)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } if (m) m_freem(m); return (error); } #undef senderr + +int +nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, + struct sockaddr_in6 *dst, struct rtentry *rt) +{ + struct mbuf *m, *m_head; + struct ifnet *outifp; + int error = 0; + + m_head = chain; + if ((ifp->if_flags & IFF_LOOPBACK) != 0) + outifp = origifp; + else + outifp = ifp; + + while (m_head) { + m = m_head; + m_head = m_head->m_nextpkt; + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt); + } + + /* + * XXX + * note that intermediate errors are blindly ignored - but this is + * the same convention as used with nd6_output when called by + * nd6_cache_lladdr + */ + return (error); +} + + int nd6_need_cache(struct ifnet *ifp) { @@ -2229,14 +1976,18 @@ nd6_need_cache(struct ifnet *ifp) } } +/* + * the callers of this function need to be re-worked to drop + * the lle lock, drop here for now + */ int nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, - struct sockaddr *dst, u_char *desten) + struct sockaddr *dst, u_char *desten, struct llentry **lle) { - struct sockaddr_dl *sdl; - struct rtentry *rt; - int error; + struct llentry *ln; + *lle = NULL; + IF_AFDATA_UNLOCK_ASSERT(ifp); if (m->m_flags & M_MCAST) { int i; @@ -2271,48 +2022,42 @@ nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, } } - if (rt0 == NULL) { - /* this could happen, if we could not allocate memory */ - m_freem(m); - return (ENOMEM); - } - - error = rt_check(&rt, &rt0, dst); - if (error) { - m_freem(m); - return (error); - } - RT_UNLOCK(rt); - if (rt->rt_gateway->sa_family != AF_LINK) { - printf("nd6_storelladdr: something odd happens\n"); - m_freem(m); - return (EINVAL); - } - sdl = SDL(rt->rt_gateway); - if (sdl->sdl_alen == 0) { - /* this should be impossible, but we bark here for debugging */ - printf("nd6_storelladdr: sdl_alen == 0\n"); + /* + * the entry should have been created in nd6_store_lladdr + */ + IF_AFDATA_LOCK(ifp); + ln = lla_lookup(LLTABLE6(ifp), 0, dst); + IF_AFDATA_UNLOCK(ifp); + if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) { + if (ln) + LLE_RUNLOCK(ln); + /* this could happen, if we could not allocate memory */ m_freem(m); - return (EINVAL); + return (1); } - bcopy(LLADDR(sdl), desten, sdl->sdl_alen); + bcopy(&ln->ll_addr, desten, ifp->if_addrlen); + *lle = ln; + LLE_RUNLOCK(ln); + /* + * A *small* use after free race exists here + */ return (0); } -static void -clear_llinfo_pqueue(struct llinfo_nd6 *ln) +static void +clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; - for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold_next) { + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; m_freem(m_hold); } - ln->ln_hold = NULL; + ln->la_hold = NULL; return; } diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index 4d3c06bfecde..f4ccd07ec8f5 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -41,20 +41,7 @@ #include <sys/queue.h> #include <sys/callout.h> -struct llinfo_nd6 { - struct llinfo_nd6 *ln_next; - struct llinfo_nd6 *ln_prev; - struct rtentry *ln_rt; - struct mbuf *ln_hold; /* last packet until resolved/timeout */ - long ln_asked; /* number of queries already sent for this addr */ - u_long ln_expire; /* lifetime for NDP state transition */ - short ln_state; /* reachability state */ - short ln_router; /* 2^0: ND6 router bit */ - int ln_byhint; /* # of times we made it reachable by UL hint */ - - long ln_ntick; - struct callout ln_timer_ch; -}; +struct llentry; #define ND6_LLINFO_NOSTATE -2 /* @@ -72,7 +59,7 @@ struct llinfo_nd6 { #define ND6_LLINFO_PROBE 4 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) -#define ND6_LLINFO_PERMANENT(n) (((n)->ln_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) +#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) struct nd_ifinfo { u_int32_t linkmtu; /* LinkMTU */ @@ -98,6 +85,9 @@ struct nd_ifinfo { */ #define ND6_IFF_DONT_SET_IFROUTE 0x10 +#define ND6_CREATE LLE_CREATE +#define ND6_EXCLUSIVE LLE_EXCLUSIVE + #ifdef _KERNEL #define ND_IFINFO(ifp) \ (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo) @@ -336,7 +326,6 @@ extern int nd6_mmaxtries; extern int nd6_useloopback; extern int nd6_maxnudhint; extern int nd6_gctimer; -extern struct llinfo_nd6 llinfo_nd6; extern struct nd_drhead nd_defrouter; extern struct nd_prhead nd_prefix; extern int nd6_debug; @@ -388,23 +377,28 @@ int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *)); void nd6_option_init __P((void *, int, union nd_opts *)); struct nd_opt_hdr *nd6_option __P((union nd_opts *)); int nd6_options __P((union nd_opts *)); -struct rtentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *)); +struct llentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *)); void nd6_setmtu __P((struct ifnet *)); -void nd6_llinfo_settimer __P((struct llinfo_nd6 *, long)); +void nd6_llinfo_settimer __P((struct llentry *, long)); +void nd6_llinfo_settimer_locked __P((struct llentry *, long)); void nd6_timer __P((void *)); void nd6_purge __P((struct ifnet *)); void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int)); int nd6_resolve __P((struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *)); -void nd6_rtrequest __P((int, struct rtentry *, struct rt_addrinfo *)); int nd6_ioctl __P((u_long, caddr_t, struct ifnet *)); -struct rtentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *, +struct llentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *, char *, int, int, int)); int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *, struct rtentry *)); +int nd6_output_lle __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *, struct llentry *, + struct mbuf **)); +int nd6_output_flush __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *)); int nd6_need_cache __P((struct ifnet *)); int nd6_storelladdr __P((struct ifnet *, struct rtentry *, struct mbuf *, - struct sockaddr *, u_char *)); + struct sockaddr *, u_char *, struct llentry **)); /* nd6_nbr.c */ void nd6_na_input __P((struct mbuf *, int, int)); @@ -412,7 +406,7 @@ void nd6_na_output __P((struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *)); void nd6_ns_input __P((struct mbuf *, int, int)); void nd6_ns_output __P((struct ifnet *, const struct in6_addr *, - const struct in6_addr *, struct llinfo_nd6 *, int)); + const struct in6_addr *, struct llentry *, int)); caddr_t nd6_ifptomac __P((struct ifnet *)); void nd6_dad_start __P((struct ifaddr *, int)); void nd6_dad_stop __P((struct ifaddr *)); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index ecfad0efe8e0..2dcabdb7cdec 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/rwlock.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -63,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> +#include <net/if_llatbl.h> +#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include <netinet6/in6_var.h> #include <netinet6/in6_ifattach.h> #include <netinet/ip6.h> @@ -167,7 +171,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) src_sa6.sin6_family = AF_INET6; src_sa6.sin6_len = sizeof(src_sa6); src_sa6.sin6_addr = saddr6; - if (!nd6_is_addr_neighbor(&src_sa6, ifp)) { + if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) { nd6log((LOG_INFO, "nd6_ns_input: " "NS packet from non-neighbor\n")); goto bad; @@ -378,8 +382,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) * dad - duplicate address detection */ void -nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, - const struct in6_addr *taddr6, struct llinfo_nd6 *ln, int dad) +nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, + const struct in6_addr *taddr6, struct llentry *ln, int dad) { INIT_VNET_INET6(ifp->if_vnet); struct mbuf *m; @@ -470,14 +474,14 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, struct ip6_hdr *hip6; /* hold ip6 */ struct in6_addr *hsrc = NULL; - if (ln && ln->ln_hold) { + if (ln && ln->la_hold) { /* - * assuming every packet in ln_hold has the same IP + * assuming every packet in la_hold has the same IP * header */ - hip6 = mtod(ln->ln_hold, struct ip6_hdr *); + hip6 = mtod(ln->la_hold, struct ip6_hdr *); /* XXX pullup? */ - if (sizeof(*hip6) < ln->ln_hold->m_len) + if (sizeof(*hip6) < ln->la_hold->m_len) hsrc = &hip6->ip6_src; else hsrc = NULL; @@ -600,10 +604,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) char *lladdr = NULL; int lladdrlen = 0; struct ifaddr *ifa; - struct llinfo_nd6 *ln; - struct rtentry *rt; - struct sockaddr_dl *sdl; + struct llentry *ln = NULL; union nd_opts ndopts; + struct mbuf *chain = NULL; + struct sockaddr_in6 sin6; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { @@ -697,35 +701,37 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ - rt = nd6_lookup(&taddr6, 0, ifp); - if ((rt == NULL) || - ((ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) || - ((sdl = SDL(rt->rt_gateway)) == NULL)) + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) { goto freeit; + } if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, * discard the packet. */ - if (ifp->if_addrlen && lladdr == NULL) + if (ifp->if_addrlen && lladdr == NULL) { goto freeit; + } /* * Record link-layer address, and update the state. */ - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer(ln, - (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); + nd6_llinfo_settimer_locked(ln, + (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz); } } else { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } if ((ln->ln_router = is_router) != 0) { /* @@ -744,8 +750,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) if (lladdr == NULL) llchange = 0; else { - if (sdl->sdl_alen) { - if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen)) + if (ln->la_flags & LLE_VALID) { + if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; @@ -779,7 +785,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ if (ln->ln_state == ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } goto freeit; } else if (is_override /* (2a) */ @@ -789,8 +795,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * Update link-local address, if any. */ if (lladdr != NULL) { - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; } /* @@ -802,13 +808,13 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer(ln, + nd6_llinfo_settimer_locked(ln, (long)ND_IFINFO(ifp)->reachable * hz); } } else { if (lladdr != NULL && llchange) { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } } @@ -822,9 +828,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ struct nd_defrouter *dr; struct in6_addr *in6; - int s; - in6 = &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + in6 = &L3_ADDR_SIN6(ln)->sin6_addr; /* * Lock to protect the default router list. @@ -832,8 +837,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * is only called under the network software interrupt * context. However, we keep it just for safety. */ - s = splnet(); - dr = defrouter_lookup(in6, ifp); + dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); if (dr) defrtrlist_del(dr); else if (!V_ip6_forwarding) { @@ -846,21 +850,23 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ rt6_flush(&ip6->ip6_src, ifp); } - splx(s); } ln->ln_router = is_router; } - rt->rt_flags &= ~RTF_REJECT; - ln->ln_asked = 0; - if (ln->ln_hold) { + /* XXX - QL + * Does this matter? + * rt->rt_flags &= ~RTF_REJECT; + */ + ln->la_asked = 0; + if (ln->la_hold) { struct mbuf *m_hold, *m_hold_next; /* - * reset the ln_hold in advance, to explicitly - * prevent a ln_hold lookup in nd6_output() + * reset the la_hold in advance, to explicitly + * prevent a la_hold lookup in nd6_output() * (wouldn't happen, though...) */ - for (m_hold = ln->ln_hold; + for (m_hold = ln->la_hold, ln->la_hold = NULL; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; @@ -868,17 +874,25 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * we assume ifp is not a loopback here, so just set * the 2nd argument as the 1st one. */ - nd6_output(ifp, ifp, m_hold, - (struct sockaddr_in6 *)rt_key(rt), rt); + nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); } - ln->ln_hold = NULL; } - freeit: + if (ln) { + if (chain) + memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); + LLE_WUNLOCK(ln); + + if (chain) + nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + } m_freem(m); return; bad: + if (ln) + LLE_WUNLOCK(ln); + V_icmp6stat.icp6s_badna++; m_freem(m); } diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index c9ed36d29e75..7b4af3cc73bd 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> #include <netinet/in.h> +#include <net/if_llatbl.h> #include <netinet6/in6_var.h> #include <netinet6/in6_ifattach.h> #include <netinet/ip6.h> @@ -471,10 +472,8 @@ defrouter_addreq(struct nd_defrouter *new) (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt); if (newrt) { - RT_LOCK(newrt); nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ - RT_REMREF(newrt); - RT_UNLOCK(newrt); + RTFREE(newrt); } if (error == 0) new->installed = 1; @@ -615,8 +614,7 @@ defrouter_select(void) INIT_VNET_INET6(curvnet); int s = splnet(); struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; - struct rtentry *rt = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; /* * This function should be called only when acting as an autoconfigured @@ -648,12 +646,13 @@ defrouter_select(void) */ for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { + IF_AFDATA_LOCK(dr->ifp); if (selected_dr == NULL && - (rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && + (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; } + IF_AFDATA_UNLOCK(dr->ifp); if (dr->installed && installed_dr == NULL) installed_dr = dr; @@ -676,12 +675,14 @@ defrouter_select(void) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); - } else if (installed_dr && - (rt = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && - ND6_IS_LLINFO_PROBREACH(ln) && - rtpref(selected_dr) <= rtpref(installed_dr)) { - selected_dr = installed_dr; + } else if (installed_dr) { + IF_AFDATA_LOCK(installed_dr->ifp); + if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && + ND6_IS_LLINFO_PROBREACH(ln) && + rtpref(selected_dr) <= rtpref(installed_dr)) { + selected_dr = installed_dr; + } + IF_AFDATA_UNLOCK(installed_dr->ifp); } /* @@ -1323,18 +1324,19 @@ static struct nd_pfxrouter * find_pfxlist_reachable_router(struct nd_prefix *pr) { struct nd_pfxrouter *pfxrtr; - struct rtentry *rt; - struct llinfo_nd6 *ln; + struct llentry *ln; for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr; pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) { - if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0, + IF_AFDATA_LOCK(pfxrtr->router->ifp); + if ((ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && - ND6_IS_LLINFO_PROBREACH(ln)) + ND6_IS_LLINFO_PROBREACH(ln)) { + IF_AFDATA_UNLOCK(pfxrtr->router->ifp); break; /* found */ + } + IF_AFDATA_UNLOCK(pfxrtr->router->ifp); } - return (pfxrtr); } @@ -1541,8 +1543,10 @@ nd6_prefix_onlink(struct nd_prefix *pr) struct nd_prefix *opr; u_long rtflags; int error = 0; + struct radix_node_head *rnh; struct rtentry *rt = NULL; char ip6buf[INET6_ADDRSTRLEN]; + struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { @@ -1609,21 +1613,24 @@ nd6_prefix_onlink(struct nd_prefix *pr) bzero(&mask6, sizeof(mask6)); mask6.sin6_len = sizeof(mask6); mask6.sin6_addr = pr->ndpr_mask; - rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP; - if (nd6_need_cache(ifp)) { - /* explicitly set in case ifa_flags does not set the flag. */ - rtflags |= RTF_CLONING; - } else { - /* - * explicitly clear the cloning bit in case ifa_flags sets it. - */ - rtflags &= ~RTF_CLONING; - } + rtflags = ifa->ifa_flags | RTF_UP; error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); if (error == 0) { - if (rt != NULL) /* this should be non NULL, though */ + if (rt != NULL) /* this should be non NULL, though */ { + rnh = V_rt_tables[rt->rt_fibnum][AF_INET6]; + RADIX_NODE_HEAD_LOCK(rnh); + RT_LOCK(rt); + if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) { + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = + rt->rt_ifp->if_type; + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = + rt->rt_ifp->if_index; + } + RADIX_NODE_HEAD_UNLOCK(rnh); nd6_rtmsg(RTM_ADD, rt); + RT_UNLOCK(rt); + } pr->ndpr_stateflags |= NDPRF_ONLINK; } else { char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN]; diff --git a/sys/netinet6/vinet6.h b/sys/netinet6/vinet6.h index d6c3f336772b..e271d4ff217e 100644 --- a/sys/netinet6/vinet6.h +++ b/sys/netinet6/vinet6.h @@ -78,7 +78,6 @@ struct vnet_inet6 { int _nd6_inuse; int _nd6_allocated; int _nd6_onlink_ns_rfc4861; - struct llinfo_nd6 _llinfo_nd6; struct nd_drhead _nd_defrouter; struct nd_prhead _nd_prefix; struct ifnet * _nd6_defifp; |
