diff options
Diffstat (limited to 'sys/netinet/ip_carp.c')
-rw-r--r-- | sys/netinet/ip_carp.c | 654 |
1 files changed, 568 insertions, 86 deletions
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index ddbc13e7c878..cf7c8d03ca73 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -37,6 +37,7 @@ #include <sys/systm.h> #include <sys/devctl.h> #include <sys/jail.h> +#include <sys/kassert.h> #include <sys/kernel.h> #include <sys/limits.h> #include <sys/malloc.h> @@ -95,6 +96,7 @@ static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ + carp_version_t sc_version; /* carp or VRRPv3 */ struct sockaddr_dl sc_addr; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET @@ -106,10 +108,17 @@ struct carp_softc { struct mtx sc_mtx; int sc_vhid; - int sc_advskew; - int sc_advbase; - struct in_addr sc_carpaddr; - struct in6_addr sc_carpaddr6; + struct { /* CARP specific context */ + int sc_advskew; + int sc_advbase; + struct in_addr sc_carpaddr; + struct in6_addr sc_carpaddr6; + }; + struct { /* VRRPv3 specific context */ + uint8_t sc_vrrp_prio; + uint16_t sc_vrrp_adv_inter; + uint16_t sc_vrrp_master_inter; + }; int sc_naddrs; int sc_naddrs6; @@ -166,6 +175,9 @@ struct carpkreq { /* Everything above this is identical to carpreq */ struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_priority; + uint16_t carpr_vrrp_adv_inter; }; /* @@ -325,6 +337,7 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 0 : ((sc)->sc_advskew + V_carp_demotion))) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); +static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); static struct carp_softc *carp_alloc(struct ifnet *); static void carp_destroy(struct carp_softc *); @@ -339,6 +352,7 @@ static void carp_master_down_locked(struct carp_softc *, const char* reason); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); +static void vrrp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); @@ -373,7 +387,7 @@ carp_is_supported_if(if_t ifp) static void carp_hmac_prepare(struct carp_softc *sc) { - uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; @@ -478,6 +492,22 @@ carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], return (bcmp(md, md2, sizeof(md2))); } +static int +vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) +{ + uint16_t cksum; + + /* + * Note that VRRPv3 checksums are different from CARP checksums. + * Carp just calculates the checksum over the packet. + * VRRPv3 includes the pseudo-header checksum as well. + */ + cksum = in_cksum_skip(m, off + len, off); + cksum -= phdrcksum; + + return (cksum); +} + /* * process input packet. * we have rearranged checks order compared to the rfc, @@ -489,8 +519,11 @@ carp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); - struct carp_header *ch; - int iplen, len; + struct vrrpv3_header *vh; + int iplen; + int minlen; + int totlen; + uint16_t phdrcksum = 0; iplen = *offp; *mp = NULL; @@ -503,59 +536,93 @@ carp_input(struct mbuf **mp, int *offp, int proto) } iplen = ip->ip_hl << 2; + totlen = ntohs(ip->ip_len); - if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + /* Ensure we have enough header to figure out the version. */ + if (m->m_pkthdr.len < iplen + sizeof(*vh)) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " + CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if (iplen + sizeof(*ch) < m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + if (iplen + sizeof(*vh) < m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { CARPSTATS_INC(carps_hdrops); - CARP_DEBUG("%s: pullup failed\n", __func__); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } - ch = (struct carp_header *)((char *)ip + iplen); + vh = (struct vrrpv3_header *)((char *)ip + iplen); - /* - * verify that the received packet length is - * equal to the CARP header - */ - len = iplen + sizeof(*ch); - if (len > m->m_pkthdr.len) { + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. */ + if (m->m_pkthdr.len < iplen + minlen) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: packet too short %d on %s\n", __func__, - m->m_pkthdr.len, + CARP_DEBUG("%s: received len %zd < %d " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + iplen + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if ((m = m_pullup(m, len)) == NULL) { - CARPSTATS_INC(carps_hdrops); - return (IPPROTO_DONE); + if (iplen + minlen < m->m_len) { + if ((m = m_pullup(m, iplen + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip = mtod(m, struct ip *); + vh = (struct vrrpv3_header *)((char *)ip + iplen); } - ip = mtod(m, struct ip *); - ch = (struct carp_header *)((char *)ip + iplen); - /* verify the CARP checksum */ - m->m_data += iplen; - if (in_cksum(m, len - iplen)) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed on %s\n", __func__, - if_name(m->m_pkthdr.rcvif)); - m_freem(m); - return (IPPROTO_DONE); + phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl((u_short)(totlen - iplen) + ip->ip_p)); + + if (vh->vrrp_version == CARP_VERSION_CARP) { + /* verify the CARP checksum */ + m->m_data += iplen; + if (in_cksum(m, totlen - iplen)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= iplen; + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch = (struct carp_header *)((char *)ip + iplen); + carp_input_c(m, ch, AF_INET, ip->ip_ttl); + break; + } + case CARP_VERSION_VRRPv3: + vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, phdrcksum); + break; + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); } - m->m_data -= iplen; - carp_input_c(m, ch, AF_INET, ip->ip_ttl); return (IPPROTO_DONE); } #endif @@ -566,8 +633,9 @@ carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct carp_header *ch; - u_int len; + struct vrrpv3_header *vh; + u_int len, minlen; + uint16_t phdrcksum = 0; CARPSTATS_INC(carps_ipackets6); @@ -585,10 +653,9 @@ carp6_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } - /* verify that we have a complete carp packet */ - if (m->m_len < *offp + sizeof(*ch)) { + if (m->m_len < *offp + sizeof(*vh)) { len = m->m_len; - m = m_pullup(m, *offp + sizeof(*ch)); + m = m_pullup(m, *offp + sizeof(*vh)); if (m == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); @@ -596,20 +663,72 @@ carp6_input(struct mbuf **mp, int *offp, int proto) } ip6 = mtod(m, struct ip6_hdr *); } - ch = (struct carp_header *)(mtod(m, char *) + *offp); + vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); - /* verify the CARP checksum */ - m->m_data += *offp; - if (in_cksum(m, sizeof(*ch))) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. */ + if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { + CARPSTATS_INC(carps_badlen); + CARP_DEBUG("%s: received len %zd < %zd " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + sizeof(*ip6) + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - m->m_data -= *offp; - carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + if (sizeof (*ip6) + minlen < m->m_len) { + if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip6 = mtod(m, struct ip6_hdr *); + vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + } + + phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), ip6->ip6_nxt, 0); + + /* verify the CARP checksum */ + if (vh->vrrp_version == CARP_VERSION_CARP) { + m->m_data += *offp; + if (in_cksum(m, sizeof(struct carp_header))) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= *offp; + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); + carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + break; + } + case CARP_VERSION_VRRPv3: + vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, ntohs(ip6->ip6_plen), + phdrcksum); + break; + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); + } return (IPPROTO_DONE); } #endif /* INET6 */ @@ -629,7 +748,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto) * The VHID test is outside this mini-function. */ static int -carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) +carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; @@ -659,16 +778,12 @@ carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) return (0); } -static void -carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +static struct ifaddr * +carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; - struct carp_softc *sc; - uint64_t tmp_counter; - struct timeval sc_tv, ch_tv; int error; - bool multicast = false; NET_EPOCH_ASSERT(); @@ -688,9 +803,9 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && - ifa->ifa_carp->sc_vhid == ch->carp_vhid) + ifa->ifa_carp->sc_vhid == vhid) match = ifa; - if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) + if (vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? NULL : match; @@ -705,12 +820,37 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) } else { CARPSTATS_INC(carps_badvhid); } + } + + return (ifa); +} + +static void +carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + uint64_t tmp_counter; + struct timeval sc_tv, ch_tv; + bool multicast = false; + + NET_EPOCH_ASSERT(); + + ifa = carp_find_ifa(m, af, ch->carp_vhid); + if (ifa == NULL) { m_freem(m); return; } + sc = ifa->ifa_carp; + CARP_LOCK(sc); + /* verify the CARP version. */ - if (ch->carp_version != CARP_VERSION) { + if (ch->carp_version != CARP_VERSION_CARP || + sc->sc_version != CARP_VERSION_CARP) { + CARP_UNLOCK(sc); + CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), ch->carp_version); @@ -719,8 +859,6 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) return; } - sc = ifa->ifa_carp; - CARP_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); } else { @@ -809,11 +947,130 @@ out: m_freem(m); } +static void +vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, + int len, uint16_t phdrcksum) +{ + struct vrrpv3_header *vh = mtodo(m, off); + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + + NET_EPOCH_ASSERT(); + + ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); + if (ifa == NULL) { + m_freem(m); + return; + } + + sc = ifa->ifa_carp; + CARP_LOCK(sc); + + ifa_free(ifa); + + /* verify the CARP version. */ + if (vh->vrrp_version != CARP_VERSION_VRRPv3 || sc->sc_version != CARP_VERSION_VRRPv3) { + CARP_UNLOCK(sc); + + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), + vh->vrrp_version); + m_freem(m); + return; + } + + /* verify that the IP TTL is 255. */ + if (ttl != CARP_DFLTTL) { + CARPSTATS_INC(carps_badttl); + CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, + ttl, if_name(m->m_pkthdr.rcvif)); + goto out; + } + + if (vrrp_checksum_verify(m, off, len, phdrcksum)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, + sc->sc_vhid, if_name(ifp)); + goto out; + } + + /* RFC9568, 7.1 Receiving VRRP packets. */ + if (sc->sc_vrrp_prio == 255) { + CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", + __func__); + goto out; + } + + /* XXX TODO Check IP address payload. */ + + sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. + * Same if the peer has a higher priority than us. + */ + if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || + vh->vrrp_priority > sc->sc_vrrp_prio) { + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP, + "more frequent advertisement received"); + carp_setrun(sc, 0); + carp_delroute(sc); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter + || vh->vrrp_priority < sc->sc_vrrp_prio)) { + carp_master_down_locked(sc, + "preempting a slower master"); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. + */ + carp_setrun(sc, af); + break; + } + +out: + CARP_UNLOCK(sc); + m_freem(m); +} + static int -carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +carp_tag(struct carp_softc *sc, struct mbuf *m) { struct m_tag *mtag; + /* Tag packet for carp_output */ + if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), + M_NOWAIT)) == NULL) { + m_freem(m); + CARPSTATS_INC(carps_onomem); + return (ENOMEM); + } + bcopy(&sc, mtag + 1, sizeof(sc)); + m_tag_prepend(m, mtag); + + return (0); +} + +static int +carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +{ + if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); @@ -827,17 +1084,7 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); - /* Tag packet for carp_output */ - if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), - M_NOWAIT)) == NULL) { - m_freem(m); - CARPSTATS_INC(carps_onomem); - return (ENOMEM); - } - bcopy(&sc, mtag + 1, sizeof(sc)); - m_tag_prepend(m, mtag); - - return (0); + return (carp_tag(sc, m)); } /* @@ -959,11 +1206,16 @@ carp_send_ad_locked(struct carp_softc *sc) NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); + if (sc->sc_version == CARP_VERSION_VRRPv3) { + vrrp_send_ad_locked(sc); + return; + } + advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; - ch.carp_version = CARP_VERSION; + ch.carp_version = CARP_VERSION_CARP; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; @@ -1091,6 +1343,187 @@ resched: } static void +vrrp_send_ad_locked(struct carp_softc *sc) +{ + struct vrrpv3_header *vh_ptr; + struct ifaddr *ifa; + struct mbuf *m; + int len; + struct vrrpv3_header vh = { + .vrrp_version = CARP_VERSION_VRRPv3, + .vrrp_type = VRRP_TYPE_ADVERTISEMENT, + .vrrp_vrtid = sc->sc_vhid, + .vrrp_priority = sc->sc_vrrp_prio, + .vrrp_count_addr = 0, + .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), + .vrrp_checksum = 0, + }; + + NET_EPOCH_ASSERT(); + CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_VRRPv3); + +#ifdef INET + if (sc->sc_naddrs) { + struct ip *ip; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; + ip->ip_off = htons(IP_DF); + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip_fillid(ip); + + ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); + if (ifa != NULL) { + ip->ip_src.s_addr = + ifatoia(ifa)->ia_addr.sin_addr.s_addr; + ifa_free(ifa); + } else + ip->ip_src.s_addr = 0; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + /* Include the IP addresses in the announcement. */ + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in *in; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) + continue; + + in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in->sin_addr), + (caddr_t)&in->sin_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in->sin_addr); + } + ip->ip_len = htons(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); + + if (carp_tag(sc, m)) + goto resched; + + CARPSTATS_INC(carps_opackets); + + carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, + &sc->sc_carpdev->if_carp->cif_imo, NULL)); + } +#endif +#ifdef INET6 + if (sc->sc_naddrs6) { + struct ip6_hdr *ip6; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip6) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + /* Traffic class isn't defined in ip6 struct instead + * it gets offset into flowid field */ + ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + + IPTOS_DSCP_OFFSET)); + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + + /* set the source address */ + ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); + if (ifa != NULL) { + bcopy(IFA_IN6(ifa), &ip6->ip6_src, + sizeof(struct in6_addr)); + ifa_free(ifa); + } else + /* This should never happen with IPv6. */ + bzero(&ip6->ip6_src, sizeof(struct in6_addr)); + + /* Set the multicast destination. */ + bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); + ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + ip6->ip6_dst.s6_addr8[15] = 0x12; + + /* Include the IP addresses in the announcement. */ + len = sizeof(vh); + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in6 *in6; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in6->sin6_addr), + (char *)&in6->sin6_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in6->sin6_addr); + } + ip6->ip6_plen = htonl(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); + + if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { + m_freem(m); + CARP_DEBUG("%s: in6_setscope failed\n", __func__); + goto resched; + } + + if (carp_tag(sc, m)) + goto resched; + CARPSTATS_INC(carps_opackets6); + + carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, + &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); + } +#endif + +resched: + callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, + carp_send_ad, sc); +} + +static void carp_addroute(struct carp_softc *sc) { struct ifaddr *ifa; @@ -1357,6 +1790,7 @@ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; + int timeout; CARP_LOCK_ASSERT(sc); @@ -1373,40 +1807,57 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) break; case BACKUP: callout_stop(&sc->sc_ad_tmo); - tv.tv_sec = 3 * sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; + + if (sc->sc_version == CARP_VERSION_CARP) { + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + timeout = tvtohz(&tv); + } else { + /* skew time */ + timeout = (256 - sc->sc_vrrp_prio) * sc->sc_vrrp_master_inter / 256; + timeout += (3 * sc->sc_vrrp_master_inter); + timeout *= hz; + timeout /= 100; /* master interval is in centiseconds. */ + } + switch (af) { #ifdef INET case AF_INET: - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); #endif break; } break; case MASTER: - tv.tv_sec = sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; - callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), - carp_send_ad, sc); + if (sc->sc_version == CARP_VERSION_CARP) { + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + } else { + callout_reset(&sc->sc_ad_tmo, + sc->sc_vrrp_adv_inter * hz / 100, + carp_send_ad, sc); + } break; } } @@ -1634,6 +2085,7 @@ carp_alloc(struct ifnet *ifp) sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + sc->sc_version = CARP_VERSION_CARP; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_init_counter = 1; @@ -1647,6 +2099,10 @@ carp_alloc(struct ifnet *ifp) sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; sc->sc_carpaddr6.s6_addr8[15] = 0x12; + sc->sc_vrrp_adv_inter = 100; + sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; + sc->sc_vrrp_prio = 100; + CARP_LOCK_INIT(sc); #ifdef INET callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); @@ -1788,9 +2244,10 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr) struct carp_softc *sc = NULL; int error = 0; - if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || - carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { + carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0 || + (carpr->carpr_version != CARP_VERSION_CARP && + carpr->carpr_version != CARP_VERSION_VRRPv3)) { return (EINVAL); } @@ -1811,6 +2268,7 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr) LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; } else CARP_LOCK(sc); + sc->sc_version = carpr->carpr_version; if (carpr->carpr_advbase > 0) { if (carpr->carpr_advbase > 255 || carpr->carpr_advbase < CARP_DFLTINTV) { @@ -1834,6 +2292,11 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr) bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); carp_hmac_prepare(sc); } + if (carpr->carpr_vrrp_priority != 0) + sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; + if (carpr->carpr_vrrp_adv_inter) + sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; + if (sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { switch (carpr->carpr_state) { @@ -2283,6 +2746,9 @@ carp_nl_send(void *arg, struct carp_softc *sc, int priv) nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); + nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); + nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); + nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, sc->sc_vrrp_adv_inter); if (priv) nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); @@ -2307,6 +2773,9 @@ struct nl_carp_parsed { char key[CARP_KEY_LEN]; struct in_addr addr; struct in6_addr addr6; + carp_version_t version; + uint8_t vrrp_prio; + uint16_t vrrp_adv_inter; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) @@ -2322,6 +2791,9 @@ static const struct nlattr_parser nla_p_set[] = { { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, + { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, }; static const struct nlfield_parser nlf_p_set[] = { }; @@ -2399,6 +2871,13 @@ carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) return (EINVAL); if (attrs.advskew >= 255) return (EINVAL); + if (attrs.version == 0) + attrs.version = CARP_VERSION_CARP; + if (attrs.version != CARP_VERSION_CARP && + attrs.version != CARP_VERSION_VRRPv3) + return (EINVAL); + if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) + return (EINVAL); NET_EPOCH_ENTER(et); if (attrs.ifname != NULL) @@ -2422,6 +2901,9 @@ carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) carpr.carpr_advskew = attrs.advskew; carpr.carpr_addr = attrs.addr; carpr.carpr_addr6 = attrs.addr6; + carpr.carpr_version = attrs.version; + carpr.carpr_vrrp_priority = attrs.vrrp_prio; + carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); |