aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/ip_carp.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet/ip_carp.c')
-rw-r--r--sys/netinet/ip_carp.c654
1 files changed, 568 insertions, 86 deletions
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index ddbc13e7c878..cf7c8d03ca73 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -37,6 +37,7 @@
#include <sys/systm.h>
#include <sys/devctl.h>
#include <sys/jail.h>
+#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
@@ -95,6 +96,7 @@ static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
struct carp_softc {
struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */
struct ifaddr **sc_ifas; /* Our ifaddrs. */
+ carp_version_t sc_version; /* carp or VRRPv3 */
struct sockaddr_dl sc_addr; /* Our link level address. */
struct callout sc_ad_tmo; /* Advertising timeout. */
#ifdef INET
@@ -106,10 +108,17 @@ struct carp_softc {
struct mtx sc_mtx;
int sc_vhid;
- int sc_advskew;
- int sc_advbase;
- struct in_addr sc_carpaddr;
- struct in6_addr sc_carpaddr6;
+ struct { /* CARP specific context */
+ int sc_advskew;
+ int sc_advbase;
+ struct in_addr sc_carpaddr;
+ struct in6_addr sc_carpaddr6;
+ };
+ struct { /* VRRPv3 specific context */
+ uint8_t sc_vrrp_prio;
+ uint16_t sc_vrrp_adv_inter;
+ uint16_t sc_vrrp_master_inter;
+ };
int sc_naddrs;
int sc_naddrs6;
@@ -166,6 +175,9 @@ struct carpkreq {
/* Everything above this is identical to carpreq */
struct in_addr carpr_addr;
struct in6_addr carpr_addr6;
+ carp_version_t carpr_version;
+ uint8_t carpr_vrrp_priority;
+ uint16_t carpr_vrrp_adv_inter;
};
/*
@@ -325,6 +337,7 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
0 : ((sc)->sc_advskew + V_carp_demotion)))
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int);
+static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t);
static struct carp_softc
*carp_alloc(struct ifnet *);
static void carp_destroy(struct carp_softc *);
@@ -339,6 +352,7 @@ static void carp_master_down_locked(struct carp_softc *,
const char* reason);
static void carp_send_ad(void *);
static void carp_send_ad_locked(struct carp_softc *);
+static void vrrp_send_ad_locked(struct carp_softc *);
static void carp_addroute(struct carp_softc *);
static void carp_ifa_addroute(struct ifaddr *);
static void carp_delroute(struct carp_softc *);
@@ -373,7 +387,7 @@ carp_is_supported_if(if_t ifp)
static void
carp_hmac_prepare(struct carp_softc *sc)
{
- uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+ uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT;
uint8_t vhid = sc->sc_vhid & 0xff;
struct ifaddr *ifa;
int i, found;
@@ -478,6 +492,22 @@ carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
return (bcmp(md, md2, sizeof(md2)));
}
+static int
+vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum)
+{
+ uint16_t cksum;
+
+ /*
+ * Note that VRRPv3 checksums are different from CARP checksums.
+ * Carp just calculates the checksum over the packet.
+ * VRRPv3 includes the pseudo-header checksum as well.
+ */
+ cksum = in_cksum_skip(m, off + len, off);
+ cksum -= phdrcksum;
+
+ return (cksum);
+}
+
/*
* process input packet.
* we have rearranged checks order compared to the rfc,
@@ -489,8 +519,11 @@ carp_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
- struct carp_header *ch;
- int iplen, len;
+ struct vrrpv3_header *vh;
+ int iplen;
+ int minlen;
+ int totlen;
+ uint16_t phdrcksum = 0;
iplen = *offp;
*mp = NULL;
@@ -503,59 +536,93 @@ carp_input(struct mbuf **mp, int *offp, int proto)
}
iplen = ip->ip_hl << 2;
+ totlen = ntohs(ip->ip_len);
- if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
+ /* Ensure we have enough header to figure out the version. */
+ if (m->m_pkthdr.len < iplen + sizeof(*vh)) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
+ CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) "
"on %s\n", __func__, m->m_len - sizeof(struct ip),
if_name(m->m_pkthdr.rcvif));
m_freem(m);
return (IPPROTO_DONE);
}
- if (iplen + sizeof(*ch) < m->m_len) {
- if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
+ if (iplen + sizeof(*vh) < m->m_len) {
+ if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) {
CARPSTATS_INC(carps_hdrops);
- CARP_DEBUG("%s: pullup failed\n", __func__);
+ CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__);
return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
}
- ch = (struct carp_header *)((char *)ip + iplen);
+ vh = (struct vrrpv3_header *)((char *)ip + iplen);
- /*
- * verify that the received packet length is
- * equal to the CARP header
- */
- len = iplen + sizeof(*ch);
- if (len > m->m_pkthdr.len) {
+ switch (vh->vrrp_version) {
+ case CARP_VERSION_CARP:
+ minlen = sizeof(struct carp_header);
+ break;
+ case CARP_VERSION_VRRPv3:
+ minlen = sizeof(struct vrrpv3_header);
+ break;
+ default:
+ CARPSTATS_INC(carps_badver);
+ CARP_DEBUG("%s: unsupported version %d on %s\n", __func__,
+ vh->vrrp_version, if_name(m->m_pkthdr.rcvif));
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* And now check the length again but with the real minimal length. */
+ if (m->m_pkthdr.len < iplen + minlen) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
- m->m_pkthdr.len,
+ CARP_DEBUG("%s: received len %zd < %d "
+ "on %s\n", __func__, m->m_len - sizeof(struct ip),
+ iplen + minlen,
if_name(m->m_pkthdr.rcvif));
m_freem(m);
return (IPPROTO_DONE);
}
- if ((m = m_pullup(m, len)) == NULL) {
- CARPSTATS_INC(carps_hdrops);
- return (IPPROTO_DONE);
+ if (iplen + minlen < m->m_len) {
+ if ((m = m_pullup(m, iplen + minlen)) == NULL) {
+ CARPSTATS_INC(carps_hdrops);
+ CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__);
+ return (IPPROTO_DONE);
+ }
+ ip = mtod(m, struct ip *);
+ vh = (struct vrrpv3_header *)((char *)ip + iplen);
}
- ip = mtod(m, struct ip *);
- ch = (struct carp_header *)((char *)ip + iplen);
- /* verify the CARP checksum */
- m->m_data += iplen;
- if (in_cksum(m, len - iplen)) {
- CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("%s: checksum failed on %s\n", __func__,
- if_name(m->m_pkthdr.rcvif));
- m_freem(m);
- return (IPPROTO_DONE);
+ phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htonl((u_short)(totlen - iplen) + ip->ip_p));
+
+ if (vh->vrrp_version == CARP_VERSION_CARP) {
+ /* verify the CARP checksum */
+ m->m_data += iplen;
+ if (in_cksum(m, totlen - iplen)) {
+ CARPSTATS_INC(carps_badsum);
+ CARP_DEBUG("%s: checksum failed on %s\n", __func__,
+ if_name(m->m_pkthdr.rcvif));
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+ m->m_data -= iplen;
+ }
+
+ switch (vh->vrrp_version) {
+ case CARP_VERSION_CARP: {
+ struct carp_header *ch = (struct carp_header *)((char *)ip + iplen);
+ carp_input_c(m, ch, AF_INET, ip->ip_ttl);
+ break;
+ }
+ case CARP_VERSION_VRRPv3:
+ vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, phdrcksum);
+ break;
+ default:
+ KASSERT(false, ("Unsupported version %d", vh->vrrp_version));
}
- m->m_data -= iplen;
- carp_input_c(m, ch, AF_INET, ip->ip_ttl);
return (IPPROTO_DONE);
}
#endif
@@ -566,8 +633,9 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct carp_header *ch;
- u_int len;
+ struct vrrpv3_header *vh;
+ u_int len, minlen;
+ uint16_t phdrcksum = 0;
CARPSTATS_INC(carps_ipackets6);
@@ -585,10 +653,9 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
- /* verify that we have a complete carp packet */
- if (m->m_len < *offp + sizeof(*ch)) {
+ if (m->m_len < *offp + sizeof(*vh)) {
len = m->m_len;
- m = m_pullup(m, *offp + sizeof(*ch));
+ m = m_pullup(m, *offp + sizeof(*vh));
if (m == NULL) {
CARPSTATS_INC(carps_badlen);
CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
@@ -596,20 +663,72 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
}
ip6 = mtod(m, struct ip6_hdr *);
}
- ch = (struct carp_header *)(mtod(m, char *) + *offp);
+ vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp);
- /* verify the CARP checksum */
- m->m_data += *offp;
- if (in_cksum(m, sizeof(*ch))) {
- CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
+ switch (vh->vrrp_version) {
+ case CARP_VERSION_CARP:
+ minlen = sizeof(struct carp_header);
+ break;
+ case CARP_VERSION_VRRPv3:
+ minlen = sizeof(struct vrrpv3_header);
+ break;
+ default:
+ CARPSTATS_INC(carps_badver);
+ CARP_DEBUG("%s: unsupported version %d on %s\n", __func__,
+ vh->vrrp_version, if_name(m->m_pkthdr.rcvif));
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* And now check the length again but with the real minimal length. */
+ if (m->m_pkthdr.len < sizeof(*ip6) + minlen) {
+ CARPSTATS_INC(carps_badlen);
+ CARP_DEBUG("%s: received len %zd < %zd "
+ "on %s\n", __func__, m->m_len - sizeof(struct ip),
+ sizeof(*ip6) + minlen,
if_name(m->m_pkthdr.rcvif));
m_freem(m);
return (IPPROTO_DONE);
}
- m->m_data -= *offp;
- carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
+ if (sizeof (*ip6) + minlen < m->m_len) {
+ if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) {
+ CARPSTATS_INC(carps_hdrops);
+ CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__);
+ return (IPPROTO_DONE);
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6));
+ }
+
+ phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), ip6->ip6_nxt, 0);
+
+ /* verify the CARP checksum */
+ if (vh->vrrp_version == CARP_VERSION_CARP) {
+ m->m_data += *offp;
+ if (in_cksum(m, sizeof(struct carp_header))) {
+ CARPSTATS_INC(carps_badsum);
+ CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
+ if_name(m->m_pkthdr.rcvif));
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+ m->m_data -= *offp;
+ }
+
+ switch (vh->vrrp_version) {
+ case CARP_VERSION_CARP: {
+ struct carp_header *ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6));
+ carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
+ break;
+ }
+ case CARP_VERSION_VRRPv3:
+ vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, ntohs(ip6->ip6_plen),
+ phdrcksum);
+ break;
+ default:
+ KASSERT(false, ("Unsupported version %d", vh->vrrp_version));
+ }
return (IPPROTO_DONE);
}
#endif /* INET6 */
@@ -629,7 +748,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
* The VHID test is outside this mini-function.
*/
static int
-carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
+carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
{
#ifdef INET
struct ip *ip4;
@@ -659,16 +778,12 @@ carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
return (0);
}
-static void
-carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
+static struct ifaddr *
+carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ifaddr *ifa, *match;
- struct carp_softc *sc;
- uint64_t tmp_counter;
- struct timeval sc_tv, ch_tv;
int error;
- bool multicast = false;
NET_EPOCH_ASSERT();
@@ -688,9 +803,9 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
IFNET_FOREACH_IFA(ifp, ifa) {
if (match == NULL && ifa->ifa_carp != NULL &&
ifa->ifa_addr->sa_family == af &&
- ifa->ifa_carp->sc_vhid == ch->carp_vhid)
+ ifa->ifa_carp->sc_vhid == vhid)
match = ifa;
- if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
+ if (vhid == 0 && carp_source_is_self(m, ifa, af))
error = ELOOP;
}
ifa = error ? NULL : match;
@@ -705,12 +820,37 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
} else {
CARPSTATS_INC(carps_badvhid);
}
+ }
+
+ return (ifa);
+}
+
+static void
+carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
+{
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct ifaddr *ifa;
+ struct carp_softc *sc;
+ uint64_t tmp_counter;
+ struct timeval sc_tv, ch_tv;
+ bool multicast = false;
+
+ NET_EPOCH_ASSERT();
+
+ ifa = carp_find_ifa(m, af, ch->carp_vhid);
+ if (ifa == NULL) {
m_freem(m);
return;
}
+ sc = ifa->ifa_carp;
+ CARP_LOCK(sc);
+
/* verify the CARP version. */
- if (ch->carp_version != CARP_VERSION) {
+ if (ch->carp_version != CARP_VERSION_CARP ||
+ sc->sc_version != CARP_VERSION_CARP) {
+ CARP_UNLOCK(sc);
+
CARPSTATS_INC(carps_badver);
CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
ch->carp_version);
@@ -719,8 +859,6 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl)
return;
}
- sc = ifa->ifa_carp;
- CARP_LOCK(sc);
if (ifa->ifa_addr->sa_family == AF_INET) {
multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr);
} else {
@@ -809,11 +947,130 @@ out:
m_freem(m);
}
+static void
+vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl,
+ int len, uint16_t phdrcksum)
+{
+ struct vrrpv3_header *vh = mtodo(m, off);
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct ifaddr *ifa;
+ struct carp_softc *sc;
+
+ NET_EPOCH_ASSERT();
+
+ ifa = carp_find_ifa(m, af, vh->vrrp_vrtid);
+ if (ifa == NULL) {
+ m_freem(m);
+ return;
+ }
+
+ sc = ifa->ifa_carp;
+ CARP_LOCK(sc);
+
+ ifa_free(ifa);
+
+ /* verify the CARP version. */
+ if (vh->vrrp_version != CARP_VERSION_VRRPv3 || sc->sc_version != CARP_VERSION_VRRPv3) {
+ CARP_UNLOCK(sc);
+
+ CARPSTATS_INC(carps_badver);
+ CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
+ vh->vrrp_version);
+ m_freem(m);
+ return;
+ }
+
+ /* verify that the IP TTL is 255. */
+ if (ttl != CARP_DFLTTL) {
+ CARPSTATS_INC(carps_badttl);
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
+ ttl, if_name(m->m_pkthdr.rcvif));
+ goto out;
+ }
+
+ if (vrrp_checksum_verify(m, off, len, phdrcksum)) {
+ CARPSTATS_INC(carps_badsum);
+ CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__,
+ sc->sc_vhid, if_name(ifp));
+ goto out;
+ }
+
+ /* RFC9568, 7.1 Receiving VRRP packets. */
+ if (sc->sc_vrrp_prio == 255) {
+ CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n",
+ __func__);
+ goto out;
+ }
+
+ /* XXX TODO Check IP address payload. */
+
+ sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int);
+
+ switch (sc->sc_state) {
+ case INIT:
+ break;
+ case MASTER:
+ /*
+ * If we receive an advertisement from a master who's going to
+ * be more frequent than us, go into BACKUP state.
+ * Same if the peer has a higher priority than us.
+ */
+ if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter ||
+ vh->vrrp_priority > sc->sc_vrrp_prio) {
+ callout_stop(&sc->sc_ad_tmo);
+ carp_set_state(sc, BACKUP,
+ "more frequent advertisement received");
+ carp_setrun(sc, 0);
+ carp_delroute(sc);
+ }
+ break;
+ case BACKUP:
+ /*
+ * If we're pre-empting masters who advertise slower than us,
+ * and this one claims to be slower, treat him as down.
+ */
+ if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter
+ || vh->vrrp_priority < sc->sc_vrrp_prio)) {
+ carp_master_down_locked(sc,
+ "preempting a slower master");
+ break;
+ }
+
+ /*
+ * Otherwise, we reset the counter and wait for the next
+ * advertisement.
+ */
+ carp_setrun(sc, af);
+ break;
+ }
+
+out:
+ CARP_UNLOCK(sc);
+ m_freem(m);
+}
+
static int
-carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+carp_tag(struct carp_softc *sc, struct mbuf *m)
{
struct m_tag *mtag;
+ /* Tag packet for carp_output */
+ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
+ M_NOWAIT)) == NULL) {
+ m_freem(m);
+ CARPSTATS_INC(carps_onomem);
+ return (ENOMEM);
+ }
+ bcopy(&sc, mtag + 1, sizeof(sc));
+ m_tag_prepend(m, mtag);
+
+ return (0);
+}
+
+static int
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+
if (sc->sc_init_counter) {
/* this could also be seconds since unix epoch */
sc->sc_counter = arc4random();
@@ -827,17 +1084,7 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
- /* Tag packet for carp_output */
- if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
- M_NOWAIT)) == NULL) {
- m_freem(m);
- CARPSTATS_INC(carps_onomem);
- return (ENOMEM);
- }
- bcopy(&sc, mtag + 1, sizeof(sc));
- m_tag_prepend(m, mtag);
-
- return (0);
+ return (carp_tag(sc, m));
}
/*
@@ -959,11 +1206,16 @@ carp_send_ad_locked(struct carp_softc *sc)
NET_EPOCH_ASSERT();
CARP_LOCK_ASSERT(sc);
+ if (sc->sc_version == CARP_VERSION_VRRPv3) {
+ vrrp_send_ad_locked(sc);
+ return;
+ }
+
advskew = DEMOTE_ADVSKEW(sc);
tv.tv_sec = sc->sc_advbase;
tv.tv_usec = advskew * 1000000 / 256;
- ch.carp_version = CARP_VERSION;
+ ch.carp_version = CARP_VERSION_CARP;
ch.carp_type = CARP_ADVERTISEMENT;
ch.carp_vhid = sc->sc_vhid;
ch.carp_advbase = sc->sc_advbase;
@@ -1091,6 +1343,187 @@ resched:
}
static void
+vrrp_send_ad_locked(struct carp_softc *sc)
+{
+ struct vrrpv3_header *vh_ptr;
+ struct ifaddr *ifa;
+ struct mbuf *m;
+ int len;
+ struct vrrpv3_header vh = {
+ .vrrp_version = CARP_VERSION_VRRPv3,
+ .vrrp_type = VRRP_TYPE_ADVERTISEMENT,
+ .vrrp_vrtid = sc->sc_vhid,
+ .vrrp_priority = sc->sc_vrrp_prio,
+ .vrrp_count_addr = 0,
+ .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter),
+ .vrrp_checksum = 0,
+ };
+
+ NET_EPOCH_ASSERT();
+ CARP_LOCK_ASSERT(sc);
+ MPASS(sc->sc_version == CARP_VERSION_VRRPv3);
+
+#ifdef INET
+ if (sc->sc_naddrs) {
+ struct ip *ip;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+ CARPSTATS_INC(carps_onomem);
+ goto resched;
+ }
+ len = sizeof(*ip) + sizeof(vh);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ M_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
+ ip->ip_off = htons(IP_DF);
+ ip->ip_ttl = CARP_DFLTTL;
+ ip->ip_p = IPPROTO_CARP;
+ ip->ip_sum = 0;
+ ip_fillid(ip);
+
+ ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
+ if (ifa != NULL) {
+ ip->ip_src.s_addr =
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+ ifa_free(ifa);
+ } else
+ ip->ip_src.s_addr = 0;
+ ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
+
+ /* Include the IP addresses in the announcement. */
+ for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) {
+ struct sockaddr_in *in;
+
+ MPASS(sc->sc_ifas[i] != NULL);
+ if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET)
+ continue;
+
+ in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr;
+
+ if (m_append(m, sizeof(in->sin_addr),
+ (caddr_t)&in->sin_addr) != 1) {
+ m_freem(m);
+ goto resched;
+ }
+
+ vh.vrrp_count_addr++;
+ len += sizeof(in->sin_addr);
+ }
+ ip->ip_len = htons(len);
+
+ vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip));
+ bcopy(&vh, vh_ptr, sizeof(vh));
+
+ vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr,
+ htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p));
+ vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip));
+
+ if (carp_tag(sc, m))
+ goto resched;
+
+ CARPSTATS_INC(carps_opackets);
+
+ carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
+ &sc->sc_carpdev->if_carp->cif_imo, NULL));
+ }
+#endif
+#ifdef INET6
+ if (sc->sc_naddrs6) {
+ struct ip6_hdr *ip6;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+ CARPSTATS_INC(carps_onomem);
+ goto resched;
+ }
+ len = sizeof(*ip6) + sizeof(vh);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ M_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip6 = mtod(m, struct ip6_hdr *);
+ bzero(ip6, sizeof(*ip6));
+ ip6->ip6_vfc |= IPV6_VERSION;
+ /* Traffic class isn't defined in ip6 struct instead
+ * it gets offset into flowid field */
+ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
+ IPTOS_DSCP_OFFSET));
+ ip6->ip6_hlim = CARP_DFLTTL;
+ ip6->ip6_nxt = IPPROTO_CARP;
+
+ /* set the source address */
+ ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
+ if (ifa != NULL) {
+ bcopy(IFA_IN6(ifa), &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ ifa_free(ifa);
+ } else
+ /* This should never happen with IPv6. */
+ bzero(&ip6->ip6_src, sizeof(struct in6_addr));
+
+ /* Set the multicast destination. */
+ bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst));
+ ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
+ ip6->ip6_dst.s6_addr8[15] = 0x12;
+
+ /* Include the IP addresses in the announcement. */
+ len = sizeof(vh);
+ for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) {
+ struct sockaddr_in6 *in6;
+
+ MPASS(sc->sc_ifas[i] != NULL);
+ if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6)
+ continue;
+
+ in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr;
+
+ if (m_append(m, sizeof(in6->sin6_addr),
+ (char *)&in6->sin6_addr) != 1) {
+ m_freem(m);
+ goto resched;
+ }
+
+ vh.vrrp_count_addr++;
+ len += sizeof(in6->sin6_addr);
+ }
+ ip6->ip6_plen = htonl(len);
+
+ vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6));
+ bcopy(&vh, vh_ptr, sizeof(vh));
+
+ vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0);
+ vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6));
+
+ if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+ m_freem(m);
+ CARP_DEBUG("%s: in6_setscope failed\n", __func__);
+ goto resched;
+ }
+
+ if (carp_tag(sc, m))
+ goto resched;
+ CARPSTATS_INC(carps_opackets6);
+
+ carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
+ &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
+ }
+#endif
+
+resched:
+ callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100,
+ carp_send_ad, sc);
+}
+
+static void
carp_addroute(struct carp_softc *sc)
{
struct ifaddr *ifa;
@@ -1357,6 +1790,7 @@ static void
carp_setrun(struct carp_softc *sc, sa_family_t af)
{
struct timeval tv;
+ int timeout;
CARP_LOCK_ASSERT(sc);
@@ -1373,40 +1807,57 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
break;
case BACKUP:
callout_stop(&sc->sc_ad_tmo);
- tv.tv_sec = 3 * sc->sc_advbase;
- tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+
+ if (sc->sc_version == CARP_VERSION_CARP) {
+ tv.tv_sec = 3 * sc->sc_advbase;
+ tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ timeout = tvtohz(&tv);
+ } else {
+ /* skew time */
+ timeout = (256 - sc->sc_vrrp_prio) * sc->sc_vrrp_master_inter / 256;
+ timeout += (3 * sc->sc_vrrp_master_inter);
+ timeout *= hz;
+ timeout /= 100; /* master interval is in centiseconds. */
+ }
+
switch (af) {
#ifdef INET
case AF_INET:
- callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
+ callout_reset(&sc->sc_md_tmo, timeout,
carp_master_down, sc);
break;
#endif
#ifdef INET6
case AF_INET6:
- callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
+ callout_reset(&sc->sc_md6_tmo, timeout,
carp_master_down, sc);
break;
#endif
default:
#ifdef INET
if (sc->sc_naddrs)
- callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
+ callout_reset(&sc->sc_md_tmo, timeout,
carp_master_down, sc);
#endif
#ifdef INET6
if (sc->sc_naddrs6)
- callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
+ callout_reset(&sc->sc_md6_tmo, timeout,
carp_master_down, sc);
#endif
break;
}
break;
case MASTER:
- tv.tv_sec = sc->sc_advbase;
- tv.tv_usec = sc->sc_advskew * 1000000 / 256;
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
+ if (sc->sc_version == CARP_VERSION_CARP) {
+ tv.tv_sec = sc->sc_advbase;
+ tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+ } else {
+ callout_reset(&sc->sc_ad_tmo,
+ sc->sc_vrrp_adv_inter * hz / 100,
+ carp_send_ad, sc);
+ }
break;
}
}
@@ -1634,6 +2085,7 @@ carp_alloc(struct ifnet *ifp)
sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
+ sc->sc_version = CARP_VERSION_CARP;
sc->sc_advbase = CARP_DFLTINTV;
sc->sc_vhid = -1; /* required setting */
sc->sc_init_counter = 1;
@@ -1647,6 +2099,10 @@ carp_alloc(struct ifnet *ifp)
sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
sc->sc_carpaddr6.s6_addr8[15] = 0x12;
+ sc->sc_vrrp_adv_inter = 100;
+ sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter;
+ sc->sc_vrrp_prio = 100;
+
CARP_LOCK_INIT(sc);
#ifdef INET
callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
@@ -1788,9 +2244,10 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
struct carp_softc *sc = NULL;
int error = 0;
-
if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID ||
- carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) {
+ carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0 ||
+ (carpr->carpr_version != CARP_VERSION_CARP &&
+ carpr->carpr_version != CARP_VERSION_VRRPv3)) {
return (EINVAL);
}
@@ -1811,6 +2268,7 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
} else
CARP_LOCK(sc);
+ sc->sc_version = carpr->carpr_version;
if (carpr->carpr_advbase > 0) {
if (carpr->carpr_advbase > 255 ||
carpr->carpr_advbase < CARP_DFLTINTV) {
@@ -1834,6 +2292,11 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
carp_hmac_prepare(sc);
}
+ if (carpr->carpr_vrrp_priority != 0)
+ sc->sc_vrrp_prio = carpr->carpr_vrrp_priority;
+ if (carpr->carpr_vrrp_adv_inter)
+ sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter;
+
if (sc->sc_state != INIT &&
carpr->carpr_state != sc->sc_state) {
switch (carpr->carpr_state) {
@@ -2283,6 +2746,9 @@ carp_nl_send(void *arg, struct carp_softc *sc, int priv)
nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew);
nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr);
nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6);
+ nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version);
+ nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio);
+ nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, sc->sc_vrrp_adv_inter);
if (priv)
nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key);
@@ -2307,6 +2773,9 @@ struct nl_carp_parsed {
char key[CARP_KEY_LEN];
struct in_addr addr;
struct in6_addr addr6;
+ carp_version_t version;
+ uint8_t vrrp_prio;
+ uint16_t vrrp_adv_inter;
};
#define _IN(_field) offsetof(struct genlmsghdr, _field)
@@ -2322,6 +2791,9 @@ static const struct nlattr_parser nla_p_set[] = {
{ .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr },
{ .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr },
{ .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string },
+ { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 },
+ { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 },
+ { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 },
};
static const struct nlfield_parser nlf_p_set[] = {
};
@@ -2399,6 +2871,13 @@ carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
return (EINVAL);
if (attrs.advskew >= 255)
return (EINVAL);
+ if (attrs.version == 0)
+ attrs.version = CARP_VERSION_CARP;
+ if (attrs.version != CARP_VERSION_CARP &&
+ attrs.version != CARP_VERSION_VRRPv3)
+ return (EINVAL);
+ if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL)
+ return (EINVAL);
NET_EPOCH_ENTER(et);
if (attrs.ifname != NULL)
@@ -2422,6 +2901,9 @@ carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
carpr.carpr_advskew = attrs.advskew;
carpr.carpr_addr = attrs.addr;
carpr.carpr_addr6 = attrs.addr6;
+ carpr.carpr_version = attrs.version;
+ carpr.carpr_vrrp_priority = attrs.vrrp_prio;
+ carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter;
memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key));