aboutsummaryrefslogtreecommitdiff
path: root/net/openbgpd/files/patch-bgpd_session.c
diff options
context:
space:
mode:
author Kurt Jaeger <pi@FreeBSD.org> 2019-05-31 15:04:30 +0000
committer Kurt Jaeger <pi@FreeBSD.org> 2019-05-31 15:04:30 +0000
commit e3e5543a6dc3364105a52ea6b86c3f060bd230e2 (patch)
tree 41240bb6a84aa0d4143fc4a8d939e719a4f83e79 /net/openbgpd/files/patch-bgpd_session.c
parent 75b69fed682eeb918b8cb5661e89976ee7328f25 (diff)
download ports-e3e5543a6dc3364105a52ea6b86c3f060bd230e2.tar.gz
ports-e3e5543a6dc3364105a52ea6b86c3f060bd230e2.zip
Notes
Diffstat (limited to 'net/openbgpd/files/patch-bgpd_session.c')
-rw-r--r-- net/openbgpd/files/patch-bgpd_session.c 2075
1 files changed, 0 insertions, 2075 deletions
diff --git a/net/openbgpd/files/patch-bgpd_session.c b/net/openbgpd/files/patch-bgpd_session.c
deleted file mode 100644
index 66c05a92aec0..000000000000
--- a/net/openbgpd/files/patch-bgpd_session.c
+++ /dev/null
@@ -1,2075 +0,0 @@
-Index: bgpd/session.c
-===================================================================
-RCS file: /home/cvs/private/hrs/openbgpd/bgpd/session.c,v
-retrieving revision 1.1.1.8
-retrieving revision 1.13
-diff -u -p -r1.1.1.8 -r1.13
---- bgpd/session.c 14 Feb 2010 20:19:57 -0000 1.1.1.8
-+++ bgpd/session.c 8 Dec 2012 20:17:59 -0000 1.13
-@@ -1,4 +1,4 @@
--/* $OpenBSD: session.c,v 1.293 2009/06/07 05:56:24 eric Exp $ */
-+/* $OpenBSD: session.c,v 1.325 2012/09/18 09:45:50 claudio Exp $ */
-
- /*
- * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
-@@ -21,18 +21,21 @@
-
- #include <sys/mman.h>
- #include <sys/socket.h>
-+#include <sys/time.h>
-+#include <sys/resource.h>
- #include <sys/un.h>
-+#include <sys/queue.h>
- #include <net/if_types.h>
- #include <netinet/in.h>
- #include <netinet/in_systm.h>
- #include <netinet/ip.h>
- #include <netinet/tcp.h>
-+#include <netinet/tcp_var.h>
- #include <arpa/inet.h>
-
- #include <err.h>
- #include <errno.h>
- #include <fcntl.h>
--#include <limits.h>
- #include <poll.h>
- #include <pwd.h>
- #include <signal.h>
-@@ -50,7 +53,12 @@
- #define PFD_PIPE_ROUTE_CTL 2
- #define PFD_SOCK_CTL 3
- #define PFD_SOCK_RCTL 4
--#define PFD_LISTENERS_START 5
-+#define PFD_SOCK_PFKEY 5
-+#define PFD_LISTENERS_START 6
-+
-+#if defined(__FreeBSD__) /* FreeBSD has no LINK_STATE_IS_UP macro. */
-+#define LINK_STATE_IS_UP(_s) ((_s) >= LINK_STATE_UP)
-+#endif /* defined(__FreeBSD__) */
-
- void session_sighdlr(int);
- int setup_listeners(u_int *);
-@@ -65,9 +73,9 @@ void session_accept(int);
- int session_connect(struct peer *);
- void session_tcp_established(struct peer *);
- void session_capa_ann_none(struct peer *);
--int session_capa_add(struct peer *, struct buf *, u_int8_t, u_int8_t,
-- u_int8_t *);
--int session_capa_add_mp(struct buf *, u_int16_t, u_int8_t);
-+int session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
-+int session_capa_add_mp(struct ibuf *, u_int8_t);
-+int session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
- struct bgp_msg *session_newmsg(enum msg_type, u_int16_t);
- int session_sendmsg(struct bgp_msg *, struct peer *);
- void session_open(struct peer *);
-@@ -75,30 +83,34 @@ void session_keepalive(struct peer *);
- void session_update(u_int32_t, void *, size_t);
- void session_notification(struct peer *, u_int8_t, u_int8_t, void *,
- ssize_t);
--void session_rrefresh(struct peer *, u_int16_t, u_int8_t);
-+void session_rrefresh(struct peer *, u_int8_t);
-+int session_graceful_restart(struct peer *);
-+int session_graceful_is_restarting(struct peer *);
-+int session_graceful_stop(struct peer *);
- int session_dispatch_msg(struct pollfd *, struct peer *);
-+int session_process_msg(struct peer *);
- int parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
- int parse_open(struct peer *);
- int parse_update(struct peer *);
- int parse_refresh(struct peer *);
- int parse_notification(struct peer *);
- int parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
-+int capa_neg_calc(struct peer *);
- void session_dispatch_imsg(struct imsgbuf *, int, u_int *);
- void session_up(struct peer *);
- void session_down(struct peer *);
- void session_demote(struct peer *, int);
-
--int la_cmp(struct listen_addr *, struct listen_addr *);
--struct peer *getpeerbyip(struct sockaddr *);
--int session_match_mask(struct peer *, struct sockaddr *);
--struct peer *getpeerbyid(u_int32_t);
--static struct sockaddr *addr2sa(struct bgpd_addr *, u_int16_t);
-+int la_cmp(struct listen_addr *, struct listen_addr *);
-+struct peer *getpeerbyip(struct sockaddr *);
-+int session_match_mask(struct peer *, struct bgpd_addr *);
-+struct peer *getpeerbyid(u_int32_t);
-
--struct bgpd_config *conf, *nconf = NULL;
-+struct bgpd_config *conf, *nconf;
- struct bgpd_sysdep sysdep;
--struct peer *npeers;
--volatile sig_atomic_t session_quit = 0;
--int pending_reconf = 0;
-+struct peer *peers, *npeers;
-+volatile sig_atomic_t session_quit;
-+int pending_reconf;
- int csock = -1, rcsock = -1;
- u_int peer_cnt;
- struct imsgbuf *ibuf_rde;
-@@ -106,6 +118,7 @@ struct imsgbuf *ibuf_rde_ctl;
- struct imsgbuf *ibuf_main;
-
- struct mrt_head mrthead;
-+time_t pauseaccept;
-
- void
- session_sighdlr(int sig)
-@@ -125,6 +138,22 @@ setup_listeners(u_int *la_cnt)
- int opt;
- struct listen_addr *la;
- u_int cnt = 0;
-+#if defined(__FreeBSD__)
-+ int s;
-+
-+ /* Check if TCP_MD5SIG is supported. */
-+ s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-+ if (s < 0)
-+ fatal("socket open for TCP_MD5SIG check");
-+ opt = TF_SIGNATURE;
-+ if (setsockopt(s, IPPROTO_TCP, TCP_MD5SIG, &opt, sizeof(opt)) == -1) {
-+ if (errno == ENOPROTOOPT || errno == EINVAL)
-+ sysdep.no_md5sig = 1;
-+ else
-+ fatal("setsockopt TCP_MD5SIG");
-+ }
-+ close(s);
-+#endif /* defined(__FreeBSD__) */
-
- TAILQ_FOREACH(la, conf->listen_addrs, entry) {
- la->reconf = RECONF_NONE;
-@@ -140,6 +169,7 @@ setup_listeners(u_int *la_cnt)
- }
-
- opt = 1;
-+#if !defined(__FreeBSD__)
- if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
- &opt, sizeof(opt)) == -1) {
- if (errno == ENOPROTOOPT) { /* system w/o md5sig */
-@@ -148,6 +178,7 @@ setup_listeners(u_int *la_cnt)
- } else
- fatal("setsockopt TCP_MD5SIG");
- }
-+#endif /* !defined(__FreeBSD__) */
-
- /* set ttl to 255 so that ttl-security works */
- if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
-@@ -175,12 +206,10 @@ setup_listeners(u_int *la_cnt)
- }
-
- pid_t
--session_main(struct bgpd_config *config, struct peer *cpeers,
-- struct network_head *net_l, struct filter_head *rules,
-- struct mrt_head *m_l, struct rib_names *rib_l, int pipe_m2s[2],
-- int pipe_s2r[2], int pipe_m2r[2], int pipe_s2rctl[2])
-+session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
-+ int pipe_s2rctl[2])
- {
-- int nfds, timeout;
-+ int nfds, timeout, pfkeysock;
- unsigned int i, j, idx_peers, idx_listeners, idx_mrts;
- pid_t pid;
- u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
-@@ -189,19 +218,13 @@ session_main(struct bgpd_config *config,
- u_int32_t ctl_queued;
- struct passwd *pw;
- struct peer *p, **peer_l = NULL, *last, *next;
-- struct network *net;
-- struct mrt *m, **mrt_l = NULL;
-- struct filter_rule *r;
-+ struct mrt *m, *xm, **mrt_l = NULL;
- struct pollfd *pfd = NULL;
- struct ctl_conn *ctl_conn;
- struct listen_addr *la;
-- struct rde_rib *rr;
- void *newp;
- short events;
-
-- conf = config;
-- peers = cpeers;
--
- switch (pid = fork()) {
- case -1:
- fatal("cannot fork");
-@@ -211,13 +234,6 @@ session_main(struct bgpd_config *config,
- return (pid);
- }
-
-- /* control socket is outside chroot */
-- if ((csock = control_init(0, conf->csock)) == -1)
-- fatalx("control socket setup failed");
-- if (conf->rcsock != NULL &&
-- (rcsock = control_init(1, conf->rcsock)) == -1)
-- fatalx("control socket setup failed");
--
- if ((pw = getpwnam(BGPD_USER)) == NULL)
- fatal(NULL);
-
-@@ -228,29 +244,25 @@ session_main(struct bgpd_config *config,
-
- setproctitle("session engine");
- bgpd_process = PROC_SE;
--
-- if (pfkey_init(&sysdep) == -1)
-- fatalx("pfkey setup failed");
-+ pfkeysock = pfkey_init(&sysdep);
-
- if (setgroups(1, &pw->pw_gid) ||
- setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
- setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
- fatal("can't drop privileges");
-
-- listener_cnt = 0;
-- setup_listeners(&listener_cnt);
--
- signal(SIGTERM, session_sighdlr);
- signal(SIGINT, session_sighdlr);
- signal(SIGPIPE, SIG_IGN);
- signal(SIGHUP, SIG_IGN);
-- log_info("session engine ready");
-+ signal(SIGALRM, SIG_IGN);
-+ signal(SIGUSR1, SIG_IGN);
-+
- close(pipe_m2s[0]);
- close(pipe_s2r[1]);
- close(pipe_s2rctl[1]);
- close(pipe_m2r[0]);
- close(pipe_m2r[1]);
-- init_conf(conf);
- if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
- (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
- (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
-@@ -258,37 +270,21 @@ session_main(struct bgpd_config *config,
- imsg_init(ibuf_rde, pipe_s2r[0]);
- imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
- imsg_init(ibuf_main, pipe_m2s[1]);
-+
- TAILQ_INIT(&ctl_conns);
-- control_listen(csock);
-- control_listen(rcsock);
- LIST_INIT(&mrthead);
-+ listener_cnt = 0;
- peer_cnt = 0;
- ctl_cnt = 0;
-
-- /* filter rules are not used in the SE */
-- while ((r = TAILQ_FIRST(rules)) != NULL) {
-- TAILQ_REMOVE(rules, r, entry);
-- free(r);
-- }
-- free(rules);
--
-- /* network list is not used in the SE */
-- while ((net = TAILQ_FIRST(net_l)) != NULL) {
-- TAILQ_REMOVE(net_l, net, entry);
-- filterset_free(&net->net.attrset);
-- free(net);
-- }
-+ if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
-+ fatal(NULL);
-+ if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
-+ NULL)
-+ fatal(NULL);
-+ TAILQ_INIT(conf->listen_addrs);
-
-- /* main mrt list is not used in the SE */
-- while ((m = LIST_FIRST(m_l)) != NULL) {
-- LIST_REMOVE(m, entry);
-- free(m);
-- }
-- /* rib names not used in the SE */
-- while ((rr = SIMPLEQ_FIRST(&ribnames))) {
-- SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
-- free(rr);
-- }
-+ log_info("session engine ready");
-
- while (session_quit == 0) {
- /* check for peers to be initialized or deleted */
-@@ -308,8 +304,9 @@ session_main(struct bgpd_config *config,
-
- /* reinit due? */
- if (p->conf.reconf_action == RECONF_REINIT) {
-- bgp_fsm(p, EVNT_STOP);
-- timer_set(p, Timer_IdleHold, 0);
-+ session_stop(p, ERR_CEASE_ADMIN_RESET);
-+ if (!p->conf.down)
-+ timer_set(p, Timer_IdleHold, 0);
- }
-
- /* deletion due? */
-@@ -317,7 +314,7 @@ session_main(struct bgpd_config *config,
- if (p->demoted)
- session_demote(p, -1);
- p->conf.demote_group[0] = 0;
-- bgp_fsm(p, EVNT_STOP);
-+ session_stop(p, ERR_CEASE_PEER_UNCONF);
- log_peer_warnx(&p->conf, "removed");
- if (last != NULL)
- last->next = next;
-@@ -346,9 +343,17 @@ session_main(struct bgpd_config *config,
- }
-
- mrt_cnt = 0;
-- LIST_FOREACH(m, &mrthead, entry)
-+ for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
-+ xm = LIST_NEXT(m, entry);
-+ if (m->state == MRT_STATE_REMOVE) {
-+ mrt_clean(m);
-+ LIST_REMOVE(m, entry);
-+ free(m);
-+ continue;
-+ }
- if (m->wbuf.queued)
- mrt_cnt++;
-+ }
-
- if (mrt_cnt > mrt_l_elms) {
- if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
-@@ -394,18 +399,31 @@ session_main(struct bgpd_config *config,
- if (ctl_queued < SESSION_CTL_QUEUE_MAX)
- /*
- * Do not act as unlimited buffer. Don't read in more
-- * messages if the ctl sockets are getting full.
-+ * messages if the ctl sockets are getting full.
- */
- pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
-- pfd[PFD_SOCK_CTL].fd = csock;
-- pfd[PFD_SOCK_CTL].events = POLLIN;
-- pfd[PFD_SOCK_RCTL].fd = rcsock;
-- pfd[PFD_SOCK_RCTL].events = POLLIN;
--
-+ if (pauseaccept == 0) {
-+ pfd[PFD_SOCK_CTL].fd = csock;
-+ pfd[PFD_SOCK_CTL].events = POLLIN;
-+ pfd[PFD_SOCK_RCTL].fd = rcsock;
-+ pfd[PFD_SOCK_RCTL].events = POLLIN;
-+ } else {
-+ pfd[PFD_SOCK_CTL].fd = -1;
-+ pfd[PFD_SOCK_RCTL].fd = -1;
-+ }
-+ pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
-+#if !defined(__FreeBSD__)
-+ pfd[PFD_SOCK_PFKEY].events = POLLIN;
-+#else
-+ pfd[PFD_SOCK_PFKEY].events = 0;
-+#endif
- i = PFD_LISTENERS_START;
- TAILQ_FOREACH(la, conf->listen_addrs, entry) {
-- pfd[i].fd = la->fd;
-- pfd[i].events = POLLIN;
-+ if (pauseaccept == 0) {
-+ pfd[i].fd = la->fd;
-+ pfd[i].events = POLLIN;
-+ } else
-+ pfd[i].fd = -1;
- i++;
- }
- idx_listeners = i;
-@@ -450,6 +468,10 @@ session_main(struct bgpd_config *config,
- p->state == STATE_ESTABLISHED)
- session_demote(p, -1);
- break;
-+ case Timer_RestartTimeout:
-+ timer_stop(p, Timer_RestartTimeout);
-+ session_graceful_stop(p);
-+ break;
- default:
- fatalx("King Bula lost in time");
- }
-@@ -462,6 +484,9 @@ session_main(struct bgpd_config *config,
- events = POLLIN;
- if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
- events |= POLLOUT;
-+ /* is there still work to do? */
-+ if (p->rbuf && p->rbuf->wpos)
-+ timeout = 0;
-
- /* poll events */
- if (p->fd != -1 && events != 0) {
-@@ -492,12 +517,21 @@ session_main(struct bgpd_config *config,
- i++;
- }
-
-+ if (pauseaccept && timeout > 1)
-+ timeout = 1;
- if (timeout < 0)
- timeout = 0;
- if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
- if (errno != EINTR)
- fatal("poll error");
-
-+ /*
-+ * If we previously saw fd exhaustion, we stop accept()
-+ * for 1 second to throttle the accept() loop.
-+ */
-+ if (pauseaccept && getmonotime() > pauseaccept + 1)
-+ pauseaccept = 0;
-+
- if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
- if (msgbuf_write(&ibuf_main->w) < 0)
- fatal("pipe write error");
-@@ -534,6 +568,14 @@ session_main(struct bgpd_config *config,
- ctl_cnt += control_accept(rcsock, 1);
- }
-
-+ if (nfds > 0 && pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
-+ nfds--;
-+ if (pfkey_read(pfkeysock, NULL) == -1) {
-+ log_warnx("pfkey_read failed, exiting...");
-+ session_quit = 1;
-+ }
-+ }
-+
- for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
- j++)
- if (pfd[j].revents & POLLIN) {
-@@ -545,6 +587,10 @@ session_main(struct bgpd_config *config,
- nfds -= session_dispatch_msg(&pfd[j],
- peer_l[j - idx_listeners]);
-
-+ for (p = peers; p != NULL; p = p->next)
-+ if (p->rbuf && p->rbuf->wpos)
-+ session_process_msg(p);
-+
- for (; nfds > 0 && j < idx_mrts; j++)
- if (pfd[j].revents & POLLOUT) {
- nfds--;
-@@ -557,7 +603,7 @@ session_main(struct bgpd_config *config,
-
- while ((p = peers) != NULL) {
- peers = p->next;
-- bgp_fsm(p, EVNT_STOP);
-+ session_stop(p, ERR_CEASE_ADMIN_DOWN);
- pfkey_remove(p);
- free(p);
- }
-@@ -643,10 +689,9 @@ bgp_fsm(struct peer *peer, enum session_
- timer_stop(peer, Timer_IdleHold);
-
- /* allocate read buffer */
-- peer->rbuf = calloc(1, sizeof(struct buf_read));
-+ peer->rbuf = calloc(1, sizeof(struct ibuf_read));
- if (peer->rbuf == NULL)
- fatal(NULL);
-- peer->rbuf->wpos = 0;
-
- /* init write buffer */
- msgbuf_init(&peer->wbuf);
-@@ -746,7 +791,6 @@ bgp_fsm(struct peer *peer, enum session_
- /* ignore */
- break;
- case EVNT_STOP:
-- session_notification(peer, ERR_CEASE, 0, NULL, 0);
- change_state(peer, STATE_IDLE, event);
- break;
- case EVNT_CON_CLOSED:
-@@ -780,7 +824,8 @@ bgp_fsm(struct peer *peer, enum session_
- change_state(peer, STATE_IDLE, event);
- break;
- default:
-- session_notification(peer, ERR_FSM, 0, NULL, 0);
-+ session_notification(peer,
-+ ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
- change_state(peer, STATE_IDLE, event);
- break;
- }
-@@ -791,7 +836,6 @@ bgp_fsm(struct peer *peer, enum session_
- /* ignore */
- break;
- case EVNT_STOP:
-- session_notification(peer, ERR_CEASE, 0, NULL, 0);
- change_state(peer, STATE_IDLE, event);
- break;
- case EVNT_CON_CLOSED:
-@@ -815,7 +859,8 @@ bgp_fsm(struct peer *peer, enum session_
- change_state(peer, STATE_IDLE, event);
- break;
- default:
-- session_notification(peer, ERR_FSM, 0, NULL, 0);
-+ session_notification(peer,
-+ ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
- change_state(peer, STATE_IDLE, event);
- break;
- }
-@@ -826,7 +871,6 @@ bgp_fsm(struct peer *peer, enum session_
- /* ignore */
- break;
- case EVNT_STOP:
-- session_notification(peer, ERR_CEASE, 0, NULL, 0);
- change_state(peer, STATE_IDLE, event);
- break;
- case EVNT_CON_CLOSED:
-@@ -856,7 +900,8 @@ bgp_fsm(struct peer *peer, enum session_
- change_state(peer, STATE_IDLE, event);
- break;
- default:
-- session_notification(peer, ERR_FSM, 0, NULL, 0);
-+ session_notification(peer,
-+ ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
- change_state(peer, STATE_IDLE, event);
- break;
- }
-@@ -885,9 +930,10 @@ start_timer_keepalive(struct peer *peer)
- void
- session_close_connection(struct peer *peer)
- {
-- if (peer->fd != -1)
-+ if (peer->fd != -1) {
- close(peer->fd);
--
-+ pauseaccept = 0;
-+ }
- peer->fd = peer->wbuf.fd = -1;
- }
-
-@@ -923,20 +969,31 @@ change_state(struct peer *peer, enum ses
- timer_stop(peer, Timer_ConnectRetry);
- timer_stop(peer, Timer_Keepalive);
- timer_stop(peer, Timer_Hold);
-+ timer_stop(peer, Timer_IdleHold);
- timer_stop(peer, Timer_IdleHoldReset);
- session_close_connection(peer);
- msgbuf_clear(&peer->wbuf);
- free(peer->rbuf);
- peer->rbuf = NULL;
- bzero(&peer->capa.peer, sizeof(peer->capa.peer));
-- if (peer->state == STATE_ESTABLISHED)
-- session_down(peer);
-+
- if (event != EVNT_STOP) {
- timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
- if (event != EVNT_NONE &&
- peer->IdleHoldTime < MAX_IDLE_HOLD/2)
- peer->IdleHoldTime *= 2;
- }
-+ if (peer->state == STATE_ESTABLISHED) {
-+ if (peer->capa.neg.grestart.restart == 2 &&
-+ (event == EVNT_CON_CLOSED ||
-+ event == EVNT_CON_FATAL)) {
-+ /* don't punish graceful restart */
-+ timer_set(peer, Timer_IdleHold, 0);
-+ peer->IdleHoldTime /= 2;
-+ session_graceful_restart(peer);
-+ } else
-+ session_down(peer);
-+ }
- if (peer->state == STATE_NONE ||
- peer->state == STATE_ESTABLISHED) {
- /* initialize capability negotiation structures */
-@@ -947,6 +1004,20 @@ change_state(struct peer *peer, enum ses
- }
- break;
- case STATE_CONNECT:
-+ if (peer->state == STATE_ESTABLISHED &&
-+ peer->capa.neg.grestart.restart == 2) {
-+ /* do the graceful restart dance */
-+ session_graceful_restart(peer);
-+ peer->holdtime = INTERVAL_HOLD_INITIAL;
-+ timer_stop(peer, Timer_ConnectRetry);
-+ timer_stop(peer, Timer_Keepalive);
-+ timer_stop(peer, Timer_Hold);
-+ timer_stop(peer, Timer_IdleHold);
-+ timer_stop(peer, Timer_IdleHoldReset);
-+ session_close_connection(peer);
-+ msgbuf_clear(&peer->wbuf);
-+ bzero(&peer->capa.peer, sizeof(peer->capa.peer));
-+ }
- break;
- case STATE_ACTIVE:
- break;
-@@ -990,7 +1061,10 @@ session_accept(int listenfd)
- len = sizeof(cliaddr);
- if ((connfd = accept(listenfd,
- (struct sockaddr *)&cliaddr, &len)) == -1) {
-- if (errno == EWOULDBLOCK || errno == EINTR)
-+ if (errno == ENFILE || errno == EMFILE) {
-+ pauseaccept = getmonotime();
-+ return;
-+ } else if (errno == EWOULDBLOCK || errno == EINTR)
- return;
- else
- log_warn("accept");
-@@ -1017,6 +1091,7 @@ session_accept(int listenfd)
- }
- }
-
-+open:
- if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
- log_peer_warnx(&p->conf,
- "ipsec or md5sig configured but not available");
-@@ -1049,6 +1124,13 @@ session_accept(int listenfd)
- }
- session_socket_blockmode(connfd, BM_NONBLOCK);
- bgp_fsm(p, EVNT_CON_OPEN);
-+ return;
-+ } else if (p != NULL && p->state == STATE_ESTABLISHED &&
-+ p->capa.neg.grestart.restart == 2) {
-+ /* first do the graceful restart dance */
-+ change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
-+ /* then do part of the open dance */
-+ goto open;
- } else {
- log_conn_attempt(p, (struct sockaddr *)&cliaddr);
- close(connfd);
-@@ -1069,7 +1151,7 @@ session_connect(struct peer *peer)
- if (peer->fd != -1)
- return (-1);
-
-- if ((peer->fd = socket(peer->conf.remote_addr.af, SOCK_STREAM,
-+ if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM,
- IPPROTO_TCP)) == -1) {
- log_peer_warn(&peer->conf, "session_connect socket");
- bgp_fsm(peer, EVNT_CON_OPENFAIL);
-@@ -1100,8 +1182,7 @@ session_connect(struct peer *peer)
- peer->wbuf.fd = peer->fd;
-
- /* if update source is set we need to bind() */
-- if (peer->conf.local_addr.af) {
-- sa = addr2sa(&peer->conf.local_addr, 0);
-+ if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
- if (bind(peer->fd, sa, sa->sa_len) == -1) {
- log_peer_warn(&peer->conf, "session_connect bind");
- bgp_fsm(peer, EVNT_CON_OPENFAIL);
-@@ -1139,42 +1220,50 @@ session_setup_socket(struct peer *p)
- int nodelay = 1;
- int bsize;
-
-- if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET) {
-- /* set TTL to foreign router's distance - 1=direct n=multihop
-- with ttlsec, we always use 255 */
-- if (p->conf.ttlsec) {
-- ttl = 256 - p->conf.distance;
-- if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl,
-+ switch (p->conf.remote_addr.aid) {
-+ case AID_INET:
-+ /* set precedence, see RFC 1771 appendix 5 */
-+ if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
-+ -1) {
-+ log_peer_warn(&p->conf,
-+ "session_setup_socket setsockopt TOS");
-+ return (-1);
-+ }
-+
-+ if (p->conf.ebgp) {
-+ /* set TTL to foreign router's distance
-+ 1=direct n=multihop with ttlsec, we always use 255 */
-+ if (p->conf.ttlsec) {
-+ ttl = 256 - p->conf.distance;
-+ if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
-+ &ttl, sizeof(ttl)) == -1) {
-+ log_peer_warn(&p->conf,
-+ "session_setup_socket: "
-+ "setsockopt MINTTL");
-+ return (-1);
-+ }
-+ ttl = 255;
-+ }
-+
-+ if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
- sizeof(ttl)) == -1) {
- log_peer_warn(&p->conf,
-- "session_setup_socket setsockopt MINTTL");
-+ "session_setup_socket setsockopt TTL");
- return (-1);
- }
-- ttl = 255;
-- }
--
-- if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
-- sizeof(ttl)) == -1) {
-- log_peer_warn(&p->conf,
-- "session_setup_socket setsockopt TTL");
-- return (-1);
- }
-- }
--
-- if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET6)
-- /* set hoplimit to foreign router's distance */
-- if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl,
-- sizeof(ttl)) == -1) {
-- log_peer_warn(&p->conf,
-- "session_setup_socket setsockopt hoplimit");
-- return (-1);
-+ break;
-+ case AID_INET6:
-+ if (p->conf.ebgp) {
-+ /* set hoplimit to foreign router's distance */
-+ if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
-+ &ttl, sizeof(ttl)) == -1) {
-+ log_peer_warn(&p->conf,
-+ "session_setup_socket setsockopt hoplimit");
-+ return (-1);
-+ }
- }
--
-- /* if ttlsec is in use, set minttl */
-- if (p->conf.ttlsec) {
-- ttl = 256 - p->conf.distance;
-- setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl));
--
-+ break;
- }
-
- /* set TCP_NODELAY */
-@@ -1185,24 +1274,18 @@ session_setup_socket(struct peer *p)
- return (-1);
- }
-
-- /* set precedence, see RFC 1771 appendix 5 */
-- if (p->conf.remote_addr.af == AF_INET &&
-- setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == -1) {
-- log_peer_warn(&p->conf,
-- "session_setup_socket setsockopt TOS");
-- return (-1);
-- }
--
- /* only increase bufsize (and thus window) if md5 or ipsec is in use */
- if (p->conf.auth.method != AUTH_NONE) {
- /* try to increase bufsize. no biggie if it fails */
- bsize = 65535;
-- while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
-- sizeof(bsize)) == -1)
-+ while (bsize > 8192 &&
-+ setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
-+ sizeof(bsize)) == -1 && errno != EINVAL)
- bsize /= 2;
- bsize = 65535;
-- while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
-- sizeof(bsize)) == -1)
-+ while (bsize > 8192 &&
-+ setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
-+ sizeof(bsize)) == -1 && errno != EINVAL)
- bsize /= 2;
- }
-
-@@ -1244,40 +1327,56 @@ session_tcp_established(struct peer *pee
- void
- session_capa_ann_none(struct peer *peer)
- {
-- peer->capa.ann.mp_v4 = SAFI_NONE;
-- peer->capa.ann.mp_v4 = SAFI_NONE;
-- peer->capa.ann.refresh = 0;
-- peer->capa.ann.restart = 0;
-- peer->capa.ann.as4byte = 0;
-+ bzero(&peer->capa.ann, sizeof(peer->capa.ann));
- }
-
- int
--session_capa_add(struct peer *p, struct buf *opb, u_int8_t capa_code,
-- u_int8_t capa_len, u_int8_t *optparamlen)
--{
-- u_int8_t op_type, op_len, tot_len, errs = 0;
--
-- op_type = OPT_PARAM_CAPABILITIES;
-- op_len = sizeof(capa_code) + sizeof(capa_len) + capa_len;
-- tot_len = sizeof(op_type) + sizeof(op_len) + op_len;
-- errs += buf_add(opb, &op_type, sizeof(op_type));
-- errs += buf_add(opb, &op_len, sizeof(op_len));
-- errs += buf_add(opb, &capa_code, sizeof(capa_code));
-- errs += buf_add(opb, &capa_len, sizeof(capa_len));
-- *optparamlen += tot_len;
-+session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
-+{
-+ int errs = 0;
-+
-+ errs += ibuf_add(opb, &capa_code, sizeof(capa_code));
-+ errs += ibuf_add(opb, &capa_len, sizeof(capa_len));
- return (errs);
- }
-
- int
--session_capa_add_mp(struct buf *buf, u_int16_t afi, u_int8_t safi)
-+session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
- {
-- u_int8_t pad = 0;
-+ u_int8_t safi, pad = 0;
-+ u_int16_t afi;
- int errs = 0;
-
-+ if (aid2afi(aid, &afi, &safi) == -1)
-+ fatalx("session_capa_add_mp: bad afi/safi pair");
-+ afi = htons(afi);
-+ errs += ibuf_add(buf, &afi, sizeof(afi));
-+ errs += ibuf_add(buf, &pad, sizeof(pad));
-+ errs += ibuf_add(buf, &safi, sizeof(safi));
-+
-+ return (errs);
-+}
-+
-+int
-+session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
-+{
-+ u_int errs = 0;
-+ u_int16_t afi;
-+ u_int8_t flags, safi;
-+
-+ if (aid2afi(aid, &afi, &safi)) {
-+ log_warn("session_capa_add_gr: bad AID");
-+ return (1);
-+ }
-+ if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
-+ flags = CAPA_GR_F_FLAG;
-+ else
-+ flags = 0;
-+
- afi = htons(afi);
-- errs += buf_add(buf, &afi, sizeof(afi));
-- errs += buf_add(buf, &pad, sizeof(pad));
-- errs += buf_add(buf, &safi, sizeof(safi));
-+ errs += ibuf_add(b, &afi, sizeof(afi));
-+ errs += ibuf_add(b, &safi, sizeof(safi));
-+ errs += ibuf_add(b, &flags, sizeof(flags));
-
- return (errs);
- }
-@@ -1287,23 +1386,22 @@ session_newmsg(enum msg_type msgtype, u_
- {
- struct bgp_msg *msg;
- struct msg_header hdr;
-- struct buf *buf;
-+ struct ibuf *buf;
- int errs = 0;
-
- memset(&hdr.marker, 0xff, sizeof(hdr.marker));
- hdr.len = htons(len);
- hdr.type = msgtype;
-
-- if ((buf = buf_open(len)) == NULL)
-+ if ((buf = ibuf_open(len)) == NULL)
- return (NULL);
-
-- errs += buf_add(buf, &hdr.marker, sizeof(hdr.marker));
-- errs += buf_add(buf, &hdr.len, sizeof(hdr.len));
-- errs += buf_add(buf, &hdr.type, sizeof(hdr.type));
--
-- if (errs > 0 ||
-- (msg = calloc(1, sizeof(*msg))) == NULL) {
-- buf_free(buf);
-+ errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
-+ errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
-+ errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
-+
-+ if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
-+ ibuf_free(buf);
- return (NULL);
- }
-
-@@ -1329,7 +1427,7 @@ session_sendmsg(struct bgp_msg *msg, str
- mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
- }
-
-- buf_close(&p->wbuf, msg->buf);
-+ ibuf_close(&p->wbuf, msg->buf);
- free(msg);
- return (0);
- }
-@@ -1338,40 +1436,70 @@ void
- session_open(struct peer *p)
- {
- struct bgp_msg *buf;
-- struct buf *opb;
-+ struct ibuf *opb;
- struct msg_open msg;
- u_int16_t len;
-- u_int8_t optparamlen = 0;
-- u_int errs = 0;
-+ u_int8_t i, op_type, optparamlen = 0;
-+ int errs = 0;
-+ int mpcapa = 0;
-
-
-- if ((opb = buf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN)) == NULL) {
-+ if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
-+ sizeof(optparamlen))) == NULL) {
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
- }
-
- /* multiprotocol extensions, RFC 4760 */
-- if (p->capa.ann.mp_v4) { /* 4 bytes data */
-- errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen);
-- errs += session_capa_add_mp(opb, AFI_IPv4, p->capa.ann.mp_v4);
-- }
-- if (p->capa.ann.mp_v6) { /* 4 bytes data */
-- errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen);
-- errs += session_capa_add_mp(opb, AFI_IPv6, p->capa.ann.mp_v6);
-- }
-+ for (i = 0; i < AID_MAX; i++)
-+ if (p->capa.ann.mp[i]) { /* 4 bytes data */
-+ errs += session_capa_add(opb, CAPA_MP, 4);
-+ errs += session_capa_add_mp(opb, i);
-+ mpcapa++;
-+ }
-
- /* route refresh, RFC 2918 */
- if (p->capa.ann.refresh) /* no data */
-- errs += session_capa_add(p, opb, CAPA_REFRESH, 0, &optparamlen);
-+ errs += session_capa_add(opb, CAPA_REFRESH, 0);
-
-- /* End-of-RIB marker, RFC 4724 */
-- if (p->capa.ann.restart) { /* 2 bytes data */
-- u_char c[2];
--
-- bzero(&c, 2);
-- c[0] = 0x80; /* we're always restarting */
-- errs += session_capa_add(p, opb, CAPA_RESTART, 2, &optparamlen);
-- errs += buf_add(opb, &c, 2);
-+ /* graceful restart and End-of-RIB marker, RFC 4724 */
-+ if (p->capa.ann.grestart.restart) {
-+ int rst = 0;
-+ u_int16_t hdr;
-+ u_int8_t grlen;
-+
-+ if (mpcapa) {
-+ grlen = 2 + 4 * mpcapa;
-+ for (i = 0; i < AID_MAX; i++) {
-+ if (p->capa.neg.grestart.flags[i] &
-+ CAPA_GR_RESTARTING)
-+ rst++;
-+ }
-+ } else { /* AID_INET */
-+ grlen = 2 + 4;
-+ if (p->capa.neg.grestart.flags[AID_INET] &
-+ CAPA_GR_RESTARTING)
-+ rst++;
-+ }
-+
-+ hdr = conf->holdtime; /* default timeout */
-+ /* if client does graceful restart don't set R flag */
-+ if (!rst)
-+ hdr |= CAPA_GR_R_FLAG;
-+ hdr = htons(hdr);
-+
-+ errs += session_capa_add(opb, CAPA_RESTART, grlen);
-+ errs += ibuf_add(opb, &hdr, sizeof(hdr));
-+
-+ if (mpcapa) {
-+ for (i = 0; i < AID_MAX; i++) {
-+ if (p->capa.ann.mp[i]) {
-+ errs += session_capa_add_gr(p, opb, i);
-+ }
-+ }
-+ } else { /* AID_INET */
-+ errs += session_capa_add_gr(p, opb, AID_INET);
-+ }
- }
-
- /* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
-@@ -1379,13 +1507,17 @@ session_open(struct peer *p)
- u_int32_t nas;
-
- nas = htonl(conf->as);
-- errs += session_capa_add(p, opb, CAPA_AS4BYTE, 4, &optparamlen);
-- errs += buf_add(opb, &nas, 4);
-+ errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
-+ errs += ibuf_add(opb, &nas, sizeof(nas));
- }
-
-+ if (ibuf_size(opb))
-+ optparamlen = ibuf_size(opb) + sizeof(op_type) +
-+ sizeof(optparamlen);
-+
- len = MSGSIZE_OPEN_MIN + optparamlen;
- if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
-- buf_free(opb);
-+ ibuf_free(opb);
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
- }
-@@ -1399,19 +1531,24 @@ session_open(struct peer *p)
- msg.bgpid = conf->bgpid; /* is already in network byte order */
- msg.optparamlen = optparamlen;
-
-- errs += buf_add(buf->buf, &msg.version, sizeof(msg.version));
-- errs += buf_add(buf->buf, &msg.myas, sizeof(msg.myas));
-- errs += buf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
-- errs += buf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
-- errs += buf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
--
-- if (optparamlen)
-- errs += buf_add(buf->buf, opb->buf, optparamlen);
-+ errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
-+ errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
-+ errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
-+ errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
-+ errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
-+
-+ if (optparamlen) {
-+ op_type = OPT_PARAM_CAPABILITIES;
-+ optparamlen = ibuf_size(opb);
-+ errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
-+ errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
-+ errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
-+ }
-
-- buf_free(opb);
-+ ibuf_free(opb);
-
-- if (errs > 0) {
-- buf_free(buf->buf);
-+ if (errs) {
-+ ibuf_free(buf->buf);
- free(buf);
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
-@@ -1459,8 +1596,8 @@ session_update(u_int32_t peerid, void *d
- return;
- }
-
-- if (buf_add(buf->buf, data, datalen)) {
-- buf_free(buf->buf);
-+ if (ibuf_add(buf->buf, data, datalen)) {
-+ ibuf_free(buf->buf);
- free(buf);
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
-@@ -1480,29 +1617,27 @@ session_notification(struct peer *p, u_i
- void *data, ssize_t datalen)
- {
- struct bgp_msg *buf;
-- u_int errs = 0;
-- u_int8_t null8 = 0;
-+ int errs = 0;
-
- if (p->stats.last_sent_errcode) /* some notification already sent */
- return;
-
-+ log_notification(p, errcode, subcode, data, datalen, "sending");
-+
- if ((buf = session_newmsg(NOTIFICATION,
- MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
- }
-
-- errs += buf_add(buf->buf, &errcode, sizeof(errcode));
-- if (errcode == ERR_CEASE)
-- errs += buf_add(buf->buf, &null8, sizeof(null8));
-- else
-- errs += buf_add(buf->buf, &subcode, sizeof(subcode));
-+ errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
-+ errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
-
- if (datalen > 0)
-- errs += buf_add(buf->buf, data, datalen);
-+ errs += ibuf_add(buf->buf, data, datalen);
-
-- if (errs > 0) {
-- buf_free(buf->buf);
-+ if (errs) {
-+ ibuf_free(buf->buf);
- free(buf);
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
-@@ -1521,23 +1656,29 @@ session_notification(struct peer *p, u_i
- int
- session_neighbor_rrefresh(struct peer *p)
- {
-+ u_int8_t i;
-+
- if (!p->capa.peer.refresh)
- return (-1);
-
-- if (p->capa.peer.mp_v4 != SAFI_NONE)
-- session_rrefresh(p, AFI_IPv4, p->capa.peer.mp_v4);
-- if (p->capa.peer.mp_v6 != SAFI_NONE)
-- session_rrefresh(p, AFI_IPv6, p->capa.peer.mp_v6);
-+ for (i = 0; i < AID_MAX; i++) {
-+ if (p->capa.peer.mp[i] != 0)
-+ session_rrefresh(p, i);
-+ }
-
- return (0);
- }
-
- void
--session_rrefresh(struct peer *p, u_int16_t afi, u_int8_t safi)
-+session_rrefresh(struct peer *p, u_int8_t aid)
- {
- struct bgp_msg *buf;
- int errs = 0;
-- u_int8_t null8 = 0;
-+ u_int16_t afi;
-+ u_int8_t safi, null8 = 0;
-+
-+ if (aid2afi(aid, &afi, &safi) == -1)
-+ fatalx("session_rrefresh: bad afi/safi pair");
-
- if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
- bgp_fsm(p, EVNT_CON_FATAL);
-@@ -1545,12 +1686,12 @@ session_rrefresh(struct peer *p, u_int16
- }
-
- afi = htons(afi);
-- errs += buf_add(buf->buf, &afi, sizeof(afi));
-- errs += buf_add(buf->buf, &null8, sizeof(null8));
-- errs += buf_add(buf->buf, &safi, sizeof(safi));
-+ errs += ibuf_add(buf->buf, &afi, sizeof(afi));
-+ errs += ibuf_add(buf->buf, &null8, sizeof(null8));
-+ errs += ibuf_add(buf->buf, &safi, sizeof(safi));
-
-- if (errs > 0) {
-- buf_free(buf->buf);
-+ if (errs) {
-+ ibuf_free(buf->buf);
- free(buf);
- bgp_fsm(p, EVNT_CON_FATAL);
- return;
-@@ -1565,13 +1706,74 @@ session_rrefresh(struct peer *p, u_int16
- }
-
- int
-+session_graceful_restart(struct peer *p)
-+{
-+ u_int8_t i;
-+
-+ timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
-+
-+ for (i = 0; i < AID_MAX; i++) {
-+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
-+ if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE,
-+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
-+ return (-1);
-+ log_peer_warnx(&p->conf,
-+ "graceful restart of %s, keeping routes",
-+ aid2str(i));
-+ p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
-+ } else if (p->capa.neg.mp[i]) {
-+ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
-+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
-+ return (-1);
-+ log_peer_warnx(&p->conf,
-+ "graceful restart of %s, flushing routes",
-+ aid2str(i));
-+ }
-+ }
-+ return (0);
-+}
-+
-+int
-+session_graceful_is_restarting(struct peer *p)
-+{
-+ u_int8_t i;
-+
-+ for (i = 0; i < AID_MAX; i++)
-+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
-+ return (1);
-+ return (0);
-+}
-+
-+int
-+session_graceful_stop(struct peer *p)
-+{
-+ u_int8_t i;
-+
-+ for (i = 0; i < AID_MAX; i++) {
-+ /*
-+ * Only flush if the peer is restarting and the peer indicated
-+ * it holds the forwarding state. In all other cases the
-+ * session was already flushed when the session came up.
-+ */
-+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING &&
-+ p->capa.neg.grestart.flags[i] & CAPA_GR_FORWARD) {
-+ log_peer_warnx(&p->conf, "graceful restart of %s, "
-+ "time-out, flushing", aid2str(i));
-+ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
-+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
-+ return (-1);
-+ }
-+ p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
-+ }
-+ return (0);
-+}
-+
-+int
- session_dispatch_msg(struct pollfd *pfd, struct peer *p)
- {
-- ssize_t n, rpos, av, left;
-+ ssize_t n;
- socklen_t len;
-- int error, processed = 0;
-- u_int16_t msglen;
-- u_int8_t msgtype;
-+ int error;
-
- if (p->state == STATE_CONNECT) {
- if (pfd->revents & POLLOUT) {
-@@ -1641,71 +1843,83 @@ session_dispatch_msg(struct pollfd *pfd,
- return (1);
- }
-
-- rpos = 0;
-- av = p->rbuf->wpos + n;
-+ p->rbuf->wpos += n;
- p->stats.last_read = time(NULL);
-+ return (1);
-+ }
-+ return (0);
-+}
-
-- /*
-- * session might drop to IDLE -> buffers deallocated
-- * we MUST check rbuf != NULL before use
-- */
-- for (;;) {
-- if (rpos + MSGSIZE_HEADER > av)
-- break;
-- if (p->rbuf == NULL)
-- break;
-- if (parse_header(p, p->rbuf->buf + rpos, &msglen,
-- &msgtype) == -1)
-- return (0);
-- if (rpos + msglen > av)
-- break;
-- p->rbuf->rptr = p->rbuf->buf + rpos;
--
-- switch (msgtype) {
-- case OPEN:
-- bgp_fsm(p, EVNT_RCVD_OPEN);
-- p->stats.msg_rcvd_open++;
-- break;
-- case UPDATE:
-- bgp_fsm(p, EVNT_RCVD_UPDATE);
-- p->stats.msg_rcvd_update++;
-- break;
-- case NOTIFICATION:
-- bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
-- p->stats.msg_rcvd_notification++;
-- break;
-- case KEEPALIVE:
-- bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
-- p->stats.msg_rcvd_keepalive++;
-- break;
-- case RREFRESH:
-- parse_refresh(p);
-- p->stats.msg_rcvd_rrefresh++;
-- break;
-- default: /* cannot happen */
-- session_notification(p, ERR_HEADER,
-- ERR_HDR_TYPE, &msgtype, 1);
-- log_warnx("received message with "
-- "unknown type %u", msgtype);
-- bgp_fsm(p, EVNT_CON_FATAL);
-- }
-- rpos += msglen;
-- if (++processed > MSG_PROCESS_LIMIT)
-- break;
-- }
-- if (p->rbuf == NULL)
-- return (1);
-+int
-+session_process_msg(struct peer *p)
-+{
-+ ssize_t rpos, av, left;
-+ int processed = 0;
-+ u_int16_t msglen;
-+ u_int8_t msgtype;
-
-- if (rpos < av) {
-- left = av - rpos;
-- memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
-- p->rbuf->wpos = left;
-- } else
-- p->rbuf->wpos = 0;
-+ rpos = 0;
-+ av = p->rbuf->wpos;
-
-- return (1);
-+ /*
-+ * session might drop to IDLE -> buffers deallocated
-+ * we MUST check rbuf != NULL before use
-+ */
-+ for (;;) {
-+ if (rpos + MSGSIZE_HEADER > av)
-+ break;
-+ if (p->rbuf == NULL)
-+ break;
-+ if (parse_header(p, p->rbuf->buf + rpos, &msglen,
-+ &msgtype) == -1)
-+ return (0);
-+ if (rpos + msglen > av)
-+ break;
-+ p->rbuf->rptr = p->rbuf->buf + rpos;
-+
-+ switch (msgtype) {
-+ case OPEN:
-+ bgp_fsm(p, EVNT_RCVD_OPEN);
-+ p->stats.msg_rcvd_open++;
-+ break;
-+ case UPDATE:
-+ bgp_fsm(p, EVNT_RCVD_UPDATE);
-+ p->stats.msg_rcvd_update++;
-+ break;
-+ case NOTIFICATION:
-+ bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
-+ p->stats.msg_rcvd_notification++;
-+ break;
-+ case KEEPALIVE:
-+ bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
-+ p->stats.msg_rcvd_keepalive++;
-+ break;
-+ case RREFRESH:
-+ parse_refresh(p);
-+ p->stats.msg_rcvd_rrefresh++;
-+ break;
-+ default: /* cannot happen */
-+ session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
-+ &msgtype, 1);
-+ log_warnx("received message with unknown type %u",
-+ msgtype);
-+ bgp_fsm(p, EVNT_CON_FATAL);
-+ }
-+ rpos += msglen;
-+ if (++processed > MSG_PROCESS_LIMIT)
-+ break;
- }
-- return (0);
-+ if (p->rbuf == NULL)
-+ return (1);
-+
-+ if (rpos < av) {
-+ left = av - rpos;
-+ memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
-+ p->rbuf->wpos = left;
-+ } else
-+ p->rbuf->wpos = 0;
-+
-+ return (1);
- }
-
- int
-@@ -1853,12 +2067,6 @@ parse_open(struct peer *peer)
- p += sizeof(short_as);
- as = peer->short_as = ntohs(short_as);
-
-- /* if remote-as is zero and it's a cloned neighbor, accept any */
-- if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
-- peer->conf.remote_as = as;
-- peer->conf.ebgp = (peer->conf.remote_as != conf->as);
-- }
--
- memcpy(&oholdtime, p, sizeof(oholdtime));
- p += sizeof(oholdtime);
-
-@@ -1966,6 +2174,15 @@ parse_open(struct peer *peer)
- }
- }
-
-+ /* if remote-as is zero and it's a cloned neighbor, accept any */
-+ if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
-+ peer->conf.remote_as = as;
-+ peer->conf.ebgp = (peer->conf.remote_as != conf->as);
-+ if (!peer->conf.ebgp)
-+ /* force enforce_as off for iBGP sessions */
-+ peer->conf.enforce_as = ENFORCE_AS_OFF;
-+ }
-+
- if (peer->conf.remote_as != as) {
- log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
- log_as(as));
-@@ -1974,6 +2191,14 @@ parse_open(struct peer *peer)
- return (-1);
- }
-
-+ if (capa_neg_calc(peer) == -1) {
-+ log_peer_warnx(&peer->conf,
-+ "capability negotiation calculation failed");
-+ session_notification(peer, ERR_OPEN, 0, NULL, 0);
-+ change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
-+ return (-1);
-+ }
-+
- return (0);
- }
-
-@@ -2008,24 +2233,35 @@ int
- parse_refresh(struct peer *peer)
- {
- u_char *p;
-- struct rrefresh r;
-+ u_int16_t afi;
-+ u_int8_t aid, safi;
-
- p = peer->rbuf->rptr;
- p += MSGSIZE_HEADER; /* header is already checked */
-
-+ /*
-+ * We could check if we actually announced the capability but
-+ * as long as the message is correctly encoded we don't care.
-+ */
-+
- /* afi, 2 byte */
-- memcpy(&r.afi, p, sizeof(r.afi));
-- r.afi = ntohs(r.afi);
-+ memcpy(&afi, p, sizeof(afi));
-+ afi = ntohs(afi);
- p += 2;
- /* reserved, 1 byte */
- p += 1;
- /* safi, 1 byte */
-- memcpy(&r.safi, p, sizeof(r.safi));
-+ memcpy(&safi, p, sizeof(safi));
-
- /* afi/safi unchecked - unrecognized values will be ignored anyway */
-+ if (afi2aid(afi, safi, &aid) == -1) {
-+ log_peer_warnx(&peer->conf, "peer sent bad refresh, "
-+ "invalid afi/safi pair");
-+ return (0);
-+ }
-
-- if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &r,
-- sizeof(r)) == -1)
-+ if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
-+ sizeof(aid)) == -1)
- return (-1);
-
- return (0);
-@@ -2035,11 +2271,12 @@ int
- parse_notification(struct peer *peer)
- {
- u_char *p;
-+ u_int16_t datalen;
- u_int8_t errcode;
- u_int8_t subcode;
-- u_int16_t datalen;
- u_int8_t capa_code;
- u_int8_t capa_len;
-+ u_int8_t i;
-
- /* just log */
- p = peer->rbuf->rptr;
-@@ -2059,7 +2296,7 @@ parse_notification(struct peer *peer)
- p += sizeof(subcode);
- datalen -= sizeof(subcode);
-
-- log_notification(peer, errcode, subcode, p, datalen);
-+ log_notification(peer, errcode, subcode, p, datalen, "received");
- peer->errcnt++;
-
- if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
-@@ -2094,8 +2331,8 @@ parse_notification(struct peer *peer)
- datalen -= capa_len;
- switch (capa_code) {
- case CAPA_MP:
-- peer->capa.ann.mp_v4 = SAFI_NONE;
-- peer->capa.ann.mp_v6 = SAFI_NONE;
-+ for (i = 0; i < AID_MAX; i++)
-+ peer->capa.ann.mp[i] = 0;
- log_peer_warnx(&peer->conf,
- "disabling multiprotocol capability");
- break;
-@@ -2105,7 +2342,7 @@ parse_notification(struct peer *peer)
- "disabling route refresh capability");
- break;
- case CAPA_RESTART:
-- peer->capa.ann.restart = 0;
-+ peer->capa.ann.grestart.restart = 0;
- log_peer_warnx(&peer->conf,
- "disabling restart capability");
- break;
-@@ -2139,19 +2376,23 @@ parse_notification(struct peer *peer)
- int
- parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
- {
-+ u_char *capa_val;
-+ u_int32_t remote_as;
- u_int16_t len;
-+ u_int16_t afi;
-+ u_int16_t gr_header;
-+ u_int8_t safi;
-+ u_int8_t aid;
-+ u_int8_t gr_flags;
- u_int8_t capa_code;
- u_int8_t capa_len;
-- u_char *capa_val;
-- u_int16_t mp_afi;
-- u_int8_t mp_safi;
-- u_int32_t remote_as;
-+ u_int8_t i;
-
- len = dlen;
- while (len > 0) {
- if (len < 2) {
-- log_peer_warnx(&peer->conf, "parse_capabilities: "
-- "expect len >= 2, len is %u", len);
-+ log_peer_warnx(&peer->conf, "Bad capabilities attr "
-+ "length: %u, too short", len);
- return (-1);
- }
- memcpy(&capa_code, d, sizeof(capa_code));
-@@ -2163,7 +2404,7 @@ parse_capabilities(struct peer *peer, u_
- if (capa_len > 0) {
- if (len < capa_len) {
- log_peer_warnx(&peer->conf,
-- "parse_capabilities: "
-+ "Bad capabilities attr length: "
- "len %u smaller than capa_len %u",
- len, capa_len);
- return (-1);
-@@ -2178,47 +2419,82 @@ parse_capabilities(struct peer *peer, u_
- case CAPA_MP: /* RFC 4760 */
- if (capa_len != 4) {
- log_peer_warnx(&peer->conf,
-- "parse_capabilities: "
-- "expect len 4, len is %u", capa_len);
-- return (-1);
-- }
-- memcpy(&mp_afi, capa_val, sizeof(mp_afi));
-- mp_afi = ntohs(mp_afi);
-- memcpy(&mp_safi, capa_val + 3, sizeof(mp_safi));
-- switch (mp_afi) {
-- case AFI_IPv4:
-- if (mp_safi < 1 || mp_safi > 3)
-- log_peer_warnx(&peer->conf,
-- "parse_capabilities: AFI IPv4, "
-- "mp_safi %u unknown", mp_safi);
-- else
-- peer->capa.peer.mp_v4 = mp_safi;
-+ "Bad multi protocol capability length: "
-+ "%u", capa_len);
- break;
-- case AFI_IPv6:
-- if (mp_safi < 1 || mp_safi > 3)
-- log_peer_warnx(&peer->conf,
-- "parse_capabilities: AFI IPv6, "
-- "mp_safi %u unknown", mp_safi);
-- else
-- peer->capa.peer.mp_v6 = mp_safi;
-- break;
-- default: /* ignore */
-+ }
-+ memcpy(&afi, capa_val, sizeof(afi));
-+ afi = ntohs(afi);
-+ memcpy(&safi, capa_val + 3, sizeof(safi));
-+ if (afi2aid(afi, safi, &aid) == -1) {
-+ log_peer_warnx(&peer->conf,
-+ "Received multi protocol capability: "
-+ " unknown AFI %u, safi %u pair",
-+ afi, safi);
- break;
- }
-+ peer->capa.peer.mp[aid] = 1;
- break;
- case CAPA_REFRESH:
- peer->capa.peer.refresh = 1;
- break;
- case CAPA_RESTART:
-- peer->capa.peer.restart = 1;
-- /* we don't care about the further restart capas yet */
-+ if (capa_len == 2) {
-+ /* peer only supports EoR marker */
-+ peer->capa.peer.grestart.restart = 1;
-+ peer->capa.peer.grestart.timeout = 0;
-+ break;
-+ } else if (capa_len % 4 != 2) {
-+ log_peer_warnx(&peer->conf,
-+ "Bad graceful restart capability length: "
-+ "%u", capa_len);
-+ peer->capa.peer.grestart.restart = 0;
-+ peer->capa.peer.grestart.timeout = 0;
-+ break;
-+ }
-+
-+ memcpy(&gr_header, capa_val, sizeof(gr_header));
-+ gr_header = ntohs(gr_header);
-+ peer->capa.peer.grestart.timeout =
-+ gr_header & CAPA_GR_TIMEMASK;
-+ if (peer->capa.peer.grestart.timeout == 0) {
-+ log_peer_warnx(&peer->conf, "Received "
-+ "graceful restart timeout is zero");
-+ peer->capa.peer.grestart.restart = 0;
-+ break;
-+ }
-+
-+ for (i = 2; i <= capa_len - 4; i += 4) {
-+ memcpy(&afi, capa_val + i, sizeof(afi));
-+ afi = ntohs(afi);
-+ memcpy(&safi, capa_val + i + 2, sizeof(safi));
-+ if (afi2aid(afi, safi, &aid) == -1) {
-+ log_peer_warnx(&peer->conf,
-+ "Received graceful restart capa: "
-+ " unknown AFI %u, safi %u pair",
-+ afi, safi);
-+ continue;
-+ }
-+ memcpy(&gr_flags, capa_val + i + 3,
-+ sizeof(gr_flags));
-+ peer->capa.peer.grestart.flags[aid] |=
-+ CAPA_GR_PRESENT;
-+ if (gr_flags & CAPA_GR_F_FLAG)
-+ peer->capa.peer.grestart.flags[aid] |=
-+ CAPA_GR_FORWARD;
-+ if (gr_header & CAPA_GR_R_FLAG)
-+ peer->capa.peer.grestart.flags[aid] |=
-+ CAPA_GR_RESTART;
-+ peer->capa.peer.grestart.restart = 2;
-+ }
- break;
- case CAPA_AS4BYTE:
- if (capa_len != 4) {
- log_peer_warnx(&peer->conf,
-- "parse_capabilities: "
-- "expect len 4, len is %u", capa_len);
-- return (-1);
-+ "Bad AS4BYTE capability length: "
-+ "%u", capa_len);
-+ peer->capa.peer.as4byte = 0;
-+ break;
- }
- memcpy(&remote_as, capa_val, sizeof(remote_as));
- *as = ntohl(remote_as);
-@@ -2232,6 +2508,66 @@ parse_capabilities(struct peer *peer, u_
- return (0);
- }
-
-+int
-+capa_neg_calc(struct peer *p)
-+{
-+ u_int8_t i, hasmp = 0;
-+
-+ /* refresh: does not really matter here, use peer setting */
-+ p->capa.neg.refresh = p->capa.peer.refresh;
-+
-+ /* as4byte: both sides must announce capability */
-+ if (p->capa.ann.as4byte && p->capa.peer.as4byte)
-+ p->capa.neg.as4byte = 1;
-+ else
-+ p->capa.neg.as4byte = 0;
-+
-+ /* MP: both sides must announce capability */
-+ for (i = 0; i < AID_MAX; i++) {
-+ if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
-+ p->capa.neg.mp[i] = 1;
-+ hasmp = 1;
-+ } else
-+ p->capa.neg.mp[i] = 0;
-+ }
-+ /* if no MP capability present default to IPv4 unicast mode */
-+ if (!hasmp)
-+ p->capa.neg.mp[AID_INET] = 1;
-+
-+ /*
-+ * graceful restart: only the peer capabilities are of interest here.
-+ * It is necessary to compare the new values with the previous ones
-+ * and act accordingly. AFI/SAFI that are not part of the MP capability
-+ * are treated as not being present.
-+ */
-+
-+ for (i = 0; i < AID_MAX; i++) {
-+ /* disable GR if the AFI/SAFI is not present */
-+ if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
-+ p->capa.neg.mp[i] == 0)
-+ p->capa.peer.grestart.flags[i] = 0; /* disable */
-+ /* look at current GR state and decide what to do */
-+ if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
-+ if (!(p->capa.peer.grestart.flags[i] &
-+ CAPA_GR_FORWARD)) {
-+ if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
-+ p->conf.id, 0, -1, &i, sizeof(i)) == -1)
-+ return (-1);
-+ log_peer_warnx(&p->conf, "graceful restart of "
-+ "%s, not restarted, flushing", aid2str(i));
-+ }
-+ p->capa.neg.grestart.flags[i] =
-+ p->capa.peer.grestart.flags[i] | CAPA_GR_RESTARTING;
-+ } else
-+ p->capa.neg.grestart.flags[i] =
-+ p->capa.peer.grestart.flags[i];
-+ }
-+ p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
-+ p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
-+
-+ return (0);
-+}
-+
- void
- session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
- {
-@@ -2244,8 +2580,8 @@ session_dispatch_imsg(struct imsgbuf *ib
- struct kif *kif;
- u_char *data;
- enum reconf_action reconf;
-- int n, depend_ok;
-- u_int8_t errcode, subcode;
-+ int n, depend_ok, restricted;
-+ u_int8_t aid, errcode, subcode;
-
- if ((n = imsg_read(ibuf)) == -1)
- fatal("session_dispatch_imsg: imsg_read error");
-@@ -2332,15 +2668,42 @@ session_dispatch_imsg(struct imsgbuf *ib
- }
-
- break;
-+ case IMSG_RECONF_CTRL:
-+ if (idx != PFD_PIPE_MAIN)
-+ fatalx("reconf request not from parent");
-+ if (imsg.hdr.len != IMSG_HEADER_SIZE +
-+ sizeof(restricted))
-+ fatalx("IFINFO imsg with wrong len");
-+ memcpy(&restricted, imsg.data, sizeof(restricted));
-+ if (imsg.fd == -1) {
-+ log_warnx("expected to receive fd for control "
-+ "socket but didn't receive any");
-+ break;
-+ }
-+ if (restricted) {
-+ control_shutdown(rcsock);
-+ rcsock = imsg.fd;
-+ control_listen(rcsock);
-+ } else {
-+ control_shutdown(csock);
-+ csock = imsg.fd;
-+ control_listen(csock);
-+ }
-+ break;
- case IMSG_RECONF_DONE:
- if (idx != PFD_PIPE_MAIN)
- fatalx("reconf request not from parent");
- if (nconf == NULL)
- fatalx("got IMSG_RECONF_DONE but no config");
-+ conf->flags = nconf->flags;
-+ conf->log = nconf->log;
-+ conf->bgpid = nconf->bgpid;
-+ conf->clusterid = nconf->clusterid;
- conf->as = nconf->as;
-+ conf->short_as = nconf->short_as;
- conf->holdtime = nconf->holdtime;
-- conf->bgpid = nconf->bgpid;
- conf->min_holdtime = nconf->min_holdtime;
-+ conf->connectretry = nconf->connectretry;
-
- /* add new peers */
- for (p = npeers; p != NULL; p = next) {
-@@ -2388,6 +2751,8 @@ session_dispatch_imsg(struct imsgbuf *ib
- nconf = NULL;
- pending_reconf = 0;
- log_info("SE reconfigured");
-+ imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
-+ -1, NULL, 0);
- break;
- case IMSG_IFINFO:
- if (idx != PFD_PIPE_MAIN)
-@@ -2397,9 +2762,7 @@ session_dispatch_imsg(struct imsgbuf *ib
- fatalx("IFINFO imsg with wrong len");
- kif = imsg.data;
- depend_ok = (kif->flags & IFF_UP) &&
-- (LINK_STATE_IS_UP(kif->link_state) ||
-- (kif->link_state == LINK_STATE_UNKNOWN &&
-- kif->media_type != IFT_CARP));
-+ LINK_STATE_IS_UP(kif->link_state);
-
- for (p = peers; p != NULL; p = p->next)
- if (!strcmp(p->conf.if_depend, kif->ifname)) {
-@@ -2408,7 +2771,8 @@ session_dispatch_imsg(struct imsgbuf *ib
- bgp_fsm(p, EVNT_START);
- } else if (!depend_ok && p->depend_ok) {
- p->depend_ok = depend_ok;
-- bgp_fsm(p, EVNT_STOP);
-+ session_stop(p,
-+ ERR_CEASE_OTHER_CHANGE);
- }
- }
- break;
-@@ -2456,10 +2820,10 @@ session_dispatch_imsg(struct imsgbuf *ib
- }
- break;
- case IMSG_CTL_KROUTE:
-- case IMSG_CTL_KROUTE6:
- case IMSG_CTL_KROUTE_ADDR:
- case IMSG_CTL_SHOW_NEXTHOP:
- case IMSG_CTL_SHOW_INTERFACE:
-+ case IMSG_CTL_SHOW_FIB_TABLES:
- if (idx != PFD_PIPE_MAIN)
- fatalx("ctl kroute request not from parent");
- control_imsg_relay(&imsg);
-@@ -2469,7 +2833,6 @@ session_dispatch_imsg(struct imsgbuf *ib
- case IMSG_CTL_SHOW_RIB_ATTR:
- case IMSG_CTL_SHOW_RIB_MEM:
- case IMSG_CTL_SHOW_NETWORK:
-- case IMSG_CTL_SHOW_NETWORK6:
- case IMSG_CTL_SHOW_NEIGHBOR:
- if (idx != PFD_PIPE_ROUTE_CTL)
- fatalx("ctl rib request not from RDE");
-@@ -2531,6 +2894,40 @@ session_dispatch_imsg(struct imsgbuf *ib
- break;
- }
- break;
-+ case IMSG_SESSION_RESTARTED:
-+ if (idx != PFD_PIPE_ROUTE)
-+ fatalx("update request not from RDE");
-+ if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
-+ log_warnx("RDE sent invalid restart msg");
-+ break;
-+ }
-+ if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
-+ log_warnx("no such peer: id=%u",
-+ imsg.hdr.peerid);
-+ break;
-+ }
-+ memcpy(&aid, imsg.data, sizeof(aid));
-+ if (aid >= AID_MAX)
-+ fatalx("IMSG_SESSION_RESTARTED: bad AID");
-+ if (p->capa.neg.grestart.flags[aid] &
-+ CAPA_GR_RESTARTING &&
-+ p->capa.neg.grestart.flags[aid] &
-+ CAPA_GR_FORWARD) {
-+ log_peer_warnx(&p->conf,
-+ "graceful restart of %s finished",
-+ aid2str(aid));
-+ p->capa.neg.grestart.flags[aid] &=
-+ ~CAPA_GR_RESTARTING;
-+ timer_stop(p, Timer_RestartTimeout);
-+
-+ /* signal back to RDE to cleanup stale routes */
-+ if (imsg_compose(ibuf_rde,
-+ IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0,
-+ -1, &aid, sizeof(aid)) == -1)
-+ fatal("imsg_compose: "
-+ "IMSG_SESSION_RESTARTED");
-+ }
-+ break;
- default:
- break;
- }
-@@ -2612,29 +3009,23 @@ getpeerbydesc(const char *descr)
- struct peer *
- getpeerbyip(struct sockaddr *ip)
- {
-+ struct bgpd_addr addr;
- struct peer *p, *newpeer, *loose = NULL;
- u_int32_t id;
-
-+ sa2addr(ip, &addr);
-+
- /* we might want a more effective way to find peers by IP */
- for (p = peers; p != NULL; p = p->next)
- if (!p->conf.template &&
-- p->conf.remote_addr.af == ip->sa_family) {
-- if (p->conf.remote_addr.af == AF_INET &&
-- p->conf.remote_addr.v4.s_addr ==
-- ((struct sockaddr_in *)ip)->sin_addr.s_addr)
-- return (p);
-- if (p->conf.remote_addr.af == AF_INET6 &&
-- !bcmp(&p->conf.remote_addr.v6,
-- &((struct sockaddr_in6 *)ip)->sin6_addr,
-- sizeof(p->conf.remote_addr.v6)))
-- return (p);
-- }
-+ !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
-+ return (p);
-
- /* try template matching */
- for (p = peers; p != NULL; p = p->next)
- if (p->conf.template &&
-- p->conf.remote_addr.af == ip->sa_family &&
-- session_match_mask(p, ip))
-+ p->conf.remote_addr.aid == addr.aid &&
-+ session_match_mask(p, &addr))
- if (loose == NULL || loose->conf.remote_masklen <
- p->conf.remote_masklen)
- loose = p;
-@@ -2653,21 +3044,19 @@ getpeerbyip(struct sockaddr *ip)
- break;
- }
- }
-- if (newpeer->conf.remote_addr.af == AF_INET) {
-- newpeer->conf.remote_addr.v4.s_addr =
-- ((struct sockaddr_in *)ip)->sin_addr.s_addr;
-+ sa2addr(ip, &newpeer->conf.remote_addr);
-+ switch (ip->sa_family) {
-+ case AF_INET:
- newpeer->conf.remote_masklen = 32;
-- }
-- if (newpeer->conf.remote_addr.af == AF_INET6) {
-- memcpy(&p->conf.remote_addr.v6,
-- &((struct sockaddr_in6 *)ip)->sin6_addr,
-- sizeof(newpeer->conf.remote_addr.v6));
-+ break;
-+ case AF_INET6:
- newpeer->conf.remote_masklen = 128;
-+ break;
- }
- newpeer->conf.template = 0;
- newpeer->conf.cloned = 1;
- newpeer->state = newpeer->prev_state = STATE_NONE;
-- newpeer->conf.reconf_action = RECONF_REINIT;
-+ newpeer->conf.reconf_action = RECONF_KEEP;
- newpeer->rbuf = NULL;
- init_peer(newpeer);
- bgp_fsm(newpeer, EVNT_START);
-@@ -2680,40 +3069,24 @@ getpeerbyip(struct sockaddr *ip)
- }
-
- int
--session_match_mask(struct peer *p, struct sockaddr *ip)
-+session_match_mask(struct peer *p, struct bgpd_addr *a)
- {
-- int i;
- in_addr_t v4mask;
-- struct in6_addr *in;
-- struct in6_addr mask;
-+ struct in6_addr masked;
-
-- if (p->conf.remote_addr.af == AF_INET) {
-+ switch (p->conf.remote_addr.aid) {
-+ case AID_INET:
- v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
-- if (p->conf.remote_addr.v4.s_addr ==
-- ((((struct sockaddr_in *)ip)->sin_addr.s_addr) & v4mask))
-+ if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
- return (1);
-- else
-- return (0);
-- }
--
-- if (p->conf.remote_addr.af == AF_INET6) {
-- bzero(&mask, sizeof(mask));
-- for (i = 0; i < p->conf.remote_masklen / 8; i++)
-- mask.s6_addr[i] = 0xff;
-- i = p->conf.remote_masklen % 8;
-- if (i)
-- mask.s6_addr[p->conf.remote_masklen / 8] = 0xff00 >> i;
--
-- in = &((struct sockaddr_in6 *)ip)->sin6_addr;
--
-- for (i = 0; i < 16; i++)
-- if ((in->s6_addr[i] & mask.s6_addr[i]) !=
-- p->conf.remote_addr.addr8[i])
-- return (0);
-+ return (0);
-+ case AID_INET6:
-+ inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
-
-- return (1);
-+ if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
-+ return (1);
-+ return (0);
- }
--
- return (0);
- }
-
-@@ -2733,6 +3106,7 @@ getpeerbyid(u_int32_t peerid)
- void
- session_down(struct peer *peer)
- {
-+ bzero(&peer->capa.neg, sizeof(peer->capa.neg));
- peer->stats.last_updown = time(NULL);
- if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
- NULL, 0) == -1)
-@@ -2744,39 +3118,17 @@ session_up(struct peer *p)
- {
- struct session_up sup;
-
-- if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
-- &p->conf, sizeof(p->conf)) == -1)
-- fatalx("imsg_compose error");
-+ if (!session_graceful_is_restarting(p))
-+ if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
-+ &p->conf, sizeof(p->conf)) == -1)
-+ fatalx("imsg_compose error");
-
-- switch (p->sa_local.ss_family) {
-- case AF_INET:
-- sup.local_addr.af = AF_INET;
-- memcpy(&sup.local_addr.v4,
-- &((struct sockaddr_in *)&p->sa_local)->sin_addr,
-- sizeof(sup.local_addr.v4));
-- sup.remote_addr.af = AF_INET;
-- memcpy(&sup.remote_addr.v4,
-- &((struct sockaddr_in *)&p->sa_remote)->sin_addr,
-- sizeof(sup.remote_addr.v4));
-- break;
-- case AF_INET6:
-- sup.local_addr.af = AF_INET6;
-- memcpy(&sup.local_addr.v6,
-- &((struct sockaddr_in6 *)&p->sa_local)->sin6_addr,
-- sizeof(sup.local_addr.v6));
-- sup.remote_addr.af = AF_INET6;
-- memcpy(&sup.remote_addr.v6,
-- &((struct sockaddr_in6 *)&p->sa_remote)->sin6_addr,
-- sizeof(sup.remote_addr.v6));
-- break;
-- default:
-- fatalx("session_up: unsupported address family");
-- }
-+ sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
-+ sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
-
- sup.remote_bgpid = p->remote_bgpid;
- sup.short_as = p->short_as;
-- memcpy(&sup.capa_announced, &p->capa.ann, sizeof(sup.capa_announced));
-- memcpy(&sup.capa_received, &p->capa.peer, sizeof(sup.capa_received));
-+ memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
- p->stats.last_updown = time(NULL);
- if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
- &sup, sizeof(sup)) == -1)
-@@ -2784,9 +3136,10 @@ session_up(struct peer *p)
- }
-
- int
--imsg_compose_parent(int type, pid_t pid, void *data, u_int16_t datalen)
-+imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data,
-+ u_int16_t datalen)
- {
-- return (imsg_compose(ibuf_main, type, 0, pid, -1, data, datalen));
-+ return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
- }
-
- int
-@@ -2795,34 +3148,6 @@ imsg_compose_rde(int type, pid_t pid, vo
- return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
- }
-
--static struct sockaddr *
--addr2sa(struct bgpd_addr *addr, u_int16_t port)
--{
-- static struct sockaddr_storage ss;
-- struct sockaddr_in *sa_in = (struct sockaddr_in *)&ss;
-- struct sockaddr_in6 *sa_in6 = (struct sockaddr_in6 *)&ss;
--
-- bzero(&ss, sizeof(ss));
-- switch (addr->af) {
-- case AF_INET:
-- sa_in->sin_family = AF_INET;
-- sa_in->sin_len = sizeof(struct sockaddr_in);
-- sa_in->sin_addr.s_addr = addr->v4.s_addr;
-- sa_in->sin_port = htons(port);
-- break;
-- case AF_INET6:
-- sa_in6->sin6_family = AF_INET6;
-- sa_in6->sin6_len = sizeof(struct sockaddr_in6);
-- memcpy(&sa_in6->sin6_addr, &addr->v6,
-- sizeof(sa_in6->sin6_addr));
-- sa_in6->sin6_port = htons(port);
-- sa_in6->sin6_scope_id = addr->scope_id;
-- break;
-- }
--
-- return ((struct sockaddr *)&ss);
--}
--
- void
- session_demote(struct peer *p, int level)
- {
-@@ -2837,3 +3162,19 @@ session_demote(struct peer *p, int level
-
- p->demoted += level;
- }
-+
-+void
-+session_stop(struct peer *peer, u_int8_t subcode)
-+{
-+ switch (peer->state) {
-+ case STATE_OPENSENT:
-+ case STATE_OPENCONFIRM:
-+ case STATE_ESTABLISHED:
-+ session_notification(peer, ERR_CEASE, subcode, NULL, 0);
-+ break;
-+ default:
-+ /* session not open, no need to send notification */
-+ break;
-+ }
-+ bgp_fsm(peer, EVNT_STOP);
-+}