diff options
Diffstat (limited to 'lib/kadm5/ipropd_master.c')
| -rw-r--r-- | lib/kadm5/ipropd_master.c | 960 |
1 files changed, 716 insertions, 244 deletions
diff --git a/lib/kadm5/ipropd_master.c b/lib/kadm5/ipropd_master.c index d0fdc8e26f10..3d4218f03a78 100644 --- a/lib/kadm5/ipropd_master.c +++ b/lib/kadm5/ipropd_master.c @@ -121,6 +121,7 @@ make_listen_socket (krb5_context context, const char *port_str) return fd; } + struct slave { krb5_socket_t fd; struct sockaddr_in addr; @@ -128,10 +129,45 @@ struct slave { krb5_auth_context ac; uint32_t version; uint32_t version_tstamp; + uint32_t version_ack; time_t seen; unsigned long flags; #define SLAVE_F_DEAD 0x1 #define SLAVE_F_AYT 0x2 +#define SLAVE_F_READY 0x4 + /* + * We'll use non-blocking I/O so no slave can hold us back. + * + * We call the state left over from a partial write a "tail". + * + * The krb5_data holding an KRB-PRIV will be the write buffer. + */ + struct { + /* Every message we send is a KRB-PRIV with a 4-byte length prefixed */ + uint8_t header_buf[4]; + krb5_data header; + krb5_data packet; + size_t packet_off; + /* For send_complete() we need an sp as part of the tail */ + krb5_storage *dump; + uint32_t vno; + } tail; + struct { + uint8_t header_buf[4]; + krb5_data packet; + size_t offset; + int hlen; + } input; + /* + * Continuation for fair diff sending we send N entries at a time. + */ + struct { + off_t off_next_version; /* offset in log of next diff */ + uint32_t initial_version; /* at time of previous diff */ + uint32_t initial_tstamp; /* at time of previous diff */ + uint32_t last_version_sent; + int more; /* need to send more diffs */ + } next_diff; struct slave *next; }; @@ -220,6 +256,11 @@ remove_slave (krb5_context context, slave *s, slave **root) if (s->ac) krb5_auth_con_free (context, s->ac); + /* Free any pending input/output state */ + krb5_data_free(&s->input.packet); + krb5_data_free(&s->tail.packet); + krb5_storage_free(s->tail.dump); + for (p = root; *p; p = &(*p)->next) if (*p == s) { *p = s->next; @@ -239,13 +280,17 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root, krb5_ticket *ticket = NULL; char hostname[128]; - s = malloc(sizeof(*s)); + s = calloc(1, sizeof(*s)); if (s == NULL) { krb5_warnx (context, "add_slave: no memory"); return; } s->name = NULL; s->ac = NULL; + s->input.packet.data = NULL; + s->tail.header.data = NULL; + s->tail.packet.data = NULL; + s->tail.dump = NULL; addr_len = sizeof(s->addr); s->fd = accept (fd, (struct sockaddr *)&s->addr, &addr_len); @@ -253,6 +298,7 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root, krb5_warn (context, rk_SOCK_ERRNO, "accept"); goto error; } + if (master_hostname) strlcpy(hostname, master_hostname, sizeof(hostname)); else @@ -267,6 +313,35 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root, ret = krb5_recvauth (context, &s->ac, &s->fd, IPROP_VERSION, server, 0, keytab, &ticket); + + /* + * We'll be doing non-blocking I/O only after authentication. We don't + * want to get stuck talking to any one slave. + * + * If we get a partial write, we'll finish writing when the socket becomes + * writable. + * + * Partial reads will be treated as EOF, causing the slave to be marked + * dead. + * + * To do non-blocking I/O for authentication we'll have to implement our + * own krb5_recvauth(). + */ + socket_set_nonblocking(s->fd, 1); + + /* + * We write message lengths separately from the payload, and may do + * back-to-back small writes when flushing pending input and then a new + * update. Avoid Nagle delays. + */ +#if defined(IPPROTO_TCP) && defined(TCP_NODELAY) + { + int nodelay = 1; + (void) setsockopt(s->fd, IPPROTO_TCP, TCP_NODELAY, + (void *)&nodelay, sizeof(nodelay)); + } +#endif + krb5_free_principal (context, server); if (ret) { krb5_warn (context, ret, "krb5_recvauth"); @@ -304,6 +379,7 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root, krb5_warnx (context, "connection from %s", s->name); s->version = 0; + s->version_ack = 0; s->flags = 0; slave_seen(s); s->next = *root; @@ -372,6 +448,8 @@ write_dump (krb5_context context, krb5_storage *dump, */ ret = krb5_store_uint32(dump, 0); + if (ret) + return ret; ret = hdb_create (context, &db, database); if (ret) @@ -452,14 +530,146 @@ write_dump (krb5_context context, krb5_storage *dump, } static int -send_complete (krb5_context context, slave *s, const char *database, - uint32_t current_version, uint32_t oldest_version, - uint32_t initial_log_tstamp) +mk_priv_tail(krb5_context context, slave *s, krb5_data *data) +{ + uint32_t len; + int ret; + + ret = krb5_mk_priv(context, s->ac, data, &s->tail.packet, NULL); + if (ret) + return ret; + + len = s->tail.packet.length; + _krb5_put_int(s->tail.header_buf, len, sizeof(s->tail.header_buf)); + s->tail.header.length = sizeof(s->tail.header_buf); + s->tail.header.data = s->tail.header_buf; + return 0; +} + +static int +have_tail(slave *s) +{ + return s->tail.header.length || s->tail.packet.length || s->tail.dump; +} + +static int +more_diffs(slave *s) +{ + return s->next_diff.more; +} + +#define SEND_COMPLETE_MAX_RECORDS 50 +#define SEND_DIFFS_MAX_RECORDS 50 + +static int +send_tail(krb5_context context, slave *s) +{ + krb5_data data; + ssize_t bytes = 0; + size_t rem = 0; + size_t n; + int ret; + + if (! have_tail(s)) + return 0; + + /* + * For the case where we're continuing a send_complete() send up to + * SEND_COMPLETE_MAX_RECORDS records now, and the rest asynchronously + * later. This ensures that sending a complete dump to a slow-to-drain + * client does not prevent others from getting serviced. + */ + for (n = 0; n < SEND_COMPLETE_MAX_RECORDS; n++) { + if (! have_tail(s)) + return 0; + + if (s->tail.header.length) { + bytes = krb5_net_write(context, &s->fd, + s->tail.header.data, + s->tail.header.length); + if (bytes < 0) + goto err; + + s->tail.header.length -= bytes; + s->tail.header.data = (char *)s->tail.header.data + bytes; + rem = s->tail.header.length; + if (rem) + goto ewouldblock; + } + + if (s->tail.packet.length) { + bytes = krb5_net_write(context, &s->fd, + (char *)s->tail.packet.data + s->tail.packet_off, + s->tail.packet.length - s->tail.packet_off); + if (bytes < 0) + goto err; + s->tail.packet_off += bytes; + if (bytes) + slave_seen(s); + rem = s->tail.packet.length - s->tail.packet_off; + if (rem) + goto ewouldblock; + + krb5_data_free(&s->tail.packet); + s->tail.packet_off = 0; + } + + if (s->tail.dump == NULL) + return 0; + + /* + * We're in the middle of a send_complete() that was interrupted by + * EWOULDBLOCK. Continue the sending of the dump. + */ + ret = krb5_ret_data(s->tail.dump, &data); + if (ret == HEIM_ERR_EOF) { + krb5_storage_free(s->tail.dump); + s->tail.dump = NULL; + s->version = s->tail.vno; + return 0; + } + + if (ret) { + krb5_warn(context, ret, "failed to read entry from dump!"); + } else { + ret = mk_priv_tail(context, s, &data); + krb5_data_free(&data); + if (ret == 0) + continue; + krb5_warn(context, ret, "failed to make and send a KRB-PRIV to %s", + s->name); + } + + slave_dead(context, s); + return ret; + } + + if (ret == 0 && s->tail.dump != NULL) + return EWOULDBLOCK; + +err: + if (errno != EAGAIN && errno != EWOULDBLOCK) { + krb5_warn(context, ret = errno, + "error sending diffs to now-dead slave %s", s->name); + slave_dead(context, s); + return ret; + } + +ewouldblock: + if (verbose) + krb5_warnx(context, "would block writing %llu bytes to slave %s", + (unsigned long long)rem, s->name); + return EWOULDBLOCK; +} + +static int +send_complete(krb5_context context, slave *s, const char *database, + uint32_t current_version, uint32_t oldest_version, + uint32_t initial_log_tstamp) { krb5_error_code ret; krb5_storage *dump = NULL; uint32_t vno = 0; - krb5_data data; int fd = -1; struct stat st; char *dfn; @@ -517,7 +727,6 @@ send_complete (krb5_context context, slave *s, const char *database, * If the current dump has an appropriate version, then we can * break out of the loop and send the file below. */ - if (ret == 0 && vno != 0 && st.st_mtime > initial_log_tstamp && vno >= oldest_version && vno <= current_version) break; @@ -583,38 +792,18 @@ send_complete (krb5_context context, slave *s, const char *database, /* * Leaving the above loop, dump should have a ptr right after the initial * 4 byte DB version number and we should have a shared lock on the file - * (which we may have just created), so we are reading to simply blast + * (which we may have just created), so we are reading to start sending * the data down the wire. + * + * Note: (krb5_storage_from_fd() dup()'s the fd) */ - for (;;) { - ret = krb5_ret_data(dump, &data); - if (ret == HEIM_ERR_EOF) { - ret = 0; /* EOF is not an error, it's success */ - goto done; - } - - if (ret) { - krb5_warn(context, ret, "krb5_ret_data(dump, &data)"); - slave_dead(context, s); - goto done; - } - - ret = krb5_write_priv_message(context, s->ac, &s->fd, &data); - krb5_data_free(&data); - - if (ret) { - krb5_warn (context, ret, "krb5_write_priv_message"); - slave_dead(context, s); - goto done; - } - } + s->tail.dump = dump; + s->tail.vno = vno; + dump = NULL; + ret = send_tail(context, s); done: - if (!ret) { - s->version = vno; - slave_seen(s); - } if (fd != -1) close(fd); if (dump) @@ -633,6 +822,14 @@ send_are_you_there (krb5_context context, slave *s) if (s->flags & (SLAVE_F_DEAD|SLAVE_F_AYT)) return 0; + /* + * Write any remainder of previous write, if we can. If we'd block we'll + * return EWOULDBLOCK. + */ + ret = send_tail(context, s); + if (ret) + return ret; + krb5_warnx(context, "slave %s missing, sending AYT", s->name); s->flags |= SLAVE_F_AYT; @@ -644,232 +841,465 @@ send_are_you_there (krb5_context context, slave *s) if (sp == NULL) { krb5_warnx (context, "are_you_there: krb5_data_alloc"); slave_dead(context, s); - return 1; + return ENOMEM; } ret = krb5_store_uint32(sp, ARE_YOU_THERE); krb5_storage_free (sp); - if (ret == 0) { - ret = krb5_write_priv_message(context, s->ac, &s->fd, &data); + if (ret == 0) + ret = mk_priv_tail(context, s, &data); + if (ret == 0) + ret = send_tail(context, s); + if (ret && ret != EWOULDBLOCK) { + krb5_warn(context, ret, "are_you_there"); + slave_dead(context, s); + } + return ret; +} - if (ret) { - krb5_warn(context, ret, "are_you_there: krb5_write_priv_message"); - slave_dead(context, s); - return 1; - } +static int +diffready(krb5_context context, slave *s) +{ + /* + * Don't send any diffs until slave has sent an I_HAVE telling us the + * initial version number! + */ + if ((s->flags & SLAVE_F_READY) == 0) + return 0; + + if (s->flags & SLAVE_F_DEAD) { + if (verbose) + krb5_warnx(context, "not sending diffs to dead slave %s", s->name); + return 0; } - return 0; + /* Write any remainder of previous write, if we can. */ + if (send_tail(context, s) != 0) + return 0; + + return 1; } static int -send_diffs (kadm5_server_context *server_context, slave *s, int log_fd, - const char *database, uint32_t current_version, - uint32_t current_tstamp) +nodiffs(krb5_context context, slave *s, uint32_t current_version) { - krb5_context context = server_context->context; krb5_storage *sp; - uint32_t ver, initial_version, initial_version2; - uint32_t initial_tstamp, initial_tstamp2; - enum kadm_ops op; - uint32_t len; - off_t right, left; - krb5_ssize_t bytes; krb5_data data; - int ret = 0; + int ret; - if (s->flags & SLAVE_F_DEAD) { - krb5_warnx(context, "not sending diffs to dead slave %s", s->name); + if (s->version < current_version) return 0; - } - if (s->version == current_version) { - char buf[4]; - - sp = krb5_storage_from_mem(buf, 4); - if (sp == NULL) - krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_mem"); - ret = krb5_store_uint32(sp, YOU_HAVE_LAST_VERSION); - krb5_storage_free(sp); - data.data = buf; - data.length = 4; - if (ret == 0) { - ret = krb5_write_priv_message(context, s->ac, &s->fd, &data); - if (ret) { - krb5_warn(context, ret, "send_diffs: failed to send to slave"); - slave_dead(context, s); - } - krb5_warnx(context, "slave %s in sync already at version %ld", - s->name, (long)s->version); - } - return ret; - } + /* + * If we had sent a partial diff, and now they're caught up, then there's + * no more. + */ + s->next_diff.more = 0; if (verbose) - krb5_warnx(context, "sending diffs to live-seeming slave %s", s->name); + krb5_warnx(context, "slave %s version %ld already sent", s->name, + (long)s->version); + sp = krb5_storage_emem(); + if (sp == NULL) + krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_mem"); + + ret = krb5_store_uint32(sp, YOU_HAVE_LAST_VERSION); + if (ret == 0) { + krb5_data_zero(&data); + ret = krb5_storage_to_data(sp, &data); + } + krb5_storage_free(sp); + if (ret == 0) { + ret = mk_priv_tail(context, s, &data); + krb5_data_free(&data); + } + if (ret == 0) + send_tail(context, s); + + return 1; +} + +/* + * Lock the log and return initial version and timestamp + */ +static int +get_first(kadm5_server_context *server_context, int log_fd, + uint32_t *initial_verp, uint32_t *initial_timep) +{ + krb5_context context = server_context->context; + int ret; /* - * XXX The code that makes the diffs should be made a separate function, - * then error handling (send_are_you_there() or slave_dead()) can be done - * here. + * We don't want to perform tight retry loops on log access errors, so on + * error mark the slave dead. The slave reconnect after a delay... */ - if (flock(log_fd, LOCK_SH) == -1) { krb5_warn(context, errno, "could not obtain shared lock on log file"); - send_are_you_there(context, s); - return errno; + return -1; } + ret = kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_FIRST, - &initial_version, &initial_tstamp); - sp = kadm5_log_goto_end(server_context, log_fd); - flock(log_fd, LOCK_UN); - if (ret) { - if (sp != NULL) - krb5_storage_free(sp); - krb5_warn(context, ret, "send_diffs: failed to read log"); - send_are_you_there(context, s); - return ret; - } - if (sp == NULL) { - send_are_you_there(context, s); - krb5_warn(context, errno ? errno : EINVAL, - "send_diffs: failed to read log"); - return errno ? errno : EINVAL; - } - /* - * We're not holding any locks here, so we can't prevent truncations. - * - * We protect against this by re-checking that the initial version and - * timestamp are the same before and after this loop. - */ - right = krb5_storage_seek(sp, 0, SEEK_CUR); - if (right == (off_t)-1) { - krb5_storage_free(sp); - send_are_you_there(context, s); - return errno; + initial_verp, initial_timep); + if (ret != 0) { + flock(log_fd, LOCK_UN); + krb5_warnx(context, "could not read initial log entry"); + return -1; } + + return 0; +} + +/*- + * Find the left end of the diffs in the log we want to send. + * + * - On success, return a positive offset to the first new entry, retaining + * a read lock on the log file. + * - On error, return a negative offset, with the lock released. + * - If we simply find no successor entry in the log, return zero + * with the lock released, which indicates that fallback to send_complete() + * is needed. + */ +static off_t +get_left(kadm5_server_context *server_context, slave *s, krb5_storage *sp, + int log_fd, uint32_t current_version, + uint32_t *initial_verp, uint32_t *initial_timep) +{ + krb5_context context = server_context->context; + off_t pos; + off_t left; + int ret; + for (;;) { - ret = kadm5_log_previous (context, sp, &ver, NULL, &op, &len); - if (ret) - krb5_err(context, IPROPD_RESTART, ret, - "send_diffs: failed to find previous entry"); - left = krb5_storage_seek(sp, -16, SEEK_CUR); - if (left == (off_t)-1) { - krb5_storage_free(sp); - send_are_you_there(context, s); - return errno; + uint32_t ver = s->version; + + /* This acquires a read lock on success */ + ret = get_first(server_context, log_fd, + initial_verp, initial_timep); + if (ret != 0) + return -1; + + /* When the slave version is out of range, send the whole database. */ + if (ver == 0 || ver < *initial_verp || ver > current_version) { + flock(log_fd, LOCK_UN); + return 0; } - if (ver == s->version + 1) - break; + + /* Avoid seeking past the last committed record */ + if (kadm5_log_goto_end(server_context, sp) != 0 || + (pos = krb5_storage_seek(sp, 0, SEEK_CUR)) < 0) + goto err; /* - * We don't expect to reach the slave's version, except when it is - * starting empty with the uber record. + * First try to see if we can find it quickly by seeking to the right + * end of the previous diff sent. */ - if (ver == s->version && !(ver == 0 && op == kadm_nop)) { + if (s->next_diff.last_version_sent > 0 && + s->next_diff.off_next_version > 0 && + s->next_diff.off_next_version < pos && + s->next_diff.initial_version == *initial_verp && + s->next_diff.initial_tstamp == *initial_timep) { /* - * This shouldn't happen, but recall we're not holding a lock on - * the log. + * Sanity check that the left version matches what we wanted, the + * log may have been truncated since. */ - krb5_storage_free(sp); - krb5_warnx(context, "iprop log truncated while sending diffs to " - "slave?? ver = %lu", (unsigned long)ver); - send_are_you_there(context, s); - return 0; + left = s->next_diff.off_next_version; + if (krb5_storage_seek(sp, left, SEEK_SET) != left) + goto err; + if (kadm5_log_next(context, sp, &ver, NULL, NULL, NULL) == 0 && + ver == s->next_diff.last_version_sent + 1) + return left; } - /* If we've reached the uber record, send the complete database */ - if (left == 0 || (ver == 0 && op == kadm_nop)) { - krb5_storage_free(sp); - krb5_warnx(context, - "slave %s (version %lu) out of sync with master " - "(first version in log %lu), sending complete database", - s->name, (unsigned long)s->version, (unsigned long)ver); - return send_complete (context, s, database, current_version, ver, - initial_tstamp); - } + if (krb5_storage_seek(sp, pos, SEEK_SET) != pos) + goto err; + + /* + * Drop the lock and try to find the left entry by seeking backward + * from the end of the end of the log. If we succeed, re-acquire the + * lock, update "next_diff", and retry the fast-path. + */ + flock(log_fd, LOCK_UN); + + /* Slow path: seek backwards, entry by entry, from the end */ + for (;;) { + enum kadm_ops op; + uint32_t len; + + ret = kadm5_log_previous(context, sp, &ver, NULL, &op, &len); + if (ret) + return -1; + left = krb5_storage_seek(sp, -16, SEEK_CUR); + if (left < 0) + return left; + if (ver == s->version + 1) + break; + + /* + * We don't expect to reach the slave's version, unless the log + * has been modified after we released the lock. + */ + if (ver == s->version) { + krb5_warnx(context, "iprop log truncated while sending diffs " + "to slave?? ver = %lu", (unsigned long)ver); + return -1; + } + + /* If we've reached the uber record, send the complete database */ + if (left == 0 || (ver == 0 && op == kadm_nop)) + return 0; + } + assert(ver == s->version + 1); + + /* Set up the fast-path pre-conditions */ + s->next_diff.last_version_sent = s->version; + s->next_diff.off_next_version = left; + s->next_diff.initial_version = *initial_verp; + s->next_diff.initial_tstamp = *initial_timep; + + /* + * If we loop then we're hoping to hit the fast path so we can return a + * non-zero, positive left offset with the lock held. + * + * We just updated the fast path pre-conditions, so unless a log + * truncation event happens between the point where we dropped the lock + * and the point where we rearcuire it above, we will hit the fast + * path. + */ } - assert(ver == s->version + 1); + return left; - krb5_warnx(context, - "syncing slave %s from version %lu to version %lu", - s->name, (unsigned long)s->version, - (unsigned long)current_version); + err: + flock(log_fd, LOCK_UN); + return -1; +} - ret = krb5_data_alloc (&data, right - left + 4); - if (ret) { - krb5_storage_free(sp); - krb5_warn (context, ret, "send_diffs: krb5_data_alloc"); - send_are_you_there(context, s); - return 1; +static off_t +get_right(krb5_context context, int log_fd, krb5_storage *sp, + int lastver, slave *s, off_t left, uint32_t *verp) +{ + int ret = 0; + int i = 0; + uint32_t ver = s->version; + off_t right = krb5_storage_seek(sp, left, SEEK_SET); + + if (right <= 0) { + flock(log_fd, LOCK_UN); + return -1; } - bytes = krb5_storage_read(sp, (char *)data.data + 4, data.length - 4); - krb5_storage_free(sp); - if (bytes != data.length - 4) { - krb5_warnx(context, "iprop log truncated while sending diffs to " - "slave?? ver = %lu", (unsigned long)ver); - send_are_you_there(context, s); - return 1; + + /* The "lastver" bound should preclude us reaching EOF */ + for (; ret == 0 && i < SEND_DIFFS_MAX_RECORDS && ver < lastver; ++i) { + uint32_t logver; + + ret = kadm5_log_next(context, sp, &logver, NULL, NULL, NULL); + if (logver != ++ver) + ret = KADM5_LOG_CORRUPT; } - /* - * Check that we have the same log initial version and timestamp now as - * when we dropped the shared lock on the log file! Else we could be - * sending garbage to the slave. - */ - if (flock(log_fd, LOCK_SH) == -1) { - krb5_warn(context, errno, "could not obtain shared lock on log file"); - send_are_you_there(context, s); - return 1; + if (ret == 0) + right = krb5_storage_seek(sp, 0, SEEK_CUR); + else + right = -1; + if (right <= 0) { + flock(log_fd, LOCK_UN); + return -1; } - ret = kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_FIRST, - &initial_version2, &initial_tstamp2); - flock(log_fd, LOCK_UN); - if (ret) { - krb5_warn(context, ret, - "send_diffs: failed to read log while producing diffs"); - send_are_you_there(context, s); - return 1; + *verp = ver; + return right; +} + +static void +send_diffs(kadm5_server_context *server_context, slave *s, int log_fd, + const char *database, uint32_t current_version) +{ + krb5_context context = server_context->context; + krb5_storage *sp; + uint32_t initial_version; + uint32_t initial_tstamp; + uint32_t ver; + off_t left = 0; + off_t right = 0; + krb5_ssize_t bytes; + krb5_data data; + int ret = 0; + + if (!diffready(context, s) || nodiffs(context, s, current_version)) + return; + + if (verbose) + krb5_warnx(context, "sending diffs to live-seeming slave %s", s->name); + + sp = krb5_storage_from_fd(log_fd); + if (sp == NULL) + krb5_err(context, IPROPD_RESTART_SLOW, ENOMEM, + "send_diffs: out of memory"); + + left = get_left(server_context, s, sp, log_fd, current_version, + &initial_version, &initial_tstamp); + if (left < 0) { + krb5_storage_free(sp); + slave_dead(context, s); + return; } - if (initial_version != initial_version2 || - initial_tstamp != initial_tstamp2) { - krb5_warn(context, ret, - "send_diffs: log truncated while producing diffs"); - send_are_you_there(context, s); - return 1; + + if (left == 0) { + /* Slave's version is not in the log, fall back on send_complete() */ + krb5_storage_free(sp); + send_complete(context, s, database, current_version, + initial_version, initial_tstamp); + return; } - sp = krb5_storage_from_data (&data); + /* We still hold the read lock, if right > 0 */ + right = get_right(server_context->context, log_fd, sp, current_version, + s, left, &ver); + if (right == left) { + flock(log_fd, LOCK_UN); + krb5_storage_free(sp); + return; + } + if (right < left) { + assert(right < 0); + krb5_storage_free(sp); + slave_dead(context, s); + return; + } + + if (krb5_storage_seek(sp, left, SEEK_SET) != left) { + ret = errno ? errno : EIO; + flock(log_fd, LOCK_UN); + krb5_warn(context, ret, "send_diffs: krb5_storage_seek"); + krb5_storage_free(sp); + slave_dead(context, s); + return; + } + + ret = krb5_data_alloc(&data, right - left + 4); + if (ret) { + flock(log_fd, LOCK_UN); + krb5_warn(context, ret, "send_diffs: krb5_data_alloc"); + krb5_storage_free(sp); + slave_dead(context, s); + return; + } + + bytes = krb5_storage_read(sp, (char *)data.data + 4, data.length - 4); + flock(log_fd, LOCK_UN); + krb5_storage_free(sp); + if (bytes != data.length - 4) + krb5_errx(context, IPROPD_RESTART, "locked log truncated???"); + + sp = krb5_storage_from_data(&data); if (sp == NULL) { - krb5_warnx (context, "send_diffs: krb5_storage_from_data"); - send_are_you_there(context, s); - return 1; + krb5_err(context, IPROPD_RESTART_SLOW, ENOMEM, "out of memory"); + krb5_warnx(context, "send_diffs: krb5_storage_from_data"); + return; } - krb5_store_uint32 (sp, FOR_YOU); + krb5_store_uint32(sp, FOR_YOU); krb5_storage_free(sp); - ret = krb5_write_priv_message(context, s->ac, &s->fd, &data); + ret = mk_priv_tail(context, s, &data); krb5_data_free(&data); + if (ret == 0) { + /* Save the fast-path continuation */ + s->next_diff.last_version_sent = ver; + s->next_diff.off_next_version = right; + s->next_diff.initial_version = initial_version; + s->next_diff.initial_tstamp = initial_tstamp; + s->next_diff.more = ver < current_version; + ret = send_tail(context, s); + + krb5_warnx(context, + "syncing slave %s from version %lu to version %lu", + s->name, (unsigned long)s->version, + (unsigned long)ver); + s->version = ver; + } - if (ret) { - krb5_warn (context, ret, "send_diffs: krb5_write_priv_message"); - slave_dead(context, s); - return 1; + if (ret && ret != EWOULDBLOCK) { + krb5_warn(context, ret, "send_diffs: making or sending " + "KRB-PRIV message"); + slave_dead(context, s); + return; } slave_seen(s); + return; +} + +/* Sensible bound on slave message size */ +#define SLAVE_MSG_MAX 65536 - s->version = current_version; +static int +fill_input(krb5_context context, slave *s) +{ + krb5_error_code ret; - krb5_warnx(context, "slave %s is now up to date (%u)", s->name, s->version); + if (s->input.hlen < 4) { + uint8_t *buf = s->input.header_buf + s->input.hlen; + size_t len = 4 - s->input.hlen; + krb5_ssize_t bytes = krb5_net_read(context, &s->fd, buf, len); + + if (bytes == 0) + return HEIM_ERR_EOF; + if (bytes < 0) { + if (errno == EWOULDBLOCK || errno == EAGAIN) + return EWOULDBLOCK; + return errno ? errno : EIO; + } + s->input.hlen += bytes; + if (bytes < len) + return EWOULDBLOCK; + + buf = s->input.header_buf; + len = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; + if (len > SLAVE_MSG_MAX) + return EINVAL; + ret = krb5_data_alloc(&s->input.packet, len); + if (ret != 0) + return ret; + } + if (s->input.offset < s->input.packet.length) { + u_char *buf = (u_char *)s->input.packet.data + s->input.offset; + size_t len = s->input.packet.length - s->input.offset; + krb5_ssize_t bytes = krb5_net_read(context, &s->fd, buf, len); + + if (bytes == 0) + return HEIM_ERR_EOF; + if (bytes < 0) { + if (errno == EWOULDBLOCK || errno == EAGAIN) + return EWOULDBLOCK; + return errno ? errno : EIO; + } + s->input.offset += bytes; + if (bytes != len) + return EWOULDBLOCK; + } return 0; } static int -process_msg (kadm5_server_context *server_context, slave *s, int log_fd, - const char *database, uint32_t current_version, - uint32_t current_tstamp) +read_msg(krb5_context context, slave *s, krb5_data *out) +{ + int ret = fill_input(context, s); + + if (ret != 0) + return ret; + + ret = krb5_rd_priv(context, s->ac, &s->input.packet, out, NULL); + + /* Prepare for next packet */ + krb5_data_free(&s->input.packet); + s->input.offset = 0; + s->input.hlen = 0; + + return ret; +} + +static int +process_msg(kadm5_server_context *server_context, slave *s, int log_fd, + const char *database, uint32_t current_version) { krb5_context context = server_context->context; int ret = 0; @@ -877,10 +1307,11 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd, krb5_storage *sp; uint32_t tmp; - ret = krb5_read_priv_message(context, s->ac, &s->fd, &out); - if(ret) { - krb5_warn(context, ret, "error reading message from %s", s->name); - return 1; + ret = read_msg(context, s, &out); + if (ret) { + if (ret != EWOULDBLOCK) + krb5_warn(context, ret, "error reading message from %s", s->name); + return ret; } sp = krb5_storage_from_mem(out.data, out.length); @@ -901,25 +1332,48 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd, krb5_warnx(context, "process_msg: client send too little I_HAVE data"); break; } - /* new started slave that have old log */ - if (s->version == 0 && tmp != 0) { + /* + * XXX Make the slave send the timestamp as well, and try to get it + * here, and pass it to send_diffs(). + */ + /* + * New slave whose version number we've not yet seen. If the version + * number is zero, the slave has no data, and we'll send a complete + * database (that happens in send_diffs()). Otherwise, we'll record a + * non-zero initial version and attempt an incremental update. + * + * NOTE!: Once the slave is "ready" (its first I_HAVE has conveyed its + * initial version), we MUST NOT update s->version to the slave's + * I_HAVE version, since we may already have sent later updates, and + * MUST NOT send them again, otherwise we can get further and further + * out of sync resending larger and larger diffs. The "not yet ready" + * is an essential precondition for setting s->version to the value + * in the I_HAVE message. This happens only once when the slave + * first connects. + */ + if (!(s->flags & SLAVE_F_READY)) { if (current_version < tmp) { - krb5_warnx(context, "Slave %s (version %u) have later version " - "the master (version %u) OUT OF SYNC", + krb5_warnx(context, "Slave %s (version %u) has later version " + "than the master (version %u) OUT OF SYNC", s->name, tmp, current_version); + /* Force send_complete() */ + tmp = 0; } - if (verbose) - krb5_warnx(context, "slave %s updated from %u to %u", - s->name, s->version, tmp); + /* + * Mark the slave as ready for updates based on incoming signals. + * Prior to the initial I_HAVE, we don't know the slave's version + * number, and MUST not send it anything, since we'll needlessly + * attempt to send the whole database! + */ s->version = tmp; + s->flags |= SLAVE_F_READY; + if (verbose) + krb5_warnx(context, "slave %s ready for updates from version %u", + s->name, tmp); } - if (tmp < s->version) { - krb5_warnx(context, "Slave %s claims to not have " - "version we already sent to it", s->name); - s->version = tmp; - } - ret = send_diffs(server_context, s, log_fd, database, current_version, - current_tstamp); + if ((s->version_ack = tmp) < s->version) + break; + send_diffs(server_context, s, log_fd, database, current_version); break; case I_AM_HERE : if (verbose) @@ -1035,7 +1489,7 @@ write_stats(krb5_context context, slave *slaves, uint32_t current_version) } else rtbl_add_column_entry(tbl, SLAVE_ADDRESS, "<unknown>"); - snprintf(str, sizeof(str), "%u", (unsigned)slaves->version); + snprintf(str, sizeof(str), "%u", (unsigned)slaves->version_ack); rtbl_add_column_entry(tbl, SLAVE_VERSION, str); if (slaves->flags & SLAVE_F_DEAD) @@ -1044,7 +1498,10 @@ write_stats(krb5_context context, slave *slaves, uint32_t current_version) rtbl_add_column_entry(tbl, SLAVE_STATUS, "Up"); ret = krb5_format_time(context, slaves->seen, str, sizeof(str), TRUE); - rtbl_add_column_entry(tbl, SLAVE_SEEN, str); + if (ret) + rtbl_add_column_entry(tbl, SLAVE_SEEN, "<error-formatting-time>"); + else + rtbl_add_column_entry(tbl, SLAVE_SEEN, str); slaves = slaves->next; } @@ -1105,7 +1562,6 @@ main(int argc, char **argv) int log_fd; slave *slaves = NULL; uint32_t current_version = 0, old_version = 0; - uint32_t current_tstamp = 0; krb5_keytab keytab; char **files; int aret; @@ -1198,7 +1654,7 @@ main(int argc, char **argv) krb5_err(context, 1, errno, "shared flock %s", server_context->log_context.log_file); kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, - ¤t_version, ¤t_tstamp); + ¤t_version, NULL); flock(log_fd, LOCK_UN); signal_fd = make_signal_socket (context); @@ -1212,7 +1668,7 @@ main(int argc, char **argv) while (exit_flag == 0){ slave *p; - fd_set readset; + fd_set readset, writeset; int max_fd = 0; struct timeval to = {30, 0}; uint32_t vers; @@ -1225,6 +1681,7 @@ main(int argc, char **argv) #endif FD_ZERO(&readset); + FD_ZERO(&writeset); FD_SET(signal_fd, &readset); max_fd = max(max_fd, signal_fd); FD_SET(listen_fd, &readset); @@ -1238,11 +1695,12 @@ main(int argc, char **argv) if (p->flags & SLAVE_F_DEAD) continue; FD_SET(p->fd, &readset); + if (have_tail(p) || more_diffs(p)) + FD_SET(p->fd, &writeset); max_fd = max(max_fd, p->fd); } - ret = select (max_fd + 1, - &readset, NULL, NULL, &to); + ret = select(max_fd + 1, &readset, &writeset, NULL, &to); if (ret < 0) { if (errno == EINTR) continue; @@ -1260,7 +1718,7 @@ main(int argc, char **argv) log_fd = open(server_context->log_context.log_file, O_RDONLY, 0); if (log_fd < 0) - krb5_err(context, 1, IPROPD_RESTART_SLOW, "open %s", + krb5_err(context, IPROPD_RESTART_SLOW, errno, "open %s", server_context->log_context.log_file); if (fstat(log_fd, &st) == -1) @@ -1271,7 +1729,7 @@ main(int argc, char **argv) krb5_err(context, IPROPD_RESTART, errno, "shared flock %s", server_context->log_context.log_file); kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, - ¤t_version, ¤t_tstamp); + ¤t_version, NULL); flock(log_fd, LOCK_UN); } @@ -1284,19 +1742,20 @@ main(int argc, char **argv) krb5_err(context, IPROPD_RESTART, errno, "could not lock log file"); kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, - ¤t_version, ¤t_tstamp); + ¤t_version, NULL); flock(log_fd, LOCK_UN); if (current_version > old_version) { - krb5_warnx(context, - "Missed a signal, updating slaves %lu to %lu", - (unsigned long)old_version, - (unsigned long)current_version); + if (verbose) + krb5_warnx(context, + "Missed a signal, updating slaves %lu to %lu", + (unsigned long)old_version, + (unsigned long)current_version); for (p = slaves; p != NULL; p = p->next) { if (p->flags & SLAVE_F_DEAD) continue; - send_diffs (server_context, p, log_fd, database, - current_version, current_tstamp); + send_diffs(server_context, p, log_fd, database, + current_version); } old_version = current_version; } @@ -1327,7 +1786,7 @@ main(int argc, char **argv) krb5_err(context, IPROPD_RESTART, errno, "shared flock %s", server_context->log_context.log_file); kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, - ¤t_version, ¤t_tstamp); + ¤t_version, NULL); flock(log_fd, LOCK_UN); if (current_version != old_version) { /* @@ -1342,31 +1801,44 @@ main(int argc, char **argv) * breaking backwards compatibility for the protocol or * adding new messages to it. */ - krb5_warnx(context, - "Got a signal, updating slaves %lu to %lu", - (unsigned long)old_version, - (unsigned long)current_version); + if (verbose) + krb5_warnx(context, + "Got a signal, updating slaves %lu to %lu", + (unsigned long)old_version, + (unsigned long)current_version); for (p = slaves; p != NULL; p = p->next) { if (p->flags & SLAVE_F_DEAD) continue; - send_diffs (server_context, p, log_fd, database, - current_version, current_tstamp); + send_diffs(server_context, p, log_fd, database, + current_version); } } else { - krb5_warnx(context, - "Got a signal, but no update in log version %lu", - (unsigned long)current_version); + if (verbose) + krb5_warnx(context, + "Got a signal, but no update in log version %lu", + (unsigned long)current_version); } } + for (p = slaves; p != NULL; p = p->next) { + if (!(p->flags & SLAVE_F_DEAD) && + FD_ISSET(p->fd, &writeset) && + ((have_tail(p) && send_tail(context, p) == 0) || + (!have_tail(p) && more_diffs(p)))) { + send_diffs(server_context, p, log_fd, database, + current_version); + } + } + for(p = slaves; p != NULL; p = p->next) { if (p->flags & SLAVE_F_DEAD) continue; if (ret && FD_ISSET(p->fd, &readset)) { --ret; assert(ret >= 0); - if(process_msg (server_context, p, log_fd, database, - current_version, current_tstamp)) + ret = process_msg(server_context, p, log_fd, database, + current_version); + if (ret && ret != EWOULDBLOCK) slave_dead(context, p); } else if (slave_gone_p (p)) slave_dead(context, p); |
