aboutsummaryrefslogtreecommitdiff
path: root/lib/kadm5/ipropd_master.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/kadm5/ipropd_master.c')
-rw-r--r--lib/kadm5/ipropd_master.c960
1 files changed, 716 insertions, 244 deletions
diff --git a/lib/kadm5/ipropd_master.c b/lib/kadm5/ipropd_master.c
index d0fdc8e26f10..3d4218f03a78 100644
--- a/lib/kadm5/ipropd_master.c
+++ b/lib/kadm5/ipropd_master.c
@@ -121,6 +121,7 @@ make_listen_socket (krb5_context context, const char *port_str)
return fd;
}
+
struct slave {
krb5_socket_t fd;
struct sockaddr_in addr;
@@ -128,10 +129,45 @@ struct slave {
krb5_auth_context ac;
uint32_t version;
uint32_t version_tstamp;
+ uint32_t version_ack;
time_t seen;
unsigned long flags;
#define SLAVE_F_DEAD 0x1
#define SLAVE_F_AYT 0x2
+#define SLAVE_F_READY 0x4
+ /*
+ * We'll use non-blocking I/O so no slave can hold us back.
+ *
+ * We call the state left over from a partial write a "tail".
+ *
+ * The krb5_data holding an KRB-PRIV will be the write buffer.
+ */
+ struct {
+ /* Every message we send is a KRB-PRIV with a 4-byte length prefixed */
+ uint8_t header_buf[4];
+ krb5_data header;
+ krb5_data packet;
+ size_t packet_off;
+ /* For send_complete() we need an sp as part of the tail */
+ krb5_storage *dump;
+ uint32_t vno;
+ } tail;
+ struct {
+ uint8_t header_buf[4];
+ krb5_data packet;
+ size_t offset;
+ int hlen;
+ } input;
+ /*
+ * Continuation for fair diff sending we send N entries at a time.
+ */
+ struct {
+ off_t off_next_version; /* offset in log of next diff */
+ uint32_t initial_version; /* at time of previous diff */
+ uint32_t initial_tstamp; /* at time of previous diff */
+ uint32_t last_version_sent;
+ int more; /* need to send more diffs */
+ } next_diff;
struct slave *next;
};
@@ -220,6 +256,11 @@ remove_slave (krb5_context context, slave *s, slave **root)
if (s->ac)
krb5_auth_con_free (context, s->ac);
+ /* Free any pending input/output state */
+ krb5_data_free(&s->input.packet);
+ krb5_data_free(&s->tail.packet);
+ krb5_storage_free(s->tail.dump);
+
for (p = root; *p; p = &(*p)->next)
if (*p == s) {
*p = s->next;
@@ -239,13 +280,17 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root,
krb5_ticket *ticket = NULL;
char hostname[128];
- s = malloc(sizeof(*s));
+ s = calloc(1, sizeof(*s));
if (s == NULL) {
krb5_warnx (context, "add_slave: no memory");
return;
}
s->name = NULL;
s->ac = NULL;
+ s->input.packet.data = NULL;
+ s->tail.header.data = NULL;
+ s->tail.packet.data = NULL;
+ s->tail.dump = NULL;
addr_len = sizeof(s->addr);
s->fd = accept (fd, (struct sockaddr *)&s->addr, &addr_len);
@@ -253,6 +298,7 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root,
krb5_warn (context, rk_SOCK_ERRNO, "accept");
goto error;
}
+
if (master_hostname)
strlcpy(hostname, master_hostname, sizeof(hostname));
else
@@ -267,6 +313,35 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root,
ret = krb5_recvauth (context, &s->ac, &s->fd,
IPROP_VERSION, server, 0, keytab, &ticket);
+
+ /*
+ * We'll be doing non-blocking I/O only after authentication. We don't
+ * want to get stuck talking to any one slave.
+ *
+ * If we get a partial write, we'll finish writing when the socket becomes
+ * writable.
+ *
+ * Partial reads will be treated as EOF, causing the slave to be marked
+ * dead.
+ *
+ * To do non-blocking I/O for authentication we'll have to implement our
+ * own krb5_recvauth().
+ */
+ socket_set_nonblocking(s->fd, 1);
+
+ /*
+ * We write message lengths separately from the payload, and may do
+ * back-to-back small writes when flushing pending input and then a new
+ * update. Avoid Nagle delays.
+ */
+#if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
+ {
+ int nodelay = 1;
+ (void) setsockopt(s->fd, IPPROTO_TCP, TCP_NODELAY,
+ (void *)&nodelay, sizeof(nodelay));
+ }
+#endif
+
krb5_free_principal (context, server);
if (ret) {
krb5_warn (context, ret, "krb5_recvauth");
@@ -304,6 +379,7 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root,
krb5_warnx (context, "connection from %s", s->name);
s->version = 0;
+ s->version_ack = 0;
s->flags = 0;
slave_seen(s);
s->next = *root;
@@ -372,6 +448,8 @@ write_dump (krb5_context context, krb5_storage *dump,
*/
ret = krb5_store_uint32(dump, 0);
+ if (ret)
+ return ret;
ret = hdb_create (context, &db, database);
if (ret)
@@ -452,14 +530,146 @@ write_dump (krb5_context context, krb5_storage *dump,
}
static int
-send_complete (krb5_context context, slave *s, const char *database,
- uint32_t current_version, uint32_t oldest_version,
- uint32_t initial_log_tstamp)
+mk_priv_tail(krb5_context context, slave *s, krb5_data *data)
+{
+ uint32_t len;
+ int ret;
+
+ ret = krb5_mk_priv(context, s->ac, data, &s->tail.packet, NULL);
+ if (ret)
+ return ret;
+
+ len = s->tail.packet.length;
+ _krb5_put_int(s->tail.header_buf, len, sizeof(s->tail.header_buf));
+ s->tail.header.length = sizeof(s->tail.header_buf);
+ s->tail.header.data = s->tail.header_buf;
+ return 0;
+}
+
+static int
+have_tail(slave *s)
+{
+ return s->tail.header.length || s->tail.packet.length || s->tail.dump;
+}
+
+static int
+more_diffs(slave *s)
+{
+ return s->next_diff.more;
+}
+
+#define SEND_COMPLETE_MAX_RECORDS 50
+#define SEND_DIFFS_MAX_RECORDS 50
+
+static int
+send_tail(krb5_context context, slave *s)
+{
+ krb5_data data;
+ ssize_t bytes = 0;
+ size_t rem = 0;
+ size_t n;
+ int ret;
+
+ if (! have_tail(s))
+ return 0;
+
+ /*
+ * For the case where we're continuing a send_complete() send up to
+ * SEND_COMPLETE_MAX_RECORDS records now, and the rest asynchronously
+ * later. This ensures that sending a complete dump to a slow-to-drain
+ * client does not prevent others from getting serviced.
+ */
+ for (n = 0; n < SEND_COMPLETE_MAX_RECORDS; n++) {
+ if (! have_tail(s))
+ return 0;
+
+ if (s->tail.header.length) {
+ bytes = krb5_net_write(context, &s->fd,
+ s->tail.header.data,
+ s->tail.header.length);
+ if (bytes < 0)
+ goto err;
+
+ s->tail.header.length -= bytes;
+ s->tail.header.data = (char *)s->tail.header.data + bytes;
+ rem = s->tail.header.length;
+ if (rem)
+ goto ewouldblock;
+ }
+
+ if (s->tail.packet.length) {
+ bytes = krb5_net_write(context, &s->fd,
+ (char *)s->tail.packet.data + s->tail.packet_off,
+ s->tail.packet.length - s->tail.packet_off);
+ if (bytes < 0)
+ goto err;
+ s->tail.packet_off += bytes;
+ if (bytes)
+ slave_seen(s);
+ rem = s->tail.packet.length - s->tail.packet_off;
+ if (rem)
+ goto ewouldblock;
+
+ krb5_data_free(&s->tail.packet);
+ s->tail.packet_off = 0;
+ }
+
+ if (s->tail.dump == NULL)
+ return 0;
+
+ /*
+ * We're in the middle of a send_complete() that was interrupted by
+ * EWOULDBLOCK. Continue the sending of the dump.
+ */
+ ret = krb5_ret_data(s->tail.dump, &data);
+ if (ret == HEIM_ERR_EOF) {
+ krb5_storage_free(s->tail.dump);
+ s->tail.dump = NULL;
+ s->version = s->tail.vno;
+ return 0;
+ }
+
+ if (ret) {
+ krb5_warn(context, ret, "failed to read entry from dump!");
+ } else {
+ ret = mk_priv_tail(context, s, &data);
+ krb5_data_free(&data);
+ if (ret == 0)
+ continue;
+ krb5_warn(context, ret, "failed to make and send a KRB-PRIV to %s",
+ s->name);
+ }
+
+ slave_dead(context, s);
+ return ret;
+ }
+
+ if (ret == 0 && s->tail.dump != NULL)
+ return EWOULDBLOCK;
+
+err:
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
+ krb5_warn(context, ret = errno,
+ "error sending diffs to now-dead slave %s", s->name);
+ slave_dead(context, s);
+ return ret;
+ }
+
+ewouldblock:
+ if (verbose)
+ krb5_warnx(context, "would block writing %llu bytes to slave %s",
+ (unsigned long long)rem, s->name);
+ return EWOULDBLOCK;
+}
+
+static int
+send_complete(krb5_context context, slave *s, const char *database,
+ uint32_t current_version, uint32_t oldest_version,
+ uint32_t initial_log_tstamp)
{
krb5_error_code ret;
krb5_storage *dump = NULL;
uint32_t vno = 0;
- krb5_data data;
int fd = -1;
struct stat st;
char *dfn;
@@ -517,7 +727,6 @@ send_complete (krb5_context context, slave *s, const char *database,
* If the current dump has an appropriate version, then we can
* break out of the loop and send the file below.
*/
-
if (ret == 0 && vno != 0 && st.st_mtime > initial_log_tstamp &&
vno >= oldest_version && vno <= current_version)
break;
@@ -583,38 +792,18 @@ send_complete (krb5_context context, slave *s, const char *database,
/*
* Leaving the above loop, dump should have a ptr right after the initial
* 4 byte DB version number and we should have a shared lock on the file
- * (which we may have just created), so we are reading to simply blast
+ * (which we may have just created), so we are reading to start sending
* the data down the wire.
+ *
+ * Note: (krb5_storage_from_fd() dup()'s the fd)
*/
- for (;;) {
- ret = krb5_ret_data(dump, &data);
- if (ret == HEIM_ERR_EOF) {
- ret = 0; /* EOF is not an error, it's success */
- goto done;
- }
-
- if (ret) {
- krb5_warn(context, ret, "krb5_ret_data(dump, &data)");
- slave_dead(context, s);
- goto done;
- }
-
- ret = krb5_write_priv_message(context, s->ac, &s->fd, &data);
- krb5_data_free(&data);
-
- if (ret) {
- krb5_warn (context, ret, "krb5_write_priv_message");
- slave_dead(context, s);
- goto done;
- }
- }
+ s->tail.dump = dump;
+ s->tail.vno = vno;
+ dump = NULL;
+ ret = send_tail(context, s);
done:
- if (!ret) {
- s->version = vno;
- slave_seen(s);
- }
if (fd != -1)
close(fd);
if (dump)
@@ -633,6 +822,14 @@ send_are_you_there (krb5_context context, slave *s)
if (s->flags & (SLAVE_F_DEAD|SLAVE_F_AYT))
return 0;
+ /*
+ * Write any remainder of previous write, if we can. If we'd block we'll
+ * return EWOULDBLOCK.
+ */
+ ret = send_tail(context, s);
+ if (ret)
+ return ret;
+
krb5_warnx(context, "slave %s missing, sending AYT", s->name);
s->flags |= SLAVE_F_AYT;
@@ -644,232 +841,465 @@ send_are_you_there (krb5_context context, slave *s)
if (sp == NULL) {
krb5_warnx (context, "are_you_there: krb5_data_alloc");
slave_dead(context, s);
- return 1;
+ return ENOMEM;
}
ret = krb5_store_uint32(sp, ARE_YOU_THERE);
krb5_storage_free (sp);
- if (ret == 0) {
- ret = krb5_write_priv_message(context, s->ac, &s->fd, &data);
+ if (ret == 0)
+ ret = mk_priv_tail(context, s, &data);
+ if (ret == 0)
+ ret = send_tail(context, s);
+ if (ret && ret != EWOULDBLOCK) {
+ krb5_warn(context, ret, "are_you_there");
+ slave_dead(context, s);
+ }
+ return ret;
+}
- if (ret) {
- krb5_warn(context, ret, "are_you_there: krb5_write_priv_message");
- slave_dead(context, s);
- return 1;
- }
+static int
+diffready(krb5_context context, slave *s)
+{
+ /*
+ * Don't send any diffs until slave has sent an I_HAVE telling us the
+ * initial version number!
+ */
+ if ((s->flags & SLAVE_F_READY) == 0)
+ return 0;
+
+ if (s->flags & SLAVE_F_DEAD) {
+ if (verbose)
+ krb5_warnx(context, "not sending diffs to dead slave %s", s->name);
+ return 0;
}
- return 0;
+ /* Write any remainder of previous write, if we can. */
+ if (send_tail(context, s) != 0)
+ return 0;
+
+ return 1;
}
static int
-send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
- const char *database, uint32_t current_version,
- uint32_t current_tstamp)
+nodiffs(krb5_context context, slave *s, uint32_t current_version)
{
- krb5_context context = server_context->context;
krb5_storage *sp;
- uint32_t ver, initial_version, initial_version2;
- uint32_t initial_tstamp, initial_tstamp2;
- enum kadm_ops op;
- uint32_t len;
- off_t right, left;
- krb5_ssize_t bytes;
krb5_data data;
- int ret = 0;
+ int ret;
- if (s->flags & SLAVE_F_DEAD) {
- krb5_warnx(context, "not sending diffs to dead slave %s", s->name);
+ if (s->version < current_version)
return 0;
- }
- if (s->version == current_version) {
- char buf[4];
-
- sp = krb5_storage_from_mem(buf, 4);
- if (sp == NULL)
- krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_mem");
- ret = krb5_store_uint32(sp, YOU_HAVE_LAST_VERSION);
- krb5_storage_free(sp);
- data.data = buf;
- data.length = 4;
- if (ret == 0) {
- ret = krb5_write_priv_message(context, s->ac, &s->fd, &data);
- if (ret) {
- krb5_warn(context, ret, "send_diffs: failed to send to slave");
- slave_dead(context, s);
- }
- krb5_warnx(context, "slave %s in sync already at version %ld",
- s->name, (long)s->version);
- }
- return ret;
- }
+ /*
+ * If we had sent a partial diff, and now they're caught up, then there's
+ * no more.
+ */
+ s->next_diff.more = 0;
if (verbose)
- krb5_warnx(context, "sending diffs to live-seeming slave %s", s->name);
+ krb5_warnx(context, "slave %s version %ld already sent", s->name,
+ (long)s->version);
+ sp = krb5_storage_emem();
+ if (sp == NULL)
+ krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_mem");
+
+ ret = krb5_store_uint32(sp, YOU_HAVE_LAST_VERSION);
+ if (ret == 0) {
+ krb5_data_zero(&data);
+ ret = krb5_storage_to_data(sp, &data);
+ }
+ krb5_storage_free(sp);
+ if (ret == 0) {
+ ret = mk_priv_tail(context, s, &data);
+ krb5_data_free(&data);
+ }
+ if (ret == 0)
+ send_tail(context, s);
+
+ return 1;
+}
+
+/*
+ * Lock the log and return initial version and timestamp
+ */
+static int
+get_first(kadm5_server_context *server_context, int log_fd,
+ uint32_t *initial_verp, uint32_t *initial_timep)
+{
+ krb5_context context = server_context->context;
+ int ret;
/*
- * XXX The code that makes the diffs should be made a separate function,
- * then error handling (send_are_you_there() or slave_dead()) can be done
- * here.
+ * We don't want to perform tight retry loops on log access errors, so on
+ * error mark the slave dead. The slave reconnect after a delay...
*/
-
if (flock(log_fd, LOCK_SH) == -1) {
krb5_warn(context, errno, "could not obtain shared lock on log file");
- send_are_you_there(context, s);
- return errno;
+ return -1;
}
+
ret = kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_FIRST,
- &initial_version, &initial_tstamp);
- sp = kadm5_log_goto_end(server_context, log_fd);
- flock(log_fd, LOCK_UN);
- if (ret) {
- if (sp != NULL)
- krb5_storage_free(sp);
- krb5_warn(context, ret, "send_diffs: failed to read log");
- send_are_you_there(context, s);
- return ret;
- }
- if (sp == NULL) {
- send_are_you_there(context, s);
- krb5_warn(context, errno ? errno : EINVAL,
- "send_diffs: failed to read log");
- return errno ? errno : EINVAL;
- }
- /*
- * We're not holding any locks here, so we can't prevent truncations.
- *
- * We protect against this by re-checking that the initial version and
- * timestamp are the same before and after this loop.
- */
- right = krb5_storage_seek(sp, 0, SEEK_CUR);
- if (right == (off_t)-1) {
- krb5_storage_free(sp);
- send_are_you_there(context, s);
- return errno;
+ initial_verp, initial_timep);
+ if (ret != 0) {
+ flock(log_fd, LOCK_UN);
+ krb5_warnx(context, "could not read initial log entry");
+ return -1;
}
+
+ return 0;
+}
+
+/*-
+ * Find the left end of the diffs in the log we want to send.
+ *
+ * - On success, return a positive offset to the first new entry, retaining
+ * a read lock on the log file.
+ * - On error, return a negative offset, with the lock released.
+ * - If we simply find no successor entry in the log, return zero
+ * with the lock released, which indicates that fallback to send_complete()
+ * is needed.
+ */
+static off_t
+get_left(kadm5_server_context *server_context, slave *s, krb5_storage *sp,
+ int log_fd, uint32_t current_version,
+ uint32_t *initial_verp, uint32_t *initial_timep)
+{
+ krb5_context context = server_context->context;
+ off_t pos;
+ off_t left;
+ int ret;
+
for (;;) {
- ret = kadm5_log_previous (context, sp, &ver, NULL, &op, &len);
- if (ret)
- krb5_err(context, IPROPD_RESTART, ret,
- "send_diffs: failed to find previous entry");
- left = krb5_storage_seek(sp, -16, SEEK_CUR);
- if (left == (off_t)-1) {
- krb5_storage_free(sp);
- send_are_you_there(context, s);
- return errno;
+ uint32_t ver = s->version;
+
+ /* This acquires a read lock on success */
+ ret = get_first(server_context, log_fd,
+ initial_verp, initial_timep);
+ if (ret != 0)
+ return -1;
+
+ /* When the slave version is out of range, send the whole database. */
+ if (ver == 0 || ver < *initial_verp || ver > current_version) {
+ flock(log_fd, LOCK_UN);
+ return 0;
}
- if (ver == s->version + 1)
- break;
+
+ /* Avoid seeking past the last committed record */
+ if (kadm5_log_goto_end(server_context, sp) != 0 ||
+ (pos = krb5_storage_seek(sp, 0, SEEK_CUR)) < 0)
+ goto err;
/*
- * We don't expect to reach the slave's version, except when it is
- * starting empty with the uber record.
+ * First try to see if we can find it quickly by seeking to the right
+ * end of the previous diff sent.
*/
- if (ver == s->version && !(ver == 0 && op == kadm_nop)) {
+ if (s->next_diff.last_version_sent > 0 &&
+ s->next_diff.off_next_version > 0 &&
+ s->next_diff.off_next_version < pos &&
+ s->next_diff.initial_version == *initial_verp &&
+ s->next_diff.initial_tstamp == *initial_timep) {
/*
- * This shouldn't happen, but recall we're not holding a lock on
- * the log.
+ * Sanity check that the left version matches what we wanted, the
+ * log may have been truncated since.
*/
- krb5_storage_free(sp);
- krb5_warnx(context, "iprop log truncated while sending diffs to "
- "slave?? ver = %lu", (unsigned long)ver);
- send_are_you_there(context, s);
- return 0;
+ left = s->next_diff.off_next_version;
+ if (krb5_storage_seek(sp, left, SEEK_SET) != left)
+ goto err;
+ if (kadm5_log_next(context, sp, &ver, NULL, NULL, NULL) == 0 &&
+ ver == s->next_diff.last_version_sent + 1)
+ return left;
}
- /* If we've reached the uber record, send the complete database */
- if (left == 0 || (ver == 0 && op == kadm_nop)) {
- krb5_storage_free(sp);
- krb5_warnx(context,
- "slave %s (version %lu) out of sync with master "
- "(first version in log %lu), sending complete database",
- s->name, (unsigned long)s->version, (unsigned long)ver);
- return send_complete (context, s, database, current_version, ver,
- initial_tstamp);
- }
+ if (krb5_storage_seek(sp, pos, SEEK_SET) != pos)
+ goto err;
+
+ /*
+ * Drop the lock and try to find the left entry by seeking backward
+ * from the end of the end of the log. If we succeed, re-acquire the
+ * lock, update "next_diff", and retry the fast-path.
+ */
+ flock(log_fd, LOCK_UN);
+
+ /* Slow path: seek backwards, entry by entry, from the end */
+ for (;;) {
+ enum kadm_ops op;
+ uint32_t len;
+
+ ret = kadm5_log_previous(context, sp, &ver, NULL, &op, &len);
+ if (ret)
+ return -1;
+ left = krb5_storage_seek(sp, -16, SEEK_CUR);
+ if (left < 0)
+ return left;
+ if (ver == s->version + 1)
+ break;
+
+ /*
+ * We don't expect to reach the slave's version, unless the log
+ * has been modified after we released the lock.
+ */
+ if (ver == s->version) {
+ krb5_warnx(context, "iprop log truncated while sending diffs "
+ "to slave?? ver = %lu", (unsigned long)ver);
+ return -1;
+ }
+
+ /* If we've reached the uber record, send the complete database */
+ if (left == 0 || (ver == 0 && op == kadm_nop))
+ return 0;
+ }
+ assert(ver == s->version + 1);
+
+ /* Set up the fast-path pre-conditions */
+ s->next_diff.last_version_sent = s->version;
+ s->next_diff.off_next_version = left;
+ s->next_diff.initial_version = *initial_verp;
+ s->next_diff.initial_tstamp = *initial_timep;
+
+ /*
+ * If we loop then we're hoping to hit the fast path so we can return a
+ * non-zero, positive left offset with the lock held.
+ *
+ * We just updated the fast path pre-conditions, so unless a log
+ * truncation event happens between the point where we dropped the lock
+ * and the point where we rearcuire it above, we will hit the fast
+ * path.
+ */
}
- assert(ver == s->version + 1);
+ return left;
- krb5_warnx(context,
- "syncing slave %s from version %lu to version %lu",
- s->name, (unsigned long)s->version,
- (unsigned long)current_version);
+ err:
+ flock(log_fd, LOCK_UN);
+ return -1;
+}
- ret = krb5_data_alloc (&data, right - left + 4);
- if (ret) {
- krb5_storage_free(sp);
- krb5_warn (context, ret, "send_diffs: krb5_data_alloc");
- send_are_you_there(context, s);
- return 1;
+static off_t
+get_right(krb5_context context, int log_fd, krb5_storage *sp,
+ int lastver, slave *s, off_t left, uint32_t *verp)
+{
+ int ret = 0;
+ int i = 0;
+ uint32_t ver = s->version;
+ off_t right = krb5_storage_seek(sp, left, SEEK_SET);
+
+ if (right <= 0) {
+ flock(log_fd, LOCK_UN);
+ return -1;
}
- bytes = krb5_storage_read(sp, (char *)data.data + 4, data.length - 4);
- krb5_storage_free(sp);
- if (bytes != data.length - 4) {
- krb5_warnx(context, "iprop log truncated while sending diffs to "
- "slave?? ver = %lu", (unsigned long)ver);
- send_are_you_there(context, s);
- return 1;
+
+ /* The "lastver" bound should preclude us reaching EOF */
+ for (; ret == 0 && i < SEND_DIFFS_MAX_RECORDS && ver < lastver; ++i) {
+ uint32_t logver;
+
+ ret = kadm5_log_next(context, sp, &logver, NULL, NULL, NULL);
+ if (logver != ++ver)
+ ret = KADM5_LOG_CORRUPT;
}
- /*
- * Check that we have the same log initial version and timestamp now as
- * when we dropped the shared lock on the log file! Else we could be
- * sending garbage to the slave.
- */
- if (flock(log_fd, LOCK_SH) == -1) {
- krb5_warn(context, errno, "could not obtain shared lock on log file");
- send_are_you_there(context, s);
- return 1;
+ if (ret == 0)
+ right = krb5_storage_seek(sp, 0, SEEK_CUR);
+ else
+ right = -1;
+ if (right <= 0) {
+ flock(log_fd, LOCK_UN);
+ return -1;
}
- ret = kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_FIRST,
- &initial_version2, &initial_tstamp2);
- flock(log_fd, LOCK_UN);
- if (ret) {
- krb5_warn(context, ret,
- "send_diffs: failed to read log while producing diffs");
- send_are_you_there(context, s);
- return 1;
+ *verp = ver;
+ return right;
+}
+
+static void
+send_diffs(kadm5_server_context *server_context, slave *s, int log_fd,
+ const char *database, uint32_t current_version)
+{
+ krb5_context context = server_context->context;
+ krb5_storage *sp;
+ uint32_t initial_version;
+ uint32_t initial_tstamp;
+ uint32_t ver;
+ off_t left = 0;
+ off_t right = 0;
+ krb5_ssize_t bytes;
+ krb5_data data;
+ int ret = 0;
+
+ if (!diffready(context, s) || nodiffs(context, s, current_version))
+ return;
+
+ if (verbose)
+ krb5_warnx(context, "sending diffs to live-seeming slave %s", s->name);
+
+ sp = krb5_storage_from_fd(log_fd);
+ if (sp == NULL)
+ krb5_err(context, IPROPD_RESTART_SLOW, ENOMEM,
+ "send_diffs: out of memory");
+
+ left = get_left(server_context, s, sp, log_fd, current_version,
+ &initial_version, &initial_tstamp);
+ if (left < 0) {
+ krb5_storage_free(sp);
+ slave_dead(context, s);
+ return;
}
- if (initial_version != initial_version2 ||
- initial_tstamp != initial_tstamp2) {
- krb5_warn(context, ret,
- "send_diffs: log truncated while producing diffs");
- send_are_you_there(context, s);
- return 1;
+
+ if (left == 0) {
+ /* Slave's version is not in the log, fall back on send_complete() */
+ krb5_storage_free(sp);
+ send_complete(context, s, database, current_version,
+ initial_version, initial_tstamp);
+ return;
}
- sp = krb5_storage_from_data (&data);
+ /* We still hold the read lock, if right > 0 */
+ right = get_right(server_context->context, log_fd, sp, current_version,
+ s, left, &ver);
+ if (right == left) {
+ flock(log_fd, LOCK_UN);
+ krb5_storage_free(sp);
+ return;
+ }
+ if (right < left) {
+ assert(right < 0);
+ krb5_storage_free(sp);
+ slave_dead(context, s);
+ return;
+ }
+
+ if (krb5_storage_seek(sp, left, SEEK_SET) != left) {
+ ret = errno ? errno : EIO;
+ flock(log_fd, LOCK_UN);
+ krb5_warn(context, ret, "send_diffs: krb5_storage_seek");
+ krb5_storage_free(sp);
+ slave_dead(context, s);
+ return;
+ }
+
+ ret = krb5_data_alloc(&data, right - left + 4);
+ if (ret) {
+ flock(log_fd, LOCK_UN);
+ krb5_warn(context, ret, "send_diffs: krb5_data_alloc");
+ krb5_storage_free(sp);
+ slave_dead(context, s);
+ return;
+ }
+
+ bytes = krb5_storage_read(sp, (char *)data.data + 4, data.length - 4);
+ flock(log_fd, LOCK_UN);
+ krb5_storage_free(sp);
+ if (bytes != data.length - 4)
+ krb5_errx(context, IPROPD_RESTART, "locked log truncated???");
+
+ sp = krb5_storage_from_data(&data);
if (sp == NULL) {
- krb5_warnx (context, "send_diffs: krb5_storage_from_data");
- send_are_you_there(context, s);
- return 1;
+ krb5_err(context, IPROPD_RESTART_SLOW, ENOMEM, "out of memory");
+ krb5_warnx(context, "send_diffs: krb5_storage_from_data");
+ return;
}
- krb5_store_uint32 (sp, FOR_YOU);
+ krb5_store_uint32(sp, FOR_YOU);
krb5_storage_free(sp);
- ret = krb5_write_priv_message(context, s->ac, &s->fd, &data);
+ ret = mk_priv_tail(context, s, &data);
krb5_data_free(&data);
+ if (ret == 0) {
+ /* Save the fast-path continuation */
+ s->next_diff.last_version_sent = ver;
+ s->next_diff.off_next_version = right;
+ s->next_diff.initial_version = initial_version;
+ s->next_diff.initial_tstamp = initial_tstamp;
+ s->next_diff.more = ver < current_version;
+ ret = send_tail(context, s);
+
+ krb5_warnx(context,
+ "syncing slave %s from version %lu to version %lu",
+ s->name, (unsigned long)s->version,
+ (unsigned long)ver);
+ s->version = ver;
+ }
- if (ret) {
- krb5_warn (context, ret, "send_diffs: krb5_write_priv_message");
- slave_dead(context, s);
- return 1;
+ if (ret && ret != EWOULDBLOCK) {
+ krb5_warn(context, ret, "send_diffs: making or sending "
+ "KRB-PRIV message");
+ slave_dead(context, s);
+ return;
}
slave_seen(s);
+ return;
+}
+
+/* Sensible bound on slave message size */
+#define SLAVE_MSG_MAX 65536
- s->version = current_version;
+static int
+fill_input(krb5_context context, slave *s)
+{
+ krb5_error_code ret;
- krb5_warnx(context, "slave %s is now up to date (%u)", s->name, s->version);
+ if (s->input.hlen < 4) {
+ uint8_t *buf = s->input.header_buf + s->input.hlen;
+ size_t len = 4 - s->input.hlen;
+ krb5_ssize_t bytes = krb5_net_read(context, &s->fd, buf, len);
+
+ if (bytes == 0)
+ return HEIM_ERR_EOF;
+ if (bytes < 0) {
+ if (errno == EWOULDBLOCK || errno == EAGAIN)
+ return EWOULDBLOCK;
+ return errno ? errno : EIO;
+ }
+ s->input.hlen += bytes;
+ if (bytes < len)
+ return EWOULDBLOCK;
+
+ buf = s->input.header_buf;
+ len = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
+ if (len > SLAVE_MSG_MAX)
+ return EINVAL;
+ ret = krb5_data_alloc(&s->input.packet, len);
+ if (ret != 0)
+ return ret;
+ }
+ if (s->input.offset < s->input.packet.length) {
+ u_char *buf = (u_char *)s->input.packet.data + s->input.offset;
+ size_t len = s->input.packet.length - s->input.offset;
+ krb5_ssize_t bytes = krb5_net_read(context, &s->fd, buf, len);
+
+ if (bytes == 0)
+ return HEIM_ERR_EOF;
+ if (bytes < 0) {
+ if (errno == EWOULDBLOCK || errno == EAGAIN)
+ return EWOULDBLOCK;
+ return errno ? errno : EIO;
+ }
+ s->input.offset += bytes;
+ if (bytes != len)
+ return EWOULDBLOCK;
+ }
return 0;
}
static int
-process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
- const char *database, uint32_t current_version,
- uint32_t current_tstamp)
+read_msg(krb5_context context, slave *s, krb5_data *out)
+{
+ int ret = fill_input(context, s);
+
+ if (ret != 0)
+ return ret;
+
+ ret = krb5_rd_priv(context, s->ac, &s->input.packet, out, NULL);
+
+ /* Prepare for next packet */
+ krb5_data_free(&s->input.packet);
+ s->input.offset = 0;
+ s->input.hlen = 0;
+
+ return ret;
+}
+
+static int
+process_msg(kadm5_server_context *server_context, slave *s, int log_fd,
+ const char *database, uint32_t current_version)
{
krb5_context context = server_context->context;
int ret = 0;
@@ -877,10 +1307,11 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
krb5_storage *sp;
uint32_t tmp;
- ret = krb5_read_priv_message(context, s->ac, &s->fd, &out);
- if(ret) {
- krb5_warn(context, ret, "error reading message from %s", s->name);
- return 1;
+ ret = read_msg(context, s, &out);
+ if (ret) {
+ if (ret != EWOULDBLOCK)
+ krb5_warn(context, ret, "error reading message from %s", s->name);
+ return ret;
}
sp = krb5_storage_from_mem(out.data, out.length);
@@ -901,25 +1332,48 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
krb5_warnx(context, "process_msg: client send too little I_HAVE data");
break;
}
- /* new started slave that have old log */
- if (s->version == 0 && tmp != 0) {
+ /*
+ * XXX Make the slave send the timestamp as well, and try to get it
+ * here, and pass it to send_diffs().
+ */
+ /*
+ * New slave whose version number we've not yet seen. If the version
+ * number is zero, the slave has no data, and we'll send a complete
+ * database (that happens in send_diffs()). Otherwise, we'll record a
+ * non-zero initial version and attempt an incremental update.
+ *
+ * NOTE!: Once the slave is "ready" (its first I_HAVE has conveyed its
+ * initial version), we MUST NOT update s->version to the slave's
+ * I_HAVE version, since we may already have sent later updates, and
+ * MUST NOT send them again, otherwise we can get further and further
+ * out of sync resending larger and larger diffs. The "not yet ready"
+ * is an essential precondition for setting s->version to the value
+ * in the I_HAVE message. This happens only once when the slave
+ * first connects.
+ */
+ if (!(s->flags & SLAVE_F_READY)) {
if (current_version < tmp) {
- krb5_warnx(context, "Slave %s (version %u) have later version "
- "the master (version %u) OUT OF SYNC",
+ krb5_warnx(context, "Slave %s (version %u) has later version "
+ "than the master (version %u) OUT OF SYNC",
s->name, tmp, current_version);
+ /* Force send_complete() */
+ tmp = 0;
}
- if (verbose)
- krb5_warnx(context, "slave %s updated from %u to %u",
- s->name, s->version, tmp);
+ /*
+ * Mark the slave as ready for updates based on incoming signals.
+ * Prior to the initial I_HAVE, we don't know the slave's version
+ * number, and MUST not send it anything, since we'll needlessly
+ * attempt to send the whole database!
+ */
s->version = tmp;
+ s->flags |= SLAVE_F_READY;
+ if (verbose)
+ krb5_warnx(context, "slave %s ready for updates from version %u",
+ s->name, tmp);
}
- if (tmp < s->version) {
- krb5_warnx(context, "Slave %s claims to not have "
- "version we already sent to it", s->name);
- s->version = tmp;
- }
- ret = send_diffs(server_context, s, log_fd, database, current_version,
- current_tstamp);
+ if ((s->version_ack = tmp) < s->version)
+ break;
+ send_diffs(server_context, s, log_fd, database, current_version);
break;
case I_AM_HERE :
if (verbose)
@@ -1035,7 +1489,7 @@ write_stats(krb5_context context, slave *slaves, uint32_t current_version)
} else
rtbl_add_column_entry(tbl, SLAVE_ADDRESS, "<unknown>");
- snprintf(str, sizeof(str), "%u", (unsigned)slaves->version);
+ snprintf(str, sizeof(str), "%u", (unsigned)slaves->version_ack);
rtbl_add_column_entry(tbl, SLAVE_VERSION, str);
if (slaves->flags & SLAVE_F_DEAD)
@@ -1044,7 +1498,10 @@ write_stats(krb5_context context, slave *slaves, uint32_t current_version)
rtbl_add_column_entry(tbl, SLAVE_STATUS, "Up");
ret = krb5_format_time(context, slaves->seen, str, sizeof(str), TRUE);
- rtbl_add_column_entry(tbl, SLAVE_SEEN, str);
+ if (ret)
+ rtbl_add_column_entry(tbl, SLAVE_SEEN, "<error-formatting-time>");
+ else
+ rtbl_add_column_entry(tbl, SLAVE_SEEN, str);
slaves = slaves->next;
}
@@ -1105,7 +1562,6 @@ main(int argc, char **argv)
int log_fd;
slave *slaves = NULL;
uint32_t current_version = 0, old_version = 0;
- uint32_t current_tstamp = 0;
krb5_keytab keytab;
char **files;
int aret;
@@ -1198,7 +1654,7 @@ main(int argc, char **argv)
krb5_err(context, 1, errno, "shared flock %s",
server_context->log_context.log_file);
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
- &current_version, &current_tstamp);
+ &current_version, NULL);
flock(log_fd, LOCK_UN);
signal_fd = make_signal_socket (context);
@@ -1212,7 +1668,7 @@ main(int argc, char **argv)
while (exit_flag == 0){
slave *p;
- fd_set readset;
+ fd_set readset, writeset;
int max_fd = 0;
struct timeval to = {30, 0};
uint32_t vers;
@@ -1225,6 +1681,7 @@ main(int argc, char **argv)
#endif
FD_ZERO(&readset);
+ FD_ZERO(&writeset);
FD_SET(signal_fd, &readset);
max_fd = max(max_fd, signal_fd);
FD_SET(listen_fd, &readset);
@@ -1238,11 +1695,12 @@ main(int argc, char **argv)
if (p->flags & SLAVE_F_DEAD)
continue;
FD_SET(p->fd, &readset);
+ if (have_tail(p) || more_diffs(p))
+ FD_SET(p->fd, &writeset);
max_fd = max(max_fd, p->fd);
}
- ret = select (max_fd + 1,
- &readset, NULL, NULL, &to);
+ ret = select(max_fd + 1, &readset, &writeset, NULL, &to);
if (ret < 0) {
if (errno == EINTR)
continue;
@@ -1260,7 +1718,7 @@ main(int argc, char **argv)
log_fd = open(server_context->log_context.log_file, O_RDONLY, 0);
if (log_fd < 0)
- krb5_err(context, 1, IPROPD_RESTART_SLOW, "open %s",
+ krb5_err(context, IPROPD_RESTART_SLOW, errno, "open %s",
server_context->log_context.log_file);
if (fstat(log_fd, &st) == -1)
@@ -1271,7 +1729,7 @@ main(int argc, char **argv)
krb5_err(context, IPROPD_RESTART, errno, "shared flock %s",
server_context->log_context.log_file);
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
- &current_version, &current_tstamp);
+ &current_version, NULL);
flock(log_fd, LOCK_UN);
}
@@ -1284,19 +1742,20 @@ main(int argc, char **argv)
krb5_err(context, IPROPD_RESTART, errno,
"could not lock log file");
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
- &current_version, &current_tstamp);
+ &current_version, NULL);
flock(log_fd, LOCK_UN);
if (current_version > old_version) {
- krb5_warnx(context,
- "Missed a signal, updating slaves %lu to %lu",
- (unsigned long)old_version,
- (unsigned long)current_version);
+ if (verbose)
+ krb5_warnx(context,
+ "Missed a signal, updating slaves %lu to %lu",
+ (unsigned long)old_version,
+ (unsigned long)current_version);
for (p = slaves; p != NULL; p = p->next) {
if (p->flags & SLAVE_F_DEAD)
continue;
- send_diffs (server_context, p, log_fd, database,
- current_version, current_tstamp);
+ send_diffs(server_context, p, log_fd, database,
+ current_version);
}
old_version = current_version;
}
@@ -1327,7 +1786,7 @@ main(int argc, char **argv)
krb5_err(context, IPROPD_RESTART, errno, "shared flock %s",
server_context->log_context.log_file);
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
- &current_version, &current_tstamp);
+ &current_version, NULL);
flock(log_fd, LOCK_UN);
if (current_version != old_version) {
/*
@@ -1342,31 +1801,44 @@ main(int argc, char **argv)
* breaking backwards compatibility for the protocol or
* adding new messages to it.
*/
- krb5_warnx(context,
- "Got a signal, updating slaves %lu to %lu",
- (unsigned long)old_version,
- (unsigned long)current_version);
+ if (verbose)
+ krb5_warnx(context,
+ "Got a signal, updating slaves %lu to %lu",
+ (unsigned long)old_version,
+ (unsigned long)current_version);
for (p = slaves; p != NULL; p = p->next) {
if (p->flags & SLAVE_F_DEAD)
continue;
- send_diffs (server_context, p, log_fd, database,
- current_version, current_tstamp);
+ send_diffs(server_context, p, log_fd, database,
+ current_version);
}
} else {
- krb5_warnx(context,
- "Got a signal, but no update in log version %lu",
- (unsigned long)current_version);
+ if (verbose)
+ krb5_warnx(context,
+ "Got a signal, but no update in log version %lu",
+ (unsigned long)current_version);
}
}
+ for (p = slaves; p != NULL; p = p->next) {
+ if (!(p->flags & SLAVE_F_DEAD) &&
+ FD_ISSET(p->fd, &writeset) &&
+ ((have_tail(p) && send_tail(context, p) == 0) ||
+ (!have_tail(p) && more_diffs(p)))) {
+ send_diffs(server_context, p, log_fd, database,
+ current_version);
+ }
+ }
+
for(p = slaves; p != NULL; p = p->next) {
if (p->flags & SLAVE_F_DEAD)
continue;
if (ret && FD_ISSET(p->fd, &readset)) {
--ret;
assert(ret >= 0);
- if(process_msg (server_context, p, log_fd, database,
- current_version, current_tstamp))
+ ret = process_msg(server_context, p, log_fd, database,
+ current_version);
+ if (ret && ret != EWOULDBLOCK)
slave_dead(context, p);
} else if (slave_gone_p (p))
slave_dead(context, p);