diff options
Diffstat (limited to 'sys/contrib/openzfs/cmd/zed/zed_exec.c')
-rw-r--r-- | sys/contrib/openzfs/cmd/zed/zed_exec.c | 447 |
1 files changed, 447 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/cmd/zed/zed_exec.c b/sys/contrib/openzfs/cmd/zed/zed_exec.c new file mode 100644 index 000000000000..a14af4f20a85 --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed_exec.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * This file is part of the ZFS Event Daemon (ZED). + * + * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). + * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. + * Refer to the OpenZFS git commit log for authoritative copyright attribution. + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License Version 1.0 (CDDL-1.0). + * You can obtain a copy of the license from the top-level file + * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. + * You may not use this file except in compliance with the license. + */ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> +#include <sys/avl.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <pthread.h> +#include <signal.h> + +#include "zed_exec.h" +#include "zed_log.h" +#include "zed_strings.h" + +#define ZEVENT_FILENO 3 + +struct launched_process_node { + avl_node_t node; + pid_t pid; + uint64_t eid; + char *name; +}; + +static int +_launched_process_node_compare(const void *x1, const void *x2) +{ + pid_t p1; + pid_t p2; + + assert(x1 != NULL); + assert(x2 != NULL); + + p1 = ((const struct launched_process_node *) x1)->pid; + p2 = ((const struct launched_process_node *) x2)->pid; + + if (p1 < p2) + return (-1); + else if (p1 == p2) + return (0); + else + return (1); +} + +static pthread_t _reap_children_tid = (pthread_t)-1; +static volatile boolean_t _reap_children_stop; +static avl_tree_t _launched_processes; +static pthread_mutex_t _launched_processes_lock = PTHREAD_MUTEX_INITIALIZER; +static int16_t _launched_processes_limit; + +/* + * Create an environment string array for passing to execve() using the + * NAME=VALUE strings in container [zsp]. + * Return a newly-allocated environment, or NULL on error. + */ +static char ** +_zed_exec_create_env(zed_strings_t *zsp) +{ + int num_ptrs; + int buflen; + char *buf; + char **pp; + char *p; + const char *q; + int i; + int len; + + num_ptrs = zed_strings_count(zsp) + 1; + buflen = num_ptrs * sizeof (char *); + for (q = zed_strings_first(zsp); q; q = zed_strings_next(zsp)) + buflen += strlen(q) + 1; + + buf = calloc(1, buflen); + if (!buf) + return (NULL); + + pp = (char **)buf; + p = buf + (num_ptrs * sizeof (char *)); + i = 0; + for (q = zed_strings_first(zsp); q; q = zed_strings_next(zsp)) { + pp[i] = p; + len = strlen(q) + 1; + memcpy(p, q, len); + p += len; + i++; + } + pp[i] = NULL; + assert(buf + buflen == p); + return ((char **)buf); +} + +/* + * Fork a child process to handle event [eid]. The program [prog] + * in directory [dir] is executed with the environment [env]. + * + * The file descriptor [zfd] is the zevent_fd used to track the + * current cursor location within the zevent nvlist. + */ +static void +_zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog, + char *env[], int zfd, boolean_t in_foreground) +{ + char path[PATH_MAX]; + int n; + pid_t pid; + int fd; + struct launched_process_node *node; + sigset_t mask; + struct timespec launch_timeout = + { .tv_sec = 0, .tv_nsec = 200 * 1000 * 1000, }; + + assert(dir != NULL); + assert(prog != NULL); + assert(env != NULL); + assert(zfd >= 0); + + while (__atomic_load_n(&_launched_processes_limit, + __ATOMIC_SEQ_CST) <= 0) + (void) nanosleep(&launch_timeout, NULL); + + n = snprintf(path, sizeof (path), "%s/%s", dir, prog); + if ((n < 0) || (n >= sizeof (path))) { + zed_log_msg(LOG_WARNING, + "Failed to fork \"%s\" for eid=%llu: %s", + prog, eid, strerror(ENAMETOOLONG)); + return; + } + (void) pthread_mutex_lock(&_launched_processes_lock); + pid = fork(); + if (pid < 0) { + (void) pthread_mutex_unlock(&_launched_processes_lock); + zed_log_msg(LOG_WARNING, + "Failed to fork \"%s\" for eid=%llu: %s", + prog, eid, strerror(errno)); + return; + } else if (pid == 0) { + (void) sigemptyset(&mask); + (void) sigprocmask(SIG_SETMASK, &mask, NULL); + + (void) umask(022); + if (in_foreground && /* we're already devnulled if daemonised */ + (fd = open("/dev/null", O_RDWR | O_CLOEXEC)) != -1) { + (void) dup2(fd, STDIN_FILENO); + (void) dup2(fd, STDOUT_FILENO); + (void) dup2(fd, STDERR_FILENO); + } + (void) dup2(zfd, ZEVENT_FILENO); + execle(path, prog, NULL, env); + _exit(127); + } + + /* parent process */ + + node = calloc(1, sizeof (*node)); + if (node) { + node->pid = pid; + node->eid = eid; + node->name = strdup(prog); + if (node->name == NULL) { + perror("strdup"); + exit(EXIT_FAILURE); + } + + avl_add(&_launched_processes, node); + } + (void) pthread_mutex_unlock(&_launched_processes_lock); + + __atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST); + zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d", + prog, eid, pid); +} + +static void +_nop(int sig) +{ + (void) sig; +} + +static void +wait_for_children(boolean_t do_pause, boolean_t wait) +{ + pid_t pid; + struct rusage usage; + int status; + struct launched_process_node node, *pnode; + + for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) { + (void) pthread_mutex_lock(&_launched_processes_lock); + pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage); + if (pid == 0 || pid == (pid_t)-1) { + (void) pthread_mutex_unlock(&_launched_processes_lock); + if ((pid == 0) || (errno == ECHILD)) { + if (do_pause) + pause(); + } else if (errno != EINTR) + zed_log_msg(LOG_WARNING, + "Failed to wait for children: %s", + strerror(errno)); + if (!do_pause) + return; + + } else { + memset(&node, 0, sizeof (node)); + node.pid = pid; + pnode = avl_find(&_launched_processes, &node, NULL); + if (pnode) { + memcpy(&node, pnode, sizeof (node)); + + avl_remove(&_launched_processes, pnode); + free(pnode); + } + (void) pthread_mutex_unlock(&_launched_processes_lock); + __atomic_add_fetch(&_launched_processes_limit, 1, + __ATOMIC_SEQ_CST); + + usage.ru_utime.tv_sec += usage.ru_stime.tv_sec; + usage.ru_utime.tv_usec += usage.ru_stime.tv_usec; + usage.ru_utime.tv_sec += + usage.ru_utime.tv_usec / (1000 * 1000); + usage.ru_utime.tv_usec %= 1000 * 1000; + + if (WIFEXITED(status)) { + zed_log_msg(LOG_INFO, + "Finished \"%s\" eid=%llu pid=%d " + "time=%llu.%06us exit=%d", + node.name, node.eid, pid, + (unsigned long long) usage.ru_utime.tv_sec, + (unsigned int) usage.ru_utime.tv_usec, + WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + zed_log_msg(LOG_INFO, + "Finished \"%s\" eid=%llu pid=%d " + "time=%llu.%06us sig=%d/%s", + node.name, node.eid, pid, + (unsigned long long) usage.ru_utime.tv_sec, + (unsigned int) usage.ru_utime.tv_usec, + WTERMSIG(status), + strsignal(WTERMSIG(status))); + } else { + zed_log_msg(LOG_INFO, + "Finished \"%s\" eid=%llu pid=%d " + "time=%llu.%06us status=0x%X", + node.name, node.eid, pid, + (unsigned long long) usage.ru_utime.tv_sec, + (unsigned int) usage.ru_utime.tv_usec, + (unsigned int) status); + } + + free(node.name); + } + } + +} + +static void * +_reap_children(void *arg) +{ + (void) arg; + struct sigaction sa = {}; + + (void) sigfillset(&sa.sa_mask); + (void) sigdelset(&sa.sa_mask, SIGCHLD); + (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL); + + (void) sigemptyset(&sa.sa_mask); + sa.sa_handler = _nop; + sa.sa_flags = SA_NOCLDSTOP; + (void) sigaction(SIGCHLD, &sa, NULL); + + wait_for_children(B_TRUE, B_FALSE); + + return (NULL); +} + +void +zed_exec_fini(void) +{ + struct launched_process_node *node; + void *ck = NULL; + + if (_reap_children_tid == (pthread_t)-1) + return; + + _reap_children_stop = B_TRUE; + (void) pthread_kill(_reap_children_tid, SIGCHLD); + (void) pthread_join(_reap_children_tid, NULL); + + while ((node = avl_destroy_nodes(&_launched_processes, &ck)) != NULL) { + free(node->name); + free(node); + } + avl_destroy(&_launched_processes); + + (void) pthread_mutex_destroy(&_launched_processes_lock); + (void) pthread_mutex_init(&_launched_processes_lock, NULL); + + _reap_children_tid = (pthread_t)-1; +} + +/* + * Check if the zedlet name indicates if it is a synchronous zedlet + * + * Synchronous zedlets have a "-sync-" immediately following the event name in + * their zedlet filename, like: + * + * EVENT_NAME-sync-ZEDLETNAME.sh + * + * For example, if you wanted a synchronous statechange script: + * + * statechange-sync-myzedlet.sh + * + * Synchronous zedlets are guaranteed to be the only zedlet running. No other + * zedlets may run in parallel with a synchronous zedlet. A synchronous + * zedlet will wait for all previously spawned zedlets to finish before running. + * Users should be careful to only use synchronous zedlets when needed, since + * they decrease parallelism. + */ +static boolean_t +zedlet_is_sync(const char *zedlet, const char *event) +{ + const char *sync_str = "-sync-"; + size_t sync_str_len; + size_t zedlet_len; + size_t event_len; + + sync_str_len = strlen(sync_str); + zedlet_len = strlen(zedlet); + event_len = strlen(event); + + if (event_len + sync_str_len >= zedlet_len) + return (B_FALSE); + + if (strncmp(&zedlet[event_len], sync_str, sync_str_len) == 0) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * Process the event [eid] by synchronously invoking all zedlets with a + * matching class prefix. + * + * Each executable in [zcp->zedlets] from the directory [zcp->zedlet_dir] + * is matched against the event's [class], [subclass], and the "all" class + * (which matches all events). + * Every zedlet with a matching class prefix is invoked. + * The NAME=VALUE strings in [envs] will be passed to the zedlet as + * environment variables. + * + * The file descriptor [zcp->zevent_fd] is the zevent_fd used to track the + * current cursor location within the zevent nvlist. + * + * Return 0 on success, -1 on error. + */ +int +zed_exec_process(uint64_t eid, const char *class, const char *subclass, + struct zed_conf *zcp, zed_strings_t *envs) +{ + const char *class_strings[4]; + const char *allclass = "all"; + const char **csp; + const char *z; + char **e; + int n; + + if (!zcp->zedlet_dir || !zcp->zedlets || !envs || zcp->zevent_fd < 0) + return (-1); + + if (_reap_children_tid == (pthread_t)-1) { + _launched_processes_limit = zcp->max_jobs; + + if (pthread_create(&_reap_children_tid, NULL, + _reap_children, NULL) != 0) + return (-1); + pthread_setname_np(_reap_children_tid, "reap ZEDLETs"); + + avl_create(&_launched_processes, _launched_process_node_compare, + sizeof (struct launched_process_node), + offsetof(struct launched_process_node, node)); + } + + csp = class_strings; + + if (class) + *csp++ = class; + + if (subclass) + *csp++ = subclass; + + if (allclass) + *csp++ = allclass; + + *csp = NULL; + + e = _zed_exec_create_env(envs); + + for (z = zed_strings_first(zcp->zedlets); z; + z = zed_strings_next(zcp->zedlets)) { + for (csp = class_strings; *csp; csp++) { + n = strlen(*csp); + if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) { + boolean_t is_sync = zedlet_is_sync(z, *csp); + + if (is_sync) { + /* + * Wait for previous zedlets to + * finish + */ + wait_for_children(B_FALSE, B_TRUE); + } + + _zed_exec_fork_child(eid, zcp->zedlet_dir, + z, e, zcp->zevent_fd, zcp->do_foreground); + + if (is_sync) { + /* + * Wait for sync zedlet we just launched + * to finish. + */ + wait_for_children(B_FALSE, B_TRUE); + } + } + } + } + free(e); + return (0); +} |