| author | Martin Matuska <mm@FreeBSD.org> | 2012-07-01 14:55:35 +0000 |
|---|---|---|
| committer | Martin Matuska <mm@FreeBSD.org> | 2012-07-01 14:55:35 +0000 |
| commit | 5a27a66a81254f7b8eadd92ceac8ff50132e971c (patch) | |
| tree | a7fdb0498552fa601ffec363d7a91a3095209b8e | /uts/common/dtrace |
| parent | d661fdff24a3141883f5058d89a5f044c736ef56 (diff) | |
Diffstat (limited to 'uts/common/dtrace')
| -rw-r--r-- | uts/common/dtrace/dcpc.c | 1218 |
| -rw-r--r-- | uts/common/dtrace/dtrace.c | 863 |
| -rw-r--r-- | uts/common/dtrace/fasttrap.c | 48 |
| -rw-r--r-- | uts/common/dtrace/lockstat.c | 10 |
| -rw-r--r-- | uts/common/dtrace/profile.c | 34 |
| -rw-r--r-- | uts/common/dtrace/sdt_subr.c | 311 |
| -rw-r--r-- | uts/common/dtrace/systrace.c | 11 |
7 files changed, 2292 insertions, 203 deletions
diff --git a/uts/common/dtrace/dcpc.c b/uts/common/dtrace/dcpc.c new file mode 100644 index 000000000000..8fd96cc24c6c --- /dev/null +++ b/uts/common/dtrace/dcpc.c @@ -0,0 +1,1218 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/errno.h> +#include <sys/cpuvar.h> +#include <sys/stat.h> +#include <sys/modctl.h> +#include <sys/cmn_err.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/ksynch.h> +#include <sys/conf.h> +#include <sys/kmem.h> +#include <sys/kcpc.h> +#include <sys/cap_util.h> +#include <sys/cpc_pcbe.h> +#include <sys/cpc_impl.h> +#include <sys/dtrace_impl.h> + +/* + * DTrace CPU Performance Counter Provider + * --------------------------------------- + * + * The DTrace cpc provider allows DTrace consumers to access the CPU + * performance counter overflow mechanism of a CPU. The configuration + * presented in a probe specification is programmed into the performance + * counter hardware of all available CPUs on a system. Programming the + * hardware causes a counter on each CPU to begin counting events of the + * given type. When the specified number of events have occurred, an overflow + * interrupt will be generated and the probe is fired. + * + * The required configuration for the performance counter is encoded into + * the probe specification and this includes the performance counter event + * name, processor mode, overflow rate and an optional unit mask. + * + * Most processors provide several counters (PICs) which can count all or a + * subset of the events available for a given CPU. However, when overflow + * profiling is being used, not all CPUs can detect which counter generated the + * overflow interrupt. In this case we cannot reliably determine which counter + * overflowed and we therefore only allow such CPUs to configure one event at + * a time. Processors that can determine the counter which overflowed are + * allowed to program as many events at one time as possible (in theory up to + * the number of instrumentation counters supported by that platform). + * Therefore, multiple consumers can enable multiple probes at the same time + * on such platforms. Platforms which cannot determine the source of an + * overflow interrupt are only allowed to program a single event at one time. + * + * The performance counter hardware is made available to consumers on a + * first-come, first-served basis. Only a finite amount of hardware resource + * is available and, while we make every attempt to accomodate requests from + * consumers, we must deny requests when hardware resources have been exhausted. 
+ * A consumer will fail to enable probes when resources are currently in use. + * + * The cpc provider contends for shared hardware resources along with other + * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)). + * Only one such consumer can use the performance counters at any one time and + * counters are made available on a first-come, first-served basis. As with + * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g. + * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP + * counter contexts to be invalidated. + */ + +typedef struct dcpc_probe { + char dcpc_event_name[CPC_MAX_EVENT_LEN]; + int dcpc_flag; /* flags (USER/SYS) */ + uint32_t dcpc_ovfval; /* overflow value */ + int64_t dcpc_umask; /* umask/emask for this event */ + int dcpc_picno; /* pic this event is programmed in */ + int dcpc_enabled; /* probe is actually enabled? */ + int dcpc_disabling; /* probe is currently being disabled */ + dtrace_id_t dcpc_id; /* probeid this request is enabling */ + int dcpc_actv_req_idx; /* idx into dcpc_actv_reqs[] */ +} dcpc_probe_t; + +static dev_info_t *dcpc_devi; +static dtrace_provider_id_t dcpc_pid; +static dcpc_probe_t **dcpc_actv_reqs; +static uint32_t dcpc_enablings = 0; +static int dcpc_ovf_mask = 0; +static int dcpc_mult_ovf_cap = 0; +static int dcpc_mask_type = 0; + +/* + * When the dcpc provider is loaded, dcpc_min_overflow is set to either + * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in + * the dcpc.conf file. Decrease this value to set probes with smaller + * overflow values. Remember that very small values could render a system + * unusable with frequently occurring events. + */ +#define DCPC_MIN_OVF_DEFAULT 5000 +static uint32_t dcpc_min_overflow; + +static int dcpc_aframes = 0; /* override for artificial frame setting */ +#if defined(__x86) +#define DCPC_ARTIFICIAL_FRAMES 8 +#elif defined(__sparc) +#define DCPC_ARTIFICIAL_FRAMES 2 +#endif + +/* + * Called from the platform overflow interrupt handler. 'bitmap' is a mask + * which contains the pic(s) that have overflowed. + */ +static void +dcpc_fire(uint64_t bitmap) +{ + int i; + + /* + * No counter was marked as overflowing. Shout about it and get out. + */ + if ((bitmap & dcpc_ovf_mask) == 0) { + cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n"); + return; + } + + /* + * This is the common case of a processor that doesn't support + * multiple overflow events. Such systems are only allowed a single + * enabling and therefore we just look for the first entry in + * the active request array. + */ + if (!dcpc_mult_ovf_cap) { + for (i = 0; i < cpc_ncounters; i++) { + if (dcpc_actv_reqs[i] != NULL) { + dtrace_probe(dcpc_actv_reqs[i]->dcpc_id, + CPU->cpu_cpcprofile_pc, + CPU->cpu_cpcprofile_upc, 0, 0, 0); + return; + } + } + return; + } + + /* + * This is a processor capable of handling multiple overflow events. + * Iterate over the array of active requests and locate the counters + * that overflowed (note: it is possible for more than one counter to + * have overflowed at the same time). 
+ */ + for (i = 0; i < cpc_ncounters; i++) { + if (dcpc_actv_reqs[i] != NULL && + (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) { + dtrace_probe(dcpc_actv_reqs[i]->dcpc_id, + CPU->cpu_cpcprofile_pc, + CPU->cpu_cpcprofile_upc, 0, 0, 0); + } + } +} + +static void +dcpc_create_probe(dtrace_provider_id_t id, const char *probename, + char *eventname, int64_t umask, uint32_t ovfval, char flag) +{ + dcpc_probe_t *pp; + int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes(); + + if (dcpc_aframes) + nr_frames = dcpc_aframes; + + if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0) + return; + + pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP); + (void) strncpy(pp->dcpc_event_name, eventname, + sizeof (pp->dcpc_event_name) - 1); + pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0'; + pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT; + pp->dcpc_ovfval = ovfval; + pp->dcpc_umask = umask; + pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1; + + pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename, + nr_frames, pp); +} + +/*ARGSUSED*/ +static void +dcpc_provide(void *arg, const dtrace_probedesc_t *desc) +{ + /* + * The format of a probe is: + * + * event_name-mode-{optional_umask}-overflow_rate + * e.g. + * DC_refill_from_system-user-0x1e-50000, or, + * DC_refill_from_system-all-10000 + * + */ + char *str, *end, *p; + int i, flag = 0; + char event[CPC_MAX_EVENT_LEN]; + long umask = -1, val = 0; + size_t evlen, len; + + /* + * The 'cpc' provider offers no probes by default. + */ + if (desc == NULL) + return; + + len = strlen(desc->dtpd_name); + p = str = kmem_alloc(len + 1, KM_SLEEP); + (void) strcpy(str, desc->dtpd_name); + + /* + * We have a poor man's strtok() going on here. Replace any hyphens + * in the the probe name with NULL characters in order to make it + * easy to parse the string with regular string functions. + */ + for (i = 0; i < len; i++) { + if (str[i] == '-') + str[i] = '\0'; + } + + /* + * The first part of the string must be either a platform event + * name or a generic event name. + */ + evlen = strlen(p); + (void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1); + event[CPC_MAX_EVENT_LEN - 1] = '\0'; + + /* + * The next part of the name is the mode specification. Valid + * settings are "user", "kernel" or "all". + */ + p += evlen + 1; + + if (strcmp(p, "user") == 0) + flag |= CPC_COUNT_USER; + else if (strcmp(p, "kernel") == 0) + flag |= CPC_COUNT_SYSTEM; + else if (strcmp(p, "all") == 0) + flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM; + else + goto err; + + /* + * Next we either have a mask specification followed by an overflow + * rate or just an overflow rate on its own. + */ + p += strlen(p) + 1; + if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + /* + * A unit mask can only be specified if: + * 1) this performance counter back end supports masks. + * 2) the specified event is platform specific. + * 3) a valid hex number is converted. + * 4) no extraneous characters follow the mask specification. + */ + if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 && + ddi_strtol(p, &end, 16, &umask) == 0 && + end == p + strlen(p)) { + p += strlen(p) + 1; + } else { + goto err; + } + } + + /* + * This final part must be an overflow value which has to be greater + * than the minimum permissible overflow rate. + */ + if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) || + val < dcpc_min_overflow) + goto err; + + /* + * Validate the event and create the probe. 
+ */ + for (i = 0; i < cpc_ncounters; i++) { + char *events, *cp, *p, *end; + int found = 0, j; + size_t llen; + + if ((events = kcpc_list_events(i)) == NULL) + goto err; + + llen = strlen(events); + p = cp = ddi_strdup(events, KM_NOSLEEP); + end = cp + llen; + + for (j = 0; j < llen; j++) { + if (cp[j] == ',') + cp[j] = '\0'; + } + + while (p < end && found == 0) { + if (strcmp(p, event) == 0) { + dcpc_create_probe(dcpc_pid, desc->dtpd_name, + event, umask, (uint32_t)val, flag); + found = 1; + } + p += strlen(p) + 1; + } + kmem_free(cp, llen + 1); + + if (found) + break; + } + +err: + kmem_free(str, len + 1); +} + +/*ARGSUSED*/ +static void +dcpc_destroy(void *arg, dtrace_id_t id, void *parg) +{ + dcpc_probe_t *pp = parg; + + ASSERT(pp->dcpc_enabled == 0); + kmem_free(pp, sizeof (dcpc_probe_t)); +} + +/*ARGSUSED*/ +static int +dcpc_mode(void *arg, dtrace_id_t id, void *parg) +{ + if (CPU->cpu_cpcprofile_pc == 0) { + return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER); + } else { + return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL); + } +} + +static void +dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno) +{ + kcpc_set_t *oset; + int i; + + (void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name, + CPC_MAX_EVENT_LEN); + set->ks_req[reqno].kr_config = NULL; + set->ks_req[reqno].kr_index = reqno; + set->ks_req[reqno].kr_picnum = -1; + set->ks_req[reqno].kr_flags = pp->dcpc_flag; + + /* + * If a unit mask has been specified then detect which attribute + * the platform needs. For now, it's either "umask" or "emask". + */ + if (pp->dcpc_umask >= 0) { + set->ks_req[reqno].kr_attr = + kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP); + set->ks_req[reqno].kr_nattrs = 1; + if (dcpc_mask_type & DCPC_UMASK) + (void) strncpy(set->ks_req[reqno].kr_attr->ka_name, + "umask", 5); + else + (void) strncpy(set->ks_req[reqno].kr_attr->ka_name, + "emask", 5); + set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask; + } else { + set->ks_req[reqno].kr_attr = NULL; + set->ks_req[reqno].kr_nattrs = 0; + } + + /* + * If this probe is enabled, obtain its current countdown value + * and use that. The CPUs cpc context might not exist yet if we + * are dealing with a CPU that is just coming online. + */ + if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) { + oset = c->cpu_cpc_ctx->kc_set; + + for (i = 0; i < oset->ks_nreqs; i++) { + if (strcmp(oset->ks_req[i].kr_event, + set->ks_req[reqno].kr_event) == 0) { + set->ks_req[reqno].kr_preset = + *(oset->ks_req[i].kr_data); + } + } + } else { + set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval; + } + + set->ks_nreqs++; +} + + +/* + * Create a fresh request set for the enablings represented in the + * 'dcpc_actv_reqs' array which contains the probes we want to be + * in the set. This can be called for several reasons: + * + * 1) We are on a single or multi overflow platform and we have no + * current events so we can just create the set and initialize it. + * 2) We are on a multi-overflow platform and we already have one or + * more existing events and we are adding a new enabling. Create a + * new set and copy old requests in and then add the new request. + * 3) We are on a multi-overflow platform and we have just removed an + * enabling but we still have enablings whch are valid. Create a new + * set and copy in still valid requests. + */ +static kcpc_set_t * +dcpc_create_set(cpu_t *c) +{ + int i, reqno = 0; + int active_requests = 0; + kcpc_set_t *set; + + /* + * First get a count of the number of currently active requests. 
+ * Note that dcpc_actv_reqs[] should always reflect which requests + * we want to be in the set that is to be created. It is the + * responsibility of the caller of dcpc_create_set() to adjust that + * array accordingly beforehand. + */ + for (i = 0; i < cpc_ncounters; i++) { + if (dcpc_actv_reqs[i] != NULL) + active_requests++; + } + + set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP); + + set->ks_req = + kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP); + + set->ks_data = + kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP); + + /* + * Look for valid entries in the active requests array and populate + * the request set for any entries found. + */ + for (i = 0; i < cpc_ncounters; i++) { + if (dcpc_actv_reqs[i] != NULL) { + dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno); + reqno++; + } + } + + return (set); +} + +static int +dcpc_program_cpu_event(cpu_t *c) +{ + int i, j, subcode; + kcpc_ctx_t *ctx, *octx; + kcpc_set_t *set; + + set = dcpc_create_set(c); + + set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP); + ctx->kc_set = set; + ctx->kc_cpuid = c->cpu_id; + + if (kcpc_assign_reqs(set, ctx) != 0) + goto err; + + if (kcpc_configure_reqs(ctx, set, &subcode) != 0) + goto err; + + for (i = 0; i < set->ks_nreqs; i++) { + for (j = 0; j < cpc_ncounters; j++) { + if (dcpc_actv_reqs[j] != NULL && + strcmp(set->ks_req[i].kr_event, + dcpc_actv_reqs[j]->dcpc_event_name) == 0) { + dcpc_actv_reqs[j]->dcpc_picno = + set->ks_req[i].kr_picnum; + } + } + } + + /* + * If we already have an active enabling then save the current cpc + * context away. + */ + octx = c->cpu_cpc_ctx; + + kcpc_cpu_program(c, ctx); + + if (octx != NULL) { + kcpc_set_t *oset = octx->kc_set; + kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t)); + kcpc_free_configs(oset); + kcpc_free_set(oset); + kcpc_ctx_free(octx); + } + + return (0); + +err: + /* + * We failed to configure this request up so free things up and + * get out. + */ + kcpc_free_configs(set); + kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); + kcpc_free_set(set); + kcpc_ctx_free(ctx); + + return (-1); +} + +static void +dcpc_disable_cpu(cpu_t *c) +{ + kcpc_ctx_t *ctx; + kcpc_set_t *set; + + /* + * Leave this CPU alone if it's already offline. + */ + if (c->cpu_flags & CPU_OFFLINE) + return; + + /* + * Grab CPUs CPC context before kcpc_cpu_stop() stops counters and + * changes it. + */ + ctx = c->cpu_cpc_ctx; + + kcpc_cpu_stop(c, B_FALSE); + + set = ctx->kc_set; + + kcpc_free_configs(set); + kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); + kcpc_free_set(set); + kcpc_ctx_free(ctx); +} + +/* + * The dcpc_*_interrupts() routines are responsible for manipulating the + * per-CPU dcpc interrupt state byte. The purpose of the state byte is to + * synchronize processing of hardware overflow interrupts wth configuration + * changes made to the CPU performance counter subsystem by the dcpc provider. + * + * The dcpc provider claims ownership of the overflow interrupt mechanism + * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the + * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the + * overflow mechanism and interrupts may be processed). Before modifying + * a CPUs configuration state the state byte is transitioned from + * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state). + * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process + * an interrupt when a configuration is not in process (i.e. the state is + * marked as free). 
During interrupt processing the state is set to + * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based + * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate + * the dcpc provider is no longer interested in overflow interrupts. + */ +static void +dcpc_block_interrupts(void) +{ + cpu_t *c = cpu_list; + uint8_t *state; + + ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE); + + do { + state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state; + + while (atomic_cas_8(state, DCPC_INTR_FREE, + DCPC_INTR_CONFIG) != DCPC_INTR_FREE) + continue; + + } while ((c = c->cpu_next) != cpu_list); +} + +/* + * Set all CPUs dcpc interrupt state to DCPC_INTR_FREE to indicate that + * overflow interrupts can be processed safely. + */ +static void +dcpc_release_interrupts(void) +{ + cpu_t *c = cpu_list; + + ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE); + + do { + cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE; + membar_producer(); + } while ((c = c->cpu_next) != cpu_list); +} + +/* + * Transition all CPUs dcpc interrupt state from DCPC_INTR_INACTIVE to + * to DCPC_INTR_FREE. This indicates that the dcpc provider is now + * responsible for handling all overflow interrupt activity. Should only be + * called before enabling the first dcpc based probe. + */ +static void +dcpc_claim_interrupts(void) +{ + cpu_t *c = cpu_list; + + ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE); + + do { + cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE; + membar_producer(); + } while ((c = c->cpu_next) != cpu_list); +} + +/* + * Set all CPUs dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that + * the dcpc provider is no longer processing overflow interrupts. Only called + * during removal of the last dcpc based enabling. + */ +static void +dcpc_surrender_interrupts(void) +{ + cpu_t *c = cpu_list; + + ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE); + + do { + cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE; + membar_producer(); + } while ((c = c->cpu_next) != cpu_list); +} + +/* + * dcpc_program_event() can be called owing to a new enabling or if a multi + * overflow platform has disabled a request but needs to program the requests + * that are still valid. + * + * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t + * and a new request set which contains the new enabling and any old enablings + * which are still valid (possible with multi-overflow platforms). + */ +static int +dcpc_program_event(dcpc_probe_t *pp) +{ + cpu_t *c; + int ret = 0; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + kpreempt_disable(); + + dcpc_block_interrupts(); + + c = cpu_list; + + do { + /* + * Skip CPUs that are currently offline. + */ + if (c->cpu_flags & CPU_OFFLINE) + continue; + + /* + * Stop counters but preserve existing DTrace CPC context + * if there is one. + * + * If we come here when the first event is programmed for a CPU, + * there should be no DTrace CPC context installed. In this + * case, kcpc_cpu_stop() will ensure that there is no other + * context on the CPU. + * + * If we add new enabling to the original one, the CPU should + * have the old DTrace CPC context which we need to keep around + * since dcpc_program_event() will add to it. 
+ */ + if (c->cpu_cpc_ctx != NULL) + kcpc_cpu_stop(c, B_TRUE); + } while ((c = c->cpu_next) != cpu_list); + + dcpc_release_interrupts(); + + /* + * If this enabling is being removed (in the case of a multi event + * capable system with more than one active enabling), we can now + * update the active request array to reflect the enablings that need + * to be reprogrammed. + */ + if (pp->dcpc_disabling == 1) + dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; + + do { + /* + * Skip CPUs that are currently offline. + */ + if (c->cpu_flags & CPU_OFFLINE) + continue; + + ret = dcpc_program_cpu_event(c); + } while ((c = c->cpu_next) != cpu_list && ret == 0); + + /* + * If dcpc_program_cpu_event() fails then it is because we couldn't + * configure the requests in the set for the CPU and not because of + * an error programming the hardware. If we have a failure here then + * we assume no CPUs have been programmed in the above step as they + * are all configured identically. + */ + if (ret != 0) { + pp->dcpc_enabled = 0; + kpreempt_enable(); + return (-1); + } + + if (pp->dcpc_disabling != 1) + pp->dcpc_enabled = 1; + + kpreempt_enable(); + + return (0); +} + +/*ARGSUSED*/ +static int +dcpc_enable(void *arg, dtrace_id_t id, void *parg) +{ + dcpc_probe_t *pp = parg; + int i, found = 0; + cpu_t *c; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Bail out if the counters are being used by a libcpc consumer. + */ + rw_enter(&kcpc_cpuctx_lock, RW_READER); + if (kcpc_cpuctx > 0) { + rw_exit(&kcpc_cpuctx_lock); + return (-1); + } + + dtrace_cpc_in_use++; + rw_exit(&kcpc_cpuctx_lock); + + /* + * Locate this enabling in the first free entry of the active + * request array. + */ + for (i = 0; i < cpc_ncounters; i++) { + if (dcpc_actv_reqs[i] == NULL) { + dcpc_actv_reqs[i] = pp; + pp->dcpc_actv_req_idx = i; + found = 1; + break; + } + } + + /* + * If we couldn't find a slot for this probe then there is no + * room at the inn. + */ + if (!found) { + dtrace_cpc_in_use--; + return (-1); + } + + ASSERT(pp->dcpc_actv_req_idx >= 0); + + /* + * DTrace is taking over CPC contexts, so stop collecting + * capacity/utilization data for all CPUs. + */ + if (dtrace_cpc_in_use == 1) + cu_disable(); + + /* + * The following must hold true if we are to (attempt to) enable + * this request: + * + * 1) No enablings currently exist. We allow all platforms to + * proceed if this is true. + * + * OR + * + * 2) If the platform is multi overflow capable and there are + * less valid enablings than there are counters. There is no + * guarantee that a platform can accommodate as many events as + * it has counters for but we will at least try to program + * up to that many requests. + * + * The 'dcpc_enablings' variable is implictly protected by locking + * provided by the DTrace framework and the cpu management framework. + */ + if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap && + dcpc_enablings < cpc_ncounters)) { + /* + * Before attempting to program the first enabling we need to + * invalidate any lwp-based contexts and lay claim to the + * overflow interrupt mechanism. + */ + if (dcpc_enablings == 0) { + kcpc_invalidate_all(); + dcpc_claim_interrupts(); + } + + if (dcpc_program_event(pp) == 0) { + dcpc_enablings++; + return (0); + } + } + + /* + * If active enablings existed before we failed to enable this probe + * on a multi event capable platform then we need to restart counters + * as they will have been stopped in the attempted configuration. The + * context should now just contain the request prior to this failed + * enabling. 
+ */ + if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) { + c = cpu_list; + + ASSERT(dcpc_mult_ovf_cap == 1); + do { + /* + * Skip CPUs that are currently offline. + */ + if (c->cpu_flags & CPU_OFFLINE) + continue; + + kcpc_cpu_program(c, c->cpu_cpc_ctx); + } while ((c = c->cpu_next) != cpu_list); + } + + /* + * Give up any claim to the overflow interrupt mechanism if no + * dcpc based enablings exist. + */ + if (dcpc_enablings == 0) + dcpc_surrender_interrupts(); + + dtrace_cpc_in_use--; + dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; + pp->dcpc_actv_req_idx = pp->dcpc_picno = -1; + + /* + * If all probes are removed, enable capacity/utilization data + * collection for every CPU. + */ + if (dtrace_cpc_in_use == 0) + cu_enable(); + + return (-1); +} + +/* + * If only one enabling is active then remove the context and free + * everything up. If there are multiple enablings active then remove this + * one, its associated meta-data and re-program the hardware. + */ +/*ARGSUSED*/ +static void +dcpc_disable(void *arg, dtrace_id_t id, void *parg) +{ + cpu_t *c; + dcpc_probe_t *pp = parg; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + kpreempt_disable(); + + /* + * This probe didn't actually make it as far as being fully enabled + * so we needn't do anything with it. + */ + if (pp->dcpc_enabled == 0) { + /* + * If we actually allocated this request a slot in the + * request array but failed to enabled it then remove the + * entry in the array. + */ + if (pp->dcpc_actv_req_idx >= 0) { + dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; + pp->dcpc_actv_req_idx = pp->dcpc_picno = + pp->dcpc_disabling = -1; + } + + kpreempt_enable(); + return; + } + + /* + * If this is the only enabling then stop all the counters and + * free up the meta-data. + */ + if (dcpc_enablings == 1) { + ASSERT(dtrace_cpc_in_use == 1); + + dcpc_block_interrupts(); + + c = cpu_list; + + do { + dcpc_disable_cpu(c); + } while ((c = c->cpu_next) != cpu_list); + + dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; + dcpc_surrender_interrupts(); + } else { + /* + * This platform can support multiple overflow events and + * the enabling being disabled is not the last one. Remove this + * enabling and re-program the hardware with the new config. + */ + ASSERT(dcpc_mult_ovf_cap); + ASSERT(dcpc_enablings > 1); + + pp->dcpc_disabling = 1; + (void) dcpc_program_event(pp); + } + + kpreempt_enable(); + + dcpc_enablings--; + dtrace_cpc_in_use--; + pp->dcpc_enabled = 0; + pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1; + + /* + * If all probes are removed, enable capacity/utilization data + * collection for every CPU + */ + if (dtrace_cpc_in_use == 0) + cu_enable(); +} + +/*ARGSUSED*/ +static int +dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg) +{ + cpu_t *c; + uint8_t *state; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + switch (what) { + case CPU_OFF: + /* + * Offline CPUs are not allowed to take part so remove this + * CPU if we are actively tracing. + */ + if (dtrace_cpc_in_use) { + c = cpu_get(cpu); + state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state; + + /* + * Indicate that a configuration is in process in + * order to stop overflow interrupts being processed + * on this CPU while we disable it. + */ + while (atomic_cas_8(state, DCPC_INTR_FREE, + DCPC_INTR_CONFIG) != DCPC_INTR_FREE) + continue; + + dcpc_disable_cpu(c); + + /* + * Reset this CPUs interrupt state as the configuration + * has ended. 
+ */ + cpu_core[c->cpu_id].cpuc_dcpc_intr_state = + DCPC_INTR_FREE; + membar_producer(); + } + break; + + case CPU_ON: + case CPU_SETUP: + /* + * This CPU is being initialized or brought online so program + * it with the current request set if we are actively tracing. + */ + if (dtrace_cpc_in_use) { + c = cpu_get(cpu); + (void) dcpc_program_cpu_event(c); + } + break; + + default: + break; + } + + return (0); +} + +static dtrace_pattr_t dcpc_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +}; + +static dtrace_pops_t dcpc_pops = { + dcpc_provide, + NULL, + dcpc_enable, + dcpc_disable, + NULL, + NULL, + NULL, + NULL, + dcpc_mode, + dcpc_destroy +}; + +/*ARGSUSED*/ +static int +dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + return (0); +} + +/*ARGSUSED*/ +static int +dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + int error; + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = (void *)dcpc_devi; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + error = DDI_FAILURE; + } + return (error); +} + +static int +dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + break; + case DDI_SUSPEND: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (dtrace_unregister(dcpc_pid) != 0) + return (DDI_FAILURE); + + ddi_remove_minor_node(devi, NULL); + + mutex_enter(&cpu_lock); + unregister_cpu_setup_func(dcpc_cpu_setup, NULL); + mutex_exit(&cpu_lock); + + kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *)); + + kcpc_unregister_dcpc(); + + return (DDI_SUCCESS); +} + +static int +dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + uint_t caps; + char *attrs; + + switch (cmd) { + case DDI_ATTACH: + break; + case DDI_RESUME: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (kcpc_pcbe_loaded() == -1) + return (DDI_FAILURE); + + caps = kcpc_pcbe_capabilities(); + + if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) { + cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"\ + " on this processor"); + return (DDI_FAILURE); + } + + if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0, + DDI_PSEUDO, NULL) == DDI_FAILURE || + dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL, + NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) { + ddi_remove_minor_node(devi, NULL); + return (DDI_FAILURE); + } + + mutex_enter(&cpu_lock); + register_cpu_setup_func(dcpc_cpu_setup, NULL); + mutex_exit(&cpu_lock); + + dcpc_ovf_mask = (1 << cpc_ncounters) - 1; + ASSERT(dcpc_ovf_mask != 0); + + if (caps & CPC_CAP_OVERFLOW_PRECISE) + dcpc_mult_ovf_cap = 1; + + /* + * Determine which, if any, mask attribute the back-end can use. + */ + attrs = kcpc_list_attrs(); + if (strstr(attrs, "umask") != NULL) + dcpc_mask_type |= DCPC_UMASK; + else if (strstr(attrs, "emask") != NULL) + dcpc_mask_type |= DCPC_EMASK; + + /* + * The dcpc_actv_reqs array is used to store the requests that + * we currently have programmed. The order of requests in this + * array is not necessarily the order that the event appears in + * the kcpc_request_t array. 
Once entered into a slot in the array + * the entry is not moved until it's removed. + */ + dcpc_actv_reqs = + kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP); + + dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi, + DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT); + + kcpc_register_dcpc(dcpc_fire); + + ddi_report_dev(devi); + dcpc_devi = devi; + + return (DDI_SUCCESS); +} + +static struct cb_ops dcpc_cb_ops = { + dcpc_open, /* open */ + nodev, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + nodev, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops dcpc_ops = { + DEVO_REV, /* devo_rev, */ + 0, /* refcnt */ + dcpc_info, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + dcpc_attach, /* attach */ + dcpc_detach, /* detach */ + nodev, /* reset */ + &dcpc_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev, /* dev power */ + ddi_quiesce_not_needed /* quiesce */ +}; + +/* + * Module linkage information for the kernel. + */ +static struct modldrv modldrv = { + &mod_driverops, /* module type */ + "DTrace CPC Module", /* name of module */ + &dcpc_ops, /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index c721386280f8..0c5e4b3a011a 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * DTrace - Dynamic Tracing for Solaris * @@ -121,7 +119,7 @@ dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); size_t dtrace_global_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; -dtrace_optval_t dtrace_helper_actions_max = 32; +dtrace_optval_t dtrace_helper_actions_max = 1024; dtrace_optval_t dtrace_helper_providers_max = 32; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; @@ -146,6 +144,7 @@ int dtrace_err_verbose; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; +hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; /* * DTrace External Variables @@ -186,7 +185,9 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ +static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ +static int dtrace_dynvar_failclean; /* dynvars failed to clean */ /* * DTrace Locking @@ -240,10 +241,16 @@ static void dtrace_nullop(void) {} +static int +dtrace_enable_nullop(void) +{ + return (0); +} + static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, struct modctl *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, @@ -427,6 +434,7 @@ dtrace_load##bits(uintptr_t addr) \ #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 +#define DTRACE_MATCH_FAIL -1 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') @@ -453,11 +461,13 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); +static void dtrace_enabling_reap(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); static void dtrace_buffer_drop(dtrace_buffer_t *); +static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, @@ -1098,10 +1108,13 @@ dtrace_priv_proc_common_nocd() } static int -dtrace_priv_proc_destructive(dtrace_state_t *state) +dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate) { int action = state->dts_cred.dcr_action; + if (!(mstate->dtms_access & DTRACE_ACCESS_PROC)) + goto bad; + if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) goto bad; @@ -1123,15 +1136,17 @@ bad: } static int -dtrace_priv_proc_control(dtrace_state_t *state) +dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t 
*mstate) { - if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) - return (1); + if (mstate->dtms_access & DTRACE_ACCESS_PROC) { + if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) + return (1); - if (dtrace_priv_proc_common_zone(state) && - dtrace_priv_proc_common_user(state) && - dtrace_priv_proc_common_nocd()) - return (1); + if (dtrace_priv_proc_common_zone(state) && + dtrace_priv_proc_common_user(state) && + dtrace_priv_proc_common_nocd()) + return (1); + } cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1139,9 +1154,10 @@ dtrace_priv_proc_control(dtrace_state_t *state) } static int -dtrace_priv_proc(dtrace_state_t *state) +dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate) { - if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) + if ((mstate->dtms_access & DTRACE_ACCESS_PROC) && + (state->dts_cred.dcr_action & DTRACE_CRA_PROC)) return (1); cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1172,6 +1188,109 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) } /* + * Determine if the dte_cond of the specified ECB allows for processing of + * the current probe to continue. Note that this routine may allow continued + * processing, but with access(es) stripped from the mstate's dtms_access + * field. + */ +static int +dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, + dtrace_ecb_t *ecb) +{ + dtrace_probe_t *probe = ecb->dte_probe; + dtrace_provider_t *prov = probe->dtpr_provider; + dtrace_pops_t *pops = &prov->dtpv_pops; + int mode = DTRACE_MODE_NOPRIV_DROP; + + ASSERT(ecb->dte_cond); + + if (pops->dtps_mode != NULL) { + mode = pops->dtps_mode(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg); + + ASSERT((mode & DTRACE_MODE_USER) || + (mode & DTRACE_MODE_KERNEL)); + ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || + (mode & DTRACE_MODE_NOPRIV_DROP)); + } + + /* + * If the dte_cond bits indicate that this consumer is only allowed to + * see user-mode firings of this probe, call the provider's dtps_mode() + * entry point to check that the probe was fired while in a user + * context. If that's not the case, use the policy specified by the + * provider to determine if we drop the probe or merely restrict + * operation. + */ + if (ecb->dte_cond & DTRACE_COND_USERMODE) { + ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); + + if (!(mode & DTRACE_MODE_USER)) { + if (mode & DTRACE_MODE_NOPRIV_DROP) + return (0); + + mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; + } + } + + /* + * This is more subtle than it looks. We have to be absolutely certain + * that CRED() isn't going to change out from under us so it's only + * legit to examine that structure if we're in constrained situations. + * Currently, the only times we'll this check is if a non-super-user + * has enabled the profile or syscall providers -- providers that + * allow visibility of all processes. For the profile case, the check + * above will ensure that we're examining a user context. 
+ */ + if (ecb->dte_cond & DTRACE_COND_OWNER) { + cred_t *cr; + cred_t *s_cr = state->dts_cred.dcr_cred; + proc_t *proc; + + ASSERT(s_cr != NULL); + + if ((cr = CRED()) == NULL || + s_cr->cr_uid != cr->cr_uid || + s_cr->cr_uid != cr->cr_ruid || + s_cr->cr_uid != cr->cr_suid || + s_cr->cr_gid != cr->cr_gid || + s_cr->cr_gid != cr->cr_rgid || + s_cr->cr_gid != cr->cr_sgid || + (proc = ttoproc(curthread)) == NULL || + (proc->p_flag & SNOCD)) { + if (mode & DTRACE_MODE_NOPRIV_DROP) + return (0); + + mstate->dtms_access &= ~DTRACE_ACCESS_PROC; + } + } + + /* + * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not + * in our zone, check to see if our mode policy is to restrict rather + * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC + * and DTRACE_ACCESS_ARGS + */ + if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { + cred_t *cr; + cred_t *s_cr = state->dts_cred.dcr_cred; + + ASSERT(s_cr != NULL); + + if ((cr = CRED()) == NULL || + s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { + if (mode & DTRACE_MODE_NOPRIV_DROP) + return (0); + + mstate->dtms_access &= + ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); + } + } + + return (1); +} + +/* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) from outside of probe context to * clean the dirty dynamic variable lists on all CPUs. Dynamic variable @@ -1182,12 +1301,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; - int i, work = 0; + dtrace_dynvar_t **rinsep; + int i, j, work = 0; for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; - - ASSERT(dcpu->dtdsc_rinsing == NULL); + rinsep = &dcpu->dtdsc_rinsing; /* * If the dirty list is NULL, there is no dirty work to do. @@ -1195,14 +1314,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) if (dcpu->dtdsc_dirty == NULL) continue; - /* - * If the clean list is non-NULL, then we're not going to do - * any work for this CPU -- it means that there has not been - * a dtrace_dynvar() allocation on this CPU (or from this CPU) - * since the last time we cleaned house. - */ - if (dcpu->dtdsc_clean != NULL) + if (dcpu->dtdsc_rinsing != NULL) { + /* + * If the rinsing list is non-NULL, then it is because + * this CPU was selected to accept another CPU's + * dirty list -- and since that time, dirty buffers + * have accumulated. This is a highly unlikely + * condition, but we choose to ignore the dirty + * buffers -- they'll be picked up a future cleanse. + */ continue; + } + + if (dcpu->dtdsc_clean != NULL) { + /* + * If the clean list is non-NULL, then we're in a + * situation where a CPU has done deallocations (we + * have a non-NULL dirty list) but no allocations (we + * also have a non-NULL clean list). We can't simply + * move the dirty list into the clean list on this + * CPU, yet we also don't want to allow this condition + * to persist, lest a short clean list prevent a + * massive dirty list from being cleaned (which in + * turn could lead to otherwise avoidable dynamic + * drops). To deal with this, we look for some CPU + * with a NULL clean list, NULL dirty list, and NULL + * rinsing list -- and then we borrow this CPU to + * rinse our dirty list. 
+ */ + for (j = 0; j < NCPU; j++) { + dtrace_dstate_percpu_t *rinser; + + rinser = &dstate->dtds_percpu[j]; + + if (rinser->dtdsc_rinsing != NULL) + continue; + + if (rinser->dtdsc_dirty != NULL) + continue; + + if (rinser->dtdsc_clean != NULL) + continue; + + rinsep = &rinser->dtdsc_rinsing; + break; + } + + if (j == NCPU) { + /* + * We were unable to find another CPU that + * could accept this dirty list -- we are + * therefore unable to clean it now. + */ + dtrace_dynvar_failclean++; + continue; + } + } work = 1; @@ -1219,7 +1386,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * on a hash chain, either the dirty list or the * rinsing list for some CPU must be non-NULL.) */ - dcpu->dtdsc_rinsing = dirty; + *rinsep = dirty; dtrace_membar_producer(); } while (dtrace_casptr(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); @@ -1650,7 +1817,7 @@ retry: ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); /* - * Now we'll move the clean list to the free list. + * Now we'll move the clean list to our free list. * It's impossible for this to fail: the only way * the free list can be updated is through this * code path, and only one CPU can own the clean list. @@ -1663,6 +1830,7 @@ retry: * owners of the clean lists out before resetting * the clean lists. */ + dcpu = &dstate->dtds_percpu[me]; rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); ASSERT(rval == NULL); goto retry; @@ -1804,6 +1972,75 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) lquanta[levels + 1] += incr; } +static int +dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, + uint16_t high, uint16_t nsteps, int64_t value) +{ + int64_t this = 1, last, next; + int base = 1, order; + + ASSERT(factor <= nsteps); + ASSERT(nsteps % factor == 0); + + for (order = 0; order < low; order++) + this *= factor; + + /* + * If our value is less than our factor taken to the power of the + * low order of magnitude, it goes into the zeroth bucket. + */ + if (value < (last = this)) + return (0); + + for (this *= factor; order <= high; order++) { + int nbuckets = this > nsteps ? nsteps : this; + + if ((next = this * factor) < this) { + /* + * We should not generally get log/linear quantizations + * with a high magnitude that allows 64-bits to + * overflow, but we nonetheless protect against this + * by explicitly checking for overflow, and clamping + * our value accordingly. + */ + value = this - 1; + } + + if (value < this) { + /* + * If our value lies within this order of magnitude, + * determine its position by taking the offset within + * the order of magnitude, dividing by the bucket + * width, and adding to our (accumulated) base. + */ + return (base + (value - last) / (this / nbuckets)); + } + + base += nbuckets - (nbuckets / factor); + last = this; + this = next; + } + + /* + * Our value is greater than or equal to our factor taken to the + * power of one plus the high magnitude -- return the top bucket. 
+ */ + return (base); +} + +static void +dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) +{ + uint64_t arg = *llquanta++; + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + + llquanta[dtrace_aggregate_llquantize_bucket(factor, + low, high, nsteps, nval)] += incr; +} + /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) @@ -2585,6 +2822,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, switch (v) { case DIF_VAR_ARGS: + if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) { + cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= + CPU_DTRACE_KPRIV; + return (0); + } + ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { @@ -2620,7 +2863,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_UREGS: { klwp_t *lwp; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); if ((lwp = curthread->t_lwp) == NULL) { @@ -2632,6 +2875,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(lwp->lwp_regs, ndx)); } + case DIF_VAR_VMREGS: { + uint64_t rval; + + if (!dtrace_priv_kernel(state)) + return (0); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + + rval = dtrace_getvmreg(ndx, + &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); + + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (rval); + } + case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); @@ -2684,7 +2943,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_stackdepth); case DIF_VAR_USTACKDEPTH: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { /* @@ -2739,7 +2998,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_caller); case DIF_VAR_UCALLER: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { @@ -2787,7 +3046,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_PID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2809,7 +3068,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_pidp->pid_id); case DIF_VAR_PPID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2836,7 +3095,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_tid); case DIF_VAR_EXECNAME: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2856,7 +3115,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2876,7 +3135,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_UID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2897,7 +3156,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, 
dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_cred->cr_uid); case DIF_VAR_GID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2919,7 +3178,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_ERRNO: { klwp_t *lwp; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -3259,7 +3518,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && + dtrace_priv_proc_control(state, mstate) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); @@ -3274,7 +3533,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && + dtrace_priv_proc_control(state, mstate) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, size, flags); @@ -3600,7 +3859,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int64_t index = (int64_t)tupregs[1].dttk_value; int64_t remaining = (int64_t)tupregs[2].dttk_value; size_t len = dtrace_strlen((char *)s, size); - int64_t i = 0; + int64_t i; if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = NULL; @@ -3645,7 +3904,54 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - case DIF_SUBR_GETMAJOR: + case DIF_SUBR_TOUPPER: + case DIF_SUBR_TOLOWER: { + uintptr_t s = tupregs[0].dttk_value; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + char *dest = (char *)mstate->dtms_scratch_ptr, c; + size_t len = dtrace_strlen((char *)s, size); + char lower, upper, convert; + int64_t i; + + if (subr == DIF_SUBR_TOUPPER) { + lower = 'a'; + upper = 'z'; + convert = 'A'; + } else { + lower = 'A'; + upper = 'Z'; + convert = 'a'; + } + + if (!dtrace_canload(s, len + 1, mstate, vstate)) { + regs[rd] = NULL; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = NULL; + break; + } + + for (i = 0; i < size - 1; i++) { + if ((c = dtrace_load8(s + i)) == '\0') + break; + + if (c >= lower && c <= upper) + c = convert + (c - lower); + + dest[i] = c; + } + + ASSERT(i < size); + dest[i] = '\0'; + regs[rd] = (uintptr_t)dest; + mstate->dtms_scratch_ptr += size; + break; + } + +case DIF_SUBR_GETMAJOR: #ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else @@ -3907,9 +4213,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; - int64_t val = i < 0 ? i * -1 : i; - uint64_t size = 22; /* enough room for 2^64 in decimal */ + uint64_t val, digit; + uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } + + val = (base == 10 && i < 0) ? 
i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -3917,13 +4234,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - for (*end-- = '\0'; val; val /= 10) - *end-- = '0' + (val % 10); + for (*end-- = '\0'; val; val /= base) { + if ((digit = val % base) <= '9' - '0') { + *end-- = '0' + digit; + } else { + *end-- = 'a' + (digit - ('9' - '0') - 1); + } + } + + if (i == 0 && base == 16) + *end-- = '0'; + + if (base == 16) + *end-- = 'x'; - if (i == 0) + if (i == 0 || base == 8 || base == 16) *end-- = '0'; - if (i < 0) + if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; @@ -5558,6 +5886,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; + uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; @@ -5578,6 +5907,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, #endif mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; + mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC; *flags &= ~CPU_DTRACE_ERROR; if (prov == dtrace_provider) { @@ -5615,65 +5945,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, } } - if (ecb->dte_cond) { - /* - * If the dte_cond bits indicate that this - * consumer is only allowed to see user-mode firings - * of this probe, call the provider's dtps_usermode() - * entry point to check that the probe was fired - * while in a user context. Skip this ECB if that's - * not the case. - */ - if ((ecb->dte_cond & DTRACE_COND_USERMODE) && - prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg) == 0) - continue; - - /* - * This is more subtle than it looks. We have to be - * absolutely certain that CRED() isn't going to - * change out from under us so it's only legit to - * examine that structure if we're in constrained - * situations. Currently, the only times we'll this - * check is if a non-super-user has enabled the - * profile or syscall providers -- providers that - * allow visibility of all processes. For the - * profile case, the check above will ensure that - * we're examining a user context. 
- */ - if (ecb->dte_cond & DTRACE_COND_OWNER) { - cred_t *cr; - cred_t *s_cr = - ecb->dte_state->dts_cred.dcr_cred; - proc_t *proc; - - ASSERT(s_cr != NULL); - - if ((cr = CRED()) == NULL || - s_cr->cr_uid != cr->cr_uid || - s_cr->cr_uid != cr->cr_ruid || - s_cr->cr_uid != cr->cr_suid || - s_cr->cr_gid != cr->cr_gid || - s_cr->cr_gid != cr->cr_rgid || - s_cr->cr_gid != cr->cr_sgid || - (proc = ttoproc(curthread)) == NULL || - (proc->p_flag & SNOCD)) - continue; - } - - if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { - cred_t *cr; - cred_t *s_cr = - ecb->dte_state->dts_cred.dcr_cred; - - ASSERT(s_cr != NULL); - - if ((cr = CRED()) == NULL || - s_cr->cr_zone->zone_id != - cr->cr_zone->zone_id) - continue; - } - } + if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb)) + continue; if (now - state->dts_alive > dtrace_deadman_timeout) { /* @@ -5713,9 +5986,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, mstate.dtms_present |= DTRACE_MSTATE_EPID; if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) - mstate.dtms_access = DTRACE_ACCESS_KERNEL; - else - mstate.dtms_access = 0; + mstate.dtms_access |= DTRACE_ACCESS_KERNEL; if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; @@ -5775,7 +6046,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, switch (act->dta_kind) { case DTRACEACT_STOP: - if (dtrace_priv_proc_destructive(state)) + if (dtrace_priv_proc_destructive(state, + &mstate)) dtrace_action_stop(); continue; @@ -5802,7 +6074,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_JSTACK: case DTRACEACT_USTACK: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, &mstate)) continue; /* @@ -5835,6 +6107,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, continue; } + /* + * Clear the string space, since there's no + * helper to do it for us. 
+ */ + if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) { + int depth = DTRACE_USTACK_NFRAMES( + rec->dtrd_arg); + size_t strsize = DTRACE_USTACK_STRSIZE( + rec->dtrd_arg); + uint64_t *buf = (uint64_t *)(tomax + + valoffs); + void *strspace = &buf[depth + 1]; + + dtrace_bzero(strspace, + MIN(depth, strsize)); + } + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack((uint64_t *) (tomax + valoffs), @@ -5888,7 +6177,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, continue; case DTRACEACT_RAISE: - if (dtrace_priv_proc_destructive(state)) + if (dtrace_priv_proc_destructive(state, + &mstate)) dtrace_action_raise(val); continue; @@ -5915,6 +6205,11 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_TRACEMEM: + break; + + case DTRACEACT_TRACEMEM_DYNSIZE: + tracememsize = val; break; case DTRACEACT_SYM: @@ -5928,7 +6223,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_UADDR: { struct pid *pid = curthread->t_procp->p_pidp; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, &mstate)) continue; DTRACE_STORE(uint64_t, tomax, @@ -5980,6 +6275,12 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { uintptr_t end = valoffs + size; + if (tracememsize != 0 && + valoffs + tracememsize < end) { + end = valoffs + tracememsize; + tracememsize = 0; + } + if (!dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, &mstate, vstate)) continue; @@ -6655,7 +6956,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; - int len, best = INT_MAX, nmatched = 0; + int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -6667,7 +6968,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - (void) (*matched)(probe, arg); + if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); @@ -6714,8 +7016,12 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) + if ((rc = (*matched)(probe, arg)) != + DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); break; + } } return (nmatched); @@ -6734,8 +7040,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) + if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); break; + } } return (nmatched); @@ -6852,9 +7161,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, if ((priv & DTRACE_PRIV_KERNEL) && (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && - pops->dtps_usermode == NULL) { + pops->dtps_mode == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': need " - "dtps_usermode() op for given privilege attributes", name); + "dtps_mode() op for given privilege attributes", name); return (EINVAL); } @@ -6951,11 +7260,11 @@ dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; - int i, self = 0; + int i, self = 0, noreap = 0; dtrace_probe_t *probe, *first = NULL; 
if (old->dtpv_pops.dtps_enable == - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. @@ -7008,14 +7317,31 @@ dtrace_unregister(dtrace_provider_id_t id) continue; /* - * We have at least one ECB; we can't remove this provider. + * If we are trying to unregister a defunct provider, and the + * provider was made defunct within the interval dictated by + * dtrace_unregister_defunct_reap, we'll (asynchronously) + * attempt to reap our enablings. To denote that the provider + * should reattempt to unregister itself at some point in the + * future, we will return a differentiable error code (EAGAIN + * instead of EBUSY) in this case. */ + if (dtrace_gethrtime() - old->dtpv_defunct > + dtrace_unregister_defunct_reap) + noreap = 1; + if (!self) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); } - return (EBUSY); + + if (noreap) + return (EBUSY); + + (void) taskq_dispatch(dtrace_taskq, + (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); + + return (EAGAIN); } /* @@ -7101,12 +7427,12 @@ dtrace_invalidate(dtrace_provider_id_t id) dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); - pvp->dtpv_defunct = 1; + pvp->dtpv_defunct = dtrace_gethrtime(); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); @@ -7142,7 +7468,7 @@ dtrace_condense(dtrace_provider_id_t id) * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); @@ -8103,7 +8429,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; default: - err += efunc(dp->dtdo_len - 1, "bad return size"); + err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } @@ -9096,7 +9422,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) return (ecb); } -static void +static int dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; @@ -9109,7 +9435,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) /* * This is the NULL probe -- there's nothing to do. */ - return; + return (0); } if (probe->dtpr_ecb == NULL) { @@ -9123,8 +9449,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; - prov->dtpv_pops.dtps_enable(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg); + return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg)); } else { /* * This probe is already active. 
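The enable path is also reworked so a provider can refuse an enabling: dtps_enable now returns int, dtrace_ecb_enable() propagates the provider's return value, and dtrace_ecb_create_enable() (further below) converts a failure into DTRACE_MATCH_FAIL. A compilable sketch of an enable op under the new contract; everything except the signature shape is invented for illustration:

typedef unsigned int dtrace_id_t;		/* stand-in for <sys/dtrace.h> */
typedef struct { int armed; } example_probe_t;	/* hypothetical */

static int example_counters_free = 1;		/* hypothetical scarce resource */

/*ARGSUSED*/
static int
example_enable(void *arg, dtrace_id_t id, void *parg)
{
	example_probe_t *prp = parg;

	/*
	 * A provider backed by finite hardware can now report that an
	 * enabling cannot be satisfied instead of half-arming it; the
	 * consumer ultimately sees the match fail.
	 */
	if (example_counters_free == 0)
		return (-1);

	example_counters_free--;
	prp->armed = 1;
	return (0);
}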
Swing the last pointer to @@ -9137,6 +9463,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) probe->dtpr_predcache = 0; dtrace_sync(); + return (0); } } @@ -9312,6 +9639,35 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; } + case DTRACEAGG_LLQUANTIZE: { + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); + int64_t v; + + agg->dtag_initial = desc->dtad_arg; + agg->dtag_aggregate = dtrace_aggregate_llquantize; + + if (factor < 2 || low >= high || nsteps < factor) + goto err; + + /* + * Now check that the number of steps evenly divides a power + * of the factor. (This assures both integer bucket size and + * linearity within each magnitude.) + */ + for (v = factor; v < nsteps; v *= factor) + continue; + + if ((v % nsteps) || (nsteps % factor)) + goto err; + + size = (dtrace_aggregate_llquantize_bucket(factor, + low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); + break; + } + case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; @@ -9481,12 +9837,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. */ if (arg == NULL) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || + desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != NULL); @@ -9497,7 +9855,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) /*FALLTHROUGH*/ case DTRACEACT_LIBACT: - case DTRACEACT_DIFEXPR: + case DTRACEACT_TRACEMEM: + case DTRACEACT_TRACEMEM_DYNSIZE: if (dp == NULL) return (EINVAL); @@ -9920,7 +10279,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); - dtrace_ecb_enable(ecb); + if (dtrace_ecb_enable(ecb) < 0) + return (DTRACE_MATCH_FAIL); + return (DTRACE_MATCH_NEXT); } @@ -9978,6 +10339,7 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; + hrtime_t now = dtrace_gethrtime(); ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); @@ -9993,6 +10355,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); + buf->dtb_interval = now - buf->dtb_switched; + buf->dtb_switched = now; dtrace_interrupt_enable(cookie); } @@ -10025,14 +10389,17 @@ dtrace_buffer_activate(dtrace_state_t *state) static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, - processorid_t cpu) + processorid_t cpu, int *factor) { cpu_t *cp; dtrace_buffer_t *buf; + int allocated = 0, desired = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); + *factor = 1; + if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); @@ -10057,7 +10424,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, ASSERT(buf->dtb_xamot == NULL); - if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL) + if ((buf->dtb_tomax = kmem_zalloc(size, + KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; @@ -10068,7 +10436,8 @@ 
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, if (flags & DTRACEBUF_NOSWITCH) continue; - if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL) + if ((buf->dtb_xamot = kmem_zalloc(size, + KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } while ((cp = cp->cpu_next) != cpu_list); @@ -10082,16 +10451,19 @@ err: continue; buf = &bufs[cp->cpu_id]; + desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); + allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); + allocated++; } buf->dtb_tomax = NULL; @@ -10099,6 +10471,8 @@ err: buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); + *factor = desired / (allocated > 0 ? allocated : 1); + return (ENOMEM); } @@ -10400,6 +10774,36 @@ dtrace_buffer_polish(dtrace_buffer_t *buf) } } +/* + * This routine determines if data generated at the specified time has likely + * been entirely consumed at user-level. This routine is called to determine + * if an ECB on a defunct probe (but for an active enabling) can be safely + * disabled and destroyed. + */ +static int +dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) +{ + int i; + + for (i = 0; i < NCPU; i++) { + dtrace_buffer_t *buf = &bufs[i]; + + if (buf->dtb_size == 0) + continue; + + if (buf->dtb_flags & DTRACEBUF_RING) + return (0); + + if (!buf->dtb_switched && buf->dtb_offset != 0) + return (0); + + if (buf->dtb_switched - buf->dtb_interval < when) + return (0); + } + + return (1); +} + static void dtrace_buffer_free(dtrace_buffer_t *bufs) { @@ -10557,6 +10961,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; + dtrace_retained_gen++; } if (enab->dten_prev == NULL) { @@ -10599,6 +11004,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) return (ENOSPC); state->dts_nretained++; + dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; @@ -10713,7 +11119,7 @@ static int dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) { int i = 0; - int matched = 0; + int total_matched = 0, matched = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -10724,7 +11130,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_current = ep; enab->dten_error = 0; - matched += dtrace_probe_enable(&ep->dted_probe, enab); + /* + * If a provider failed to enable a probe then get out and + * let the consumer know we failed. + */ + if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) + return (EBUSY); + + total_matched += matched; if (enab->dten_error != 0) { /* @@ -10752,7 +11165,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) - *nmatched = matched; + *nmatched = total_matched; return (0); } @@ -10766,13 +11179,24 @@ dtrace_enabling_matchall(void) mutex_enter(&dtrace_lock); /* - * Because we can be called after dtrace_detach() has been called, we - * cannot assert that there are retained enablings. We can safely - * load from dtrace_retained, however: the taskq_destroy() at the - * end of dtrace_detach() will block pending our completion. + * Iterate over all retained enablings to see if any probes match + * against them. 
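Stepping back to the DTRACEAGG_LLQUANTIZE validation above: the parameter checks guarantee integer bucket sizes and linear steps within each order of magnitude. A runnable user-space restatement of just those checks (the bucket-counting helper, dtrace_aggregate_llquantize_bucket(), is not reproduced here):

#include <stdio.h>
#include <stdint.h>

static int
llquantize_valid(uint16_t factor, uint16_t low, uint16_t high,
    uint16_t nsteps)
{
	int64_t v;

	if (factor < 2 || low >= high || nsteps < factor)
		return (0);

	/* Find the first power of the factor that reaches nsteps ... */
	for (v = factor; v < nsteps; v *= factor)
		continue;

	/* ... which nsteps must divide, and factor must divide nsteps. */
	return ((v % nsteps) == 0 && (nsteps % factor) == 0);
}

int
main(void)
{
	/* llquantize(x, 10, 0, 6, 20): 20 steps per decade, accepted. */
	printf("%d\n", llquantize_valid(10, 0, 6, 20));
	/* 30 steps per decade: 30 does not divide 100, rejected. */
	printf("%d\n", llquantize_valid(10, 0, 6, 30));
	return (0);
}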
We only perform this operation on enablings for which + * we have sufficient permissions by virtue of being in the global zone + * or in the same zone as the DTrace client. Because we can be called + * after dtrace_detach() has been called, we cannot assert that there + * are retained enablings. We can safely load from dtrace_retained, + * however: the taskq_destroy() at the end of dtrace_detach() will + * block pending our completion. */ - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) - (void) dtrace_enabling_match(enab, NULL); + for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { + dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred; + cred_t *cr = dcr->dcr_cred; + zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0; + + if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL && + (zone == GLOBAL_ZONEID || getzoneid() == zone))) + (void) dtrace_enabling_match(enab, NULL); + } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); @@ -10830,6 +11254,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; + dtrace_genid_t gen; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&dtrace_provider_lock)); @@ -10840,15 +11265,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } do { - dtrace_enabling_t *enab = dtrace_retained; + dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; - for (; enab != NULL; enab = enab->dten_next) { +retry: + gen = dtrace_retained_gen; + for (enab = dtrace_retained; enab != NULL; + enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; mutex_exit(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); mutex_enter(&dtrace_lock); + /* + * Process the retained enablings again if + * they have changed while we weren't holding + * dtrace_lock. + */ + if (gen != dtrace_retained_gen) + goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); @@ -10859,6 +11294,85 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } /* + * Called to reap ECBs that are attached to probes from defunct providers. + */ +static void +dtrace_enabling_reap(void) +{ + dtrace_provider_t *prov; + dtrace_probe_t *probe; + dtrace_ecb_t *ecb; + hrtime_t when; + int i; + + mutex_enter(&cpu_lock); + mutex_enter(&dtrace_lock); + + for (i = 0; i < dtrace_nprobes; i++) { + if ((probe = dtrace_probes[i]) == NULL) + continue; + + if (probe->dtpr_ecb == NULL) + continue; + + prov = probe->dtpr_provider; + + if ((when = prov->dtpv_defunct) == 0) + continue; + + /* + * We have ECBs on a defunct provider: we want to reap these + * ECBs to allow the provider to unregister. The destruction + * of these ECBs must be done carefully: if we destroy the ECB + * and the consumer later wishes to consume an EPID that + * corresponds to the destroyed ECB (and if the EPID metadata + * has not been previously consumed), the consumer will abort + * processing on the unknown EPID. To reduce (but not, sadly, + * eliminate) the possibility of this, we will only destroy an + * ECB for a defunct provider if, for the state that + * corresponds to the ECB: + * + * (a) There is no speculative tracing (which can effectively + * cache an EPID for an arbitrary amount of time). + * + * (b) The principal buffers have been switched twice since the + * provider became defunct. + * + * (c) The aggregation buffers are of zero size or have been + * switched twice since the provider became defunct. 
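Condition (b) turns on bookkeeping that dtrace_buffer_switch() now maintains: dtb_switched is the time of the most recent switch and dtb_interval is the gap back to the one before it, so (dtb_switched - dtb_interval) recovers the time of the previous switch. Data stamped before that instant has been switched out and then switched again, i.e. consumed. A runnable model of the time test in dtrace_buffer_consumed():

#include <stdio.h>

typedef long long hrtime_t;

typedef struct {
	hrtime_t dtb_switched;	/* time of the most recent switch */
	hrtime_t dtb_interval;	/* gap back to the switch before it */
} buf_times_t;

static int
consumed(const buf_times_t *b, hrtime_t when)
{
	/* Consumed only if "when" predates the previous switch. */
	return (b->dtb_switched - b->dtb_interval >= when);
}

int
main(void)
{
	buf_times_t b = { 1000, 400 };		/* previous switch at t=600 */

	printf("%d\n", consumed(&b, 500));	/* 1: switched twice since */
	printf("%d\n", consumed(&b, 700));	/* 0: may still be unread */
	return (0);
}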
+ * + * We use dts_speculates to determine (a) and call a function + * (dtrace_buffer_consumed()) to determine (b) and (c). Note + * that as soon as we've been unable to destroy one of the ECBs + * associated with the probe, we quit trying -- reaping is only + * fruitful in as much as we can destroy all ECBs associated + * with the defunct provider's probes. + */ + while ((ecb = probe->dtpr_ecb) != NULL) { + dtrace_state_t *state = ecb->dte_state; + dtrace_buffer_t *buf = state->dts_buffer; + dtrace_buffer_t *aggbuf = state->dts_aggbuffer; + + if (state->dts_speculates) + break; + + if (!dtrace_buffer_consumed(buf, when)) + break; + + if (!dtrace_buffer_consumed(aggbuf, when)) + break; + + dtrace_ecb_disable(ecb); + ASSERT(probe->dtpr_ecb != ecb); + dtrace_ecb_destroy(ecb); + } + } + + mutex_exit(&dtrace_lock); + mutex_exit(&cpu_lock); +} + +/* * DTrace DOF Functions */ /*ARGSUSED*/ @@ -10970,7 +11484,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { + if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || + dof->dofh_loadsz != hdr.dofh_loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); @@ -11362,15 +11877,20 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if (DTRACEACT_ISPRINTFLIKE(kind) && + if ((DTRACEACT_ISPRINTFLIKE(kind) && (kind != DTRACEACT_PRINTA || + desc->dofa_strtab != DOF_SECIDX_NONE)) || + (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * printf()-like actions must have a format string. + * The argument to these actions is an index into the + * DOF string table. For printf()-like actions, this + * is the format string. For print(), this is the + * CTF type of the expression result. */ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -11698,6 +12218,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, } } + if (DOF_SEC_ISLOADABLE(sec->dofs_type) && + !(sec->dofs_flags & DOF_SECF_LOAD)) { + dtrace_dof_error(dof, "loadable section with load " + "flag unset"); + return (-1); + } + if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* just ignore non-loadable sections */ @@ -11849,7 +12376,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min; - if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL) + if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) return (ENOMEM); dstate->dtds_size = size; @@ -12211,7 +12738,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu; - int flags = 0, rval; + int flags = 0, rval, factor, divisor = 1; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); @@ -12241,7 +12768,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } - for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { + for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. 
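The dtrace_dof_copyin() change above is a double-fetch guard: the header is validated from a first copyin, but the source is user memory, so the size field is re-checked after the second copyin in case a racing thread rewrote it in between. The same pattern is applied to ftps_noffs in fasttrap_ioctl() later in this diff. A minimal sketch of the idiom against the real copyin()/kmem_alloc() KPIs; the header type and function name are hypothetical:

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/errno.h>

typedef struct { uint64_t loadsz; } my_hdr_t;	/* hypothetical */

static void *
copyin_sized(uintptr_t uarg, int *errp)
{
	my_hdr_t hdr, *buf;

	if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0 ||
	    hdr.loadsz < sizeof (hdr)) {
		*errp = EFAULT;
		return (NULL);
	}

	buf = kmem_alloc(hdr.loadsz, KM_SLEEP);

	/*
	 * Re-validate the size actually copied in: user memory may
	 * have changed between the two fetches, and trusting the
	 * stale value would let a consumer smuggle in a mismatched
	 * header.
	 */
	if (copyin((void *)uarg, buf, hdr.loadsz) != 0 ||
	    buf->loadsz != hdr.loadsz) {
		kmem_free(buf, hdr.loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (buf);
}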
@@ -12259,7 +12786,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) return (E2BIG); } - rval = dtrace_buffer_alloc(buf, size, flags, cpu); + rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); if (rval != ENOMEM) { opt[which] = size; @@ -12268,6 +12795,9 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) return (rval); + + for (divisor = 2; divisor < factor; divisor <<= 1) + continue; } return (ENOMEM); @@ -12367,7 +12897,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) goto out; } - spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP); + spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), + KM_NOSLEEP | KM_NORMALPRI); if (spec == NULL) { rval = ENOMEM; @@ -12378,7 +12909,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) state->dts_nspeculations = (int)nspec; for (i = 0; i < nspec; i++) { - if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) { + if ((buf = kmem_zalloc(bufsize, + KM_NOSLEEP | KM_NORMALPRI)) == NULL) { rval = ENOMEM; goto err; } @@ -14390,7 +14922,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. */ - ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); + if (getminor(*devp) != DTRACEMNRN_DTRACE) + return (ENXIO); /* * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -14427,7 +14960,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) mutex_exit(&cpu_lock); if (state == NULL) { - if (--dtrace_opens == 0) + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); mutex_exit(&dtrace_lock); return (EAGAIN); @@ -14463,7 +14996,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) dtrace_state_destroy(state); ASSERT(dtrace_opens > 0); - if (--dtrace_opens == 0) + + /* + * Only relinquish control of the kernel debugger interface when there + * are no consumers and no anonymous enablings. + */ + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); mutex_exit(&dtrace_lock); @@ -15458,7 +15996,8 @@ static struct dev_ops dtrace_ops = { nodev, /* reset */ &dtrace_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; static struct modldrv modldrv = { diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c index b7ca92f54a59..8cfe4cd33beb 100644 --- a/uts/common/dtrace/fasttrap.c +++ b/uts/common/dtrace/fasttrap.c @@ -20,11 +20,13 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2011, Joyent, Inc. All rights reserved. + */ #include <sys/atomic.h> #include <sys/errno.h> @@ -274,7 +276,7 @@ fasttrap_pid_cleanup_cb(void *data) fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; - int i, later; + int i, later, rval; static volatile int in = 0; ASSERT(in == 0); @@ -336,9 +338,13 @@ fasttrap_pid_cleanup_cb(void *data) * clean out the unenabled probes. 
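One detail from the dtrace_state_buffer() hunk above: on ENOMEM the allocation loop no longer halves blindly. dtrace_buffer_alloc() reports how far short it fell (desired over allocated) through the new factor argument, and the retry shrinks the request by that shortfall rounded up to a power of two. The rounding step in isolation:

#include <stdio.h>

int
main(void)
{
	int factor = 5, divisor;

	/* Round the reported shortfall up to a power of two. */
	for (divisor = 2; divisor < factor; divisor <<= 1)
		continue;

	printf("next size = previous / %d\n", divisor);	/* ... / 8 */
	return (0);
}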
*/ provid = fp->ftp_provid; - if (dtrace_unregister(provid) != 0) { + if ((rval = dtrace_unregister(provid)) != 0) { if (fasttrap_total > fasttrap_max / 2) (void) dtrace_condense(provid); + + if (rval == EAGAIN) + fp->ftp_marked = 1; + later += fp->ftp_marked; fpp = &fp->ftp_next; } else { @@ -364,12 +370,16 @@ fasttrap_pid_cleanup_cb(void *data) * get a chance to do that work if and when the timeout is reenabled * (if detach fails). */ - if (later > 0 && fasttrap_timeout != (timeout_id_t)1) - fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, hz); - else if (later > 0) + if (later > 0) { + if (fasttrap_timeout != (timeout_id_t)1) { + fasttrap_timeout = + timeout(&fasttrap_pid_cleanup_cb, NULL, hz); + } + fasttrap_cleanup_work = 1; - else + } else { fasttrap_timeout = 0; + } mutex_exit(&fasttrap_cleanup_mtx); in = 0; @@ -876,7 +886,7 @@ fasttrap_disable_callbacks(void) } /*ARGSUSED*/ -static void +static int fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; @@ -904,7 +914,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) * provider can't go away while we're in this code path. */ if (probe->ftp_prov->ftp_retired) - return; + return (0); /* * If we can't find the process, it may be that we're in the context of @@ -913,7 +923,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) */ if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) - return; + return (0); mutex_enter(&pidlock); p = prfind(probe->ftp_pid); @@ -975,7 +985,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) * drop our reference on the trap table entry. */ fasttrap_disable_callbacks(); - return; + return (0); } } @@ -983,6 +993,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) sprunlock(p); probe->ftp_enabled = 1; + return (0); } /*ARGSUSED*/ @@ -1946,7 +1957,8 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) probe = kmem_alloc(size, KM_SLEEP); - if (copyin(uprobe, probe, size) != 0) { + if (copyin(uprobe, probe, size) != 0 || + probe->ftps_noffs != noffs) { kmem_free(probe, size); return (EFAULT); } @@ -2044,13 +2056,6 @@ err: tp->ftt_proc->ftpc_acount != 0) break; - /* - * The count of active providers can only be - * decremented (i.e. to zero) during exec, exit, and - * removal of a meta provider so it should be - * impossible to drop the count during this operation(). - */ - ASSERT(tp->ftt_proc->ftpc_acount != 0); tp = tp->ftt_next; } @@ -2346,7 +2351,8 @@ static struct dev_ops fasttrap_ops = { nodev, /* reset */ &fasttrap_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; /* diff --git a/uts/common/dtrace/lockstat.c b/uts/common/dtrace/lockstat.c index 3eb76a061d32..69c8b7254486 100644 --- a/uts/common/dtrace/lockstat.c +++ b/uts/common/dtrace/lockstat.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" #include <sys/types.h> #include <sys/param.h> @@ -84,7 +83,7 @@ static kmutex_t lockstat_test; /* for testing purposes only */ static dtrace_provider_id_t lockstat_id; /*ARGSUSED*/ -static void +static int lockstat_enable(void *arg, dtrace_id_t id, void *parg) { lockstat_probe_t *probe = parg; @@ -103,6 +102,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg) */ mutex_enter(&lockstat_test); mutex_exit(&lockstat_test); + return (0); } /*ARGSUSED*/ @@ -310,11 +310,13 @@ static struct dev_ops lockstat_ops = { nulldev, /* reset */ &lockstat_cb_ops, /* cb_ops */ NULL, /* bus_ops */ + NULL, /* power */ + ddi_quiesce_not_needed, /* quiesce */ }; static struct modldrv modldrv = { &mod_driverops, /* Type of module. This one is a driver */ - "Lock Statistics %I%", /* name of module */ + "Lock Statistics", /* name of module */ &lockstat_ops, /* driver ops */ }; diff --git a/uts/common/dtrace/profile.c b/uts/common/dtrace/profile.c index 8de919a851a2..fc809d3579a5 100644 --- a/uts/common/dtrace/profile.c +++ b/uts/common/dtrace/profile.c @@ -19,11 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2011, Joyent, Inc. All rights reserved. + */ #include <sys/errno.h> #include <sys/stat.h> @@ -361,7 +363,7 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg) } /*ARGSUSED*/ -static void +static int profile_enable(void *arg, dtrace_id_t id, void *parg) { profile_probe_t *prof = parg; @@ -391,6 +393,7 @@ profile_enable(void *arg, dtrace_id_t id, void *parg) } else { prof->prof_cyclic = cyclic_add_omni(&omni); } + return (0); } /*ARGSUSED*/ @@ -408,9 +411,25 @@ profile_disable(void *arg, dtrace_id_t id, void *parg) /*ARGSUSED*/ static int -profile_usermode(void *arg, dtrace_id_t id, void *parg) +profile_mode(void *arg, dtrace_id_t id, void *parg) { - return (CPU->cpu_profile_pc == 0); + profile_probe_t *prof = parg; + int mode; + + if (CPU->cpu_profile_pc != 0) { + mode = DTRACE_MODE_KERNEL; + } else { + mode = DTRACE_MODE_USER; + } + + if (prof->prof_kind == PROF_TICK) { + mode |= DTRACE_MODE_NOPRIV_RESTRICT; + } else { + ASSERT(prof->prof_kind == PROF_PROFILE); + mode |= DTRACE_MODE_NOPRIV_DROP; + } + + return (mode); } static dtrace_pattr_t profile_attr = { @@ -430,7 +449,7 @@ static dtrace_pops_t profile_pops = { NULL, NULL, NULL, - profile_usermode, + profile_mode, profile_destroy }; @@ -539,7 +558,8 @@ static struct dev_ops profile_ops = { nodev, /* reset */ &profile_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; /* diff --git a/uts/common/dtrace/sdt_subr.c b/uts/common/dtrace/sdt_subr.c index 66ff8a92a01b..242185071bb2 100644 --- a/uts/common/dtrace/sdt_subr.c +++ b/uts/common/dtrace/sdt_subr.c @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
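The profile provider above illustrates the new dtps_mode() op that replaces dtps_usermode(): rather than a boolean "did this fire in user mode?", the provider reports both the mode of the firing and the policy for consumers lacking the matching privilege, restricting the firing's visibility or dropping it entirely (profile restricts its tick probes and drops its profile probes). A compilable sketch of such an op for a hypothetical provider; only the dtrace_id_t type and DTRACE_MODE_* flags come from the real interface in <sys/dtrace.h>:

#include <sys/dtrace.h>

/*ARGSUSED*/
static int
example_mode(void *arg, dtrace_id_t id, void *parg)
{
	/*
	 * Pretend this provider's probes always fire in kernel
	 * context, and mirror profile's choice for its tick probes:
	 * restrict what an unprivileged consumer may see rather than
	 * dropping the firing outright.
	 */
	return (DTRACE_MODE_KERNEL | DTRACE_MODE_NOPRIV_RESTRICT);
}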
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/sdt_impl.h> static dtrace_pattr_t vtrace_attr = { @@ -43,6 +40,14 @@ static dtrace_pattr_t info_attr = { { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, }; +static dtrace_pattr_t fc_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +}; + static dtrace_pattr_t fpu_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, @@ -83,6 +88,14 @@ static dtrace_pattr_t xpv_attr = { { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, }; +static dtrace_pattr_t iscsi_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +}; + sdt_provider_t sdt_providers[] = { { "vtrace", "__vtrace_", &vtrace_attr, 0 }, { "sysinfo", "__cpu_sysinfo_", &info_attr, 0 }, @@ -91,11 +104,17 @@ sdt_provider_t sdt_providers[] = { { "sched", "__sched_", &stab_attr, 0 }, { "proc", "__proc_", &stab_attr, 0 }, { "io", "__io_", &stab_attr, 0 }, + { "ip", "__ip_", &stab_attr, 0 }, + { "tcp", "__tcp_", &stab_attr, 0 }, + { "udp", "__udp_", &stab_attr, 0 }, { "mib", "__mib_", &stab_attr, 0 }, { "fsinfo", "__fsinfo_", &fsinfo_attr, 0 }, + { "iscsi", "__iscsi_", &iscsi_attr, 0 }, { "nfsv3", "__nfsv3_", &stab_attr, 0 }, { "nfsv4", "__nfsv4_", &stab_attr, 0 }, { "xpv", "__xpv_", &xpv_attr, 0 }, + { "fc", "__fc_", &fc_attr, 0 }, + { "srp", "__srp_", &fc_attr, 0 }, { "sysevent", "__sysevent_", &stab_attr, 0 }, { "sdt", NULL, &sdt_attr, 0 }, { NULL } @@ -169,6 +188,73 @@ sdt_argdesc_t sdt_args[] = { { "fsinfo", NULL, 0, 0, "vnode_t *", "fileinfo_t *" }, { "fsinfo", NULL, 1, 1, "int", "int" }, + { "iscsi", "async-send", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "async-send", 1, 1, "iscsi_async_evt_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "login-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "login-command", 1, 1, "iscsi_login_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "login-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "login-response", 1, 1, "iscsi_login_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "logout-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "logout-command", 1, 1, "iscsi_logout_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "logout-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "logout-response", 1, 1, "iscsi_logout_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "data-request", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "data-request", 1, 1, "iscsi_rtt_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "data-send", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "data-send", 1, 1, "iscsi_data_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "data-receive", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "data-receive", 1, 1, "iscsi_data_hdr_t *", + 
"iscsiinfo_t *" }, + { "iscsi", "nop-send", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "nop-send", 1, 1, "iscsi_nop_in_hdr_t *", "iscsiinfo_t *" }, + { "iscsi", "nop-receive", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "nop-receive", 1, 1, "iscsi_nop_out_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "scsi-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "scsi-command", 1, 1, "iscsi_scsi_cmd_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "scsi-command", 2, 2, "scsi_task_t *", "scsicmd_t *" }, + { "iscsi", "scsi-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "scsi-response", 1, 1, "iscsi_scsi_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "task-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "task-command", 1, 1, "iscsi_scsi_task_mgt_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "task-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "task-response", 1, 1, "iscsi_scsi_task_mgt_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "text-command", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "text-command", 1, 1, "iscsi_text_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "text-response", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "text-response", 1, 1, "iscsi_text_rsp_hdr_t *", + "iscsiinfo_t *" }, + { "iscsi", "xfer-start", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "xfer-start", 1, 0, "idm_conn_t *", "iscsiinfo_t *" }, + { "iscsi", "xfer-start", 2, 1, "uintptr_t", "xferinfo_t *" }, + { "iscsi", "xfer-start", 3, 2, "uint32_t"}, + { "iscsi", "xfer-start", 4, 3, "uintptr_t"}, + { "iscsi", "xfer-start", 5, 4, "uint32_t"}, + { "iscsi", "xfer-start", 6, 5, "uint32_t"}, + { "iscsi", "xfer-start", 7, 6, "uint32_t"}, + { "iscsi", "xfer-start", 8, 7, "int"}, + { "iscsi", "xfer-done", 0, 0, "idm_conn_t *", "conninfo_t *" }, + { "iscsi", "xfer-done", 1, 0, "idm_conn_t *", "iscsiinfo_t *" }, + { "iscsi", "xfer-done", 2, 1, "uintptr_t", "xferinfo_t *" }, + { "iscsi", "xfer-done", 3, 2, "uint32_t"}, + { "iscsi", "xfer-done", 4, 3, "uintptr_t"}, + { "iscsi", "xfer-done", 5, 4, "uint32_t"}, + { "iscsi", "xfer-done", 6, 5, "uint32_t"}, + { "iscsi", "xfer-done", 7, 6, "uint32_t"}, + { "iscsi", "xfer-done", 8, 7, "int"}, + { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", "conninfo_t *" }, { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *", @@ -788,6 +874,75 @@ sdt_argdesc_t sdt_args[] = { "nfsv4cbinfo_t *" }, { "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *" }, + { "ip", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "ip", "send", 1, 1, "conn_t *", "csinfo_t *" }, + { "ip", "send", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" }, + { "ip", "send", 4, 4, "ipha_t *", "ipv4info_t *" }, + { "ip", "send", 5, 5, "ip6_t *", "ipv6info_t *" }, + { "ip", "send", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */ + { "ip", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "ip", "receive", 1, 1, "conn_t *", "csinfo_t *" }, + { "ip", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "ip", "receive", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" }, + { "ip", "receive", 4, 4, "ipha_t *", "ipv4info_t *" }, + { "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" }, + { "ip", "receive", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */ + + { "tcp", "connect-established", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-established", 1, 1, "ip_xmit_attr_t *", + "csinfo_t *" }, + { "tcp", "connect-established", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", 
"connect-established", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-established", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "connect-refused", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "connect-refused", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-refused", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "connect-request", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "connect-request", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "connect-request", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "connect-request", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "connect-request", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "accept-established", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "accept-established", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "accept-established", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "accept-established", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "accept-established", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "accept-refused", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "accept-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "accept-refused", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "tcp", "accept-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "accept-refused", 4, 4, "tcph_t *", "tcpinfo_t *" }, + { "tcp", "state-change", 0, 0, "void", "void" }, + { "tcp", "state-change", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "state-change", 2, 2, "void", "void" }, + { "tcp", "state-change", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "state-change", 4, 4, "void", "void" }, + { "tcp", "state-change", 5, 5, "int32_t", "tcplsinfo_t *" }, + { "tcp", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "send", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" }, + { "tcp", "send", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "send", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" }, + { "tcp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "tcp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "tcp", "receive", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" }, + { "tcp", "receive", 3, 3, "tcp_t *", "tcpsinfo_t *" }, + { "tcp", "receive", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" }, + + { "udp", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "udp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "udp", "send", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "udp", "send", 3, 3, "udp_t *", "udpsinfo_t *" }, + { "udp", "send", 4, 4, "udpha_t *", "udpinfo_t *" }, + { "udp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "udp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" }, + { "udp", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "udp", "receive", 3, 3, "udp_t *", "udpsinfo_t *" }, + { "udp", "receive", 4, 4, "udpha_t *", "udpinfo_t *" }, + { "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" }, { "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" }, @@ -848,6 +1003,154 @@ sdt_argdesc_t sdt_args[] = { { "xpv", "setvcpucontext-end", 0, 0, "int" }, { "xpv", "setvcpucontext-start", 0, 0, "domid_t" }, { "xpv", "setvcpucontext-start", 1, 1, "vcpu_guest_context_t *" }, + + { "srp", "service-up", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "service-up", 1, 0, "srpt_session_t *", "srp_portinfo_t *" }, + { "srp", "service-down", 
0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "service-down", 1, 0, "srpt_session_t *", + "srp_portinfo_t *" }, + { "srp", "login-command", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "login-command", 1, 0, "srpt_session_t *", + "srp_portinfo_t *" }, + { "srp", "login-command", 2, 1, "srp_login_req_t *", + "srp_logininfo_t *" }, + { "srp", "login-response", 0, 0, "srpt_session_t *", "conninfo_t *" }, + { "srp", "login-response", 1, 0, "srpt_session_t *", + "srp_portinfo_t *" }, + { "srp", "login-response", 2, 1, "srp_login_rsp_t *", + "srp_logininfo_t *" }, + { "srp", "login-response", 3, 2, "srp_login_rej_t *" }, + { "srp", "logout-command", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "logout-command", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "task-command", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "task-command", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "task-command", 2, 1, "srp_cmd_req_t *", "srp_taskinfo_t *" }, + { "srp", "task-response", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "task-response", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "task-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" }, + { "srp", "task-response", 3, 2, "scsi_task_t *" }, + { "srp", "task-response", 4, 3, "int8_t" }, + { "srp", "scsi-command", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "scsi-command", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "scsi-command", 2, 1, "scsi_task_t *", "scsicmd_t *" }, + { "srp", "scsi-command", 3, 2, "srp_cmd_req_t *", "srp_taskinfo_t *" }, + { "srp", "scsi-response", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "scsi-response", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "scsi-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" }, + { "srp", "scsi-response", 3, 2, "scsi_task_t *" }, + { "srp", "scsi-response", 4, 3, "int8_t" }, + { "srp", "xfer-start", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "xfer-start", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "xfer-start", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" }, + { "srp", "xfer-start", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" }, + { "srp", "xfer-start", 4, 3, "ibt_send_wr_t *"}, + { "srp", "xfer-start", 5, 4, "uint32_t" }, + { "srp", "xfer-start", 6, 5, "uint32_t" }, + { "srp", "xfer-start", 7, 6, "uint32_t" }, + { "srp", "xfer-start", 8, 7, "uint32_t" }, + { "srp", "xfer-done", 0, 0, "srpt_channel_t *", "conninfo_t *" }, + { "srp", "xfer-done", 1, 0, "srpt_channel_t *", + "srp_portinfo_t *" }, + { "srp", "xfer-done", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" }, + { "srp", "xfer-done", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" }, + { "srp", "xfer-done", 4, 3, "ibt_send_wr_t *"}, + { "srp", "xfer-done", 5, 4, "uint32_t" }, + { "srp", "xfer-done", 6, 5, "uint32_t" }, + { "srp", "xfer-done", 7, 6, "uint32_t" }, + { "srp", "xfer-done", 8, 7, "uint32_t" }, + + { "fc", "link-up", 0, 0, "fct_i_local_port_t *", "conninfo_t *" }, + { "fc", "link-down", 0, 0, "fct_i_local_port_t *", "conninfo_t *" }, + { "fc", "fabric-login-start", 0, 0, "fct_i_local_port_t *", + "conninfo_t *" }, + { "fc", "fabric-login-start", 1, 0, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "fabric-login-end", 0, 0, "fct_i_local_port_t *", + "conninfo_t *" }, + { "fc", "fabric-login-end", 1, 0, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-start", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-login-start", 1, 1, "fct_local_port_t 
*", + "fc_port_info_t *" }, + { "fc", "rport-login-start", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-start", 3, 3, "int", "int" }, + { "fc", "rport-login-end", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-login-end", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-end", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-login-end", 3, 3, "int", "int" }, + { "fc", "rport-login-end", 4, 4, "int", "int" }, + { "fc", "rport-logout-start", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-logout-start", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-start", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-start", 3, 3, "int", "int" }, + { "fc", "rport-logout-end", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "rport-logout-end", 1, 1, "fct_local_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-end", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "rport-logout-end", 3, 3, "int", "int" }, + { "fc", "scsi-command", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "scsi-command", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "scsi-command", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "scsi-command", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "scsi-response", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "scsi-response", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "scsi-response", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "scsi-response", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-start", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "xfer-start", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-start", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "xfer-start", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-start", 4, 4, "stmf_data_buf_t *", + "fc_xferinfo_t *" }, + { "fc", "xfer-done", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "xfer-done", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-done", 2, 2, "scsi_task_t *", + "scsicmd_t *" }, + { "fc", "xfer-done", 3, 3, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + { "fc", "xfer-done", 4, 4, "stmf_data_buf_t *", + "fc_xferinfo_t *" }, + { "fc", "rscn-receive", 0, 0, "fct_i_local_port_t *", + "conninfo_t *" }, + { "fc", "rscn-receive", 1, 1, "int", "int"}, + { "fc", "abts-receive", 0, 0, "fct_cmd_t *", + "conninfo_t *" }, + { "fc", "abts-receive", 1, 1, "fct_i_local_port_t *", + "fc_port_info_t *" }, + { "fc", "abts-receive", 2, 2, "fct_i_remote_port_t *", + "fc_port_info_t *" }, + + { NULL } }; diff --git a/uts/common/dtrace/systrace.c b/uts/common/dtrace/systrace.c index be14660b04c0..b864041c450d 100644 --- a/uts/common/dtrace/systrace.c +++ b/uts/common/dtrace/systrace.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" #include <sys/dtrace.h> #include <sys/systrace.h> @@ -141,7 +140,7 @@ systrace_destroy(void *arg, dtrace_id_t id, void *parg) } /*ARGSUSED*/ -static void +static int systrace_enable(void *arg, dtrace_id_t id, void *parg) { int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); @@ -162,7 +161,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg) if (enabled) { ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); - return; + return (0); } (void) casptr(&sysent[sysnum].sy_callc, @@ -173,6 +172,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg) (void *)systrace_sysent32[sysnum].stsy_underlying, (void *)dtrace_systrace_syscall32); #endif + return (0); } /*ARGSUSED*/ @@ -336,7 +336,8 @@ static struct dev_ops systrace_ops = { nodev, /* reset */ &systrace_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; /* |
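A final pattern worth noting: systrace_enable() above interposes on the system call table with casptr(), a compare-and-swap on the sy_callc slot, so the underlying handler is replaced atomically and only if nothing else got there first. A user-space model of the same idea using C11 atomics instead of the kernel's casptr(); all names are invented:

#include <stdatomic.h>
#include <stdio.h>

typedef long (*syscall_fn_t)(long);

static long underlying(long a) { return (a + 1); }

static long
traced(long a)
{
	/* A real wrapper would fire entry/return probes here. */
	return (underlying(a));
}

static _Atomic(syscall_fn_t) sy_callc = underlying;

static void
enable(void)
{
	syscall_fn_t old = underlying;

	/* Install the wrapper only if the slot is still unclaimed. */
	(void) atomic_compare_exchange_strong(&sy_callc, &old, traced);
}

int
main(void)
{
	enable();
	printf("%ld\n", atomic_load(&sy_callc)(41));	/* 42, via tracer */
	return (0);
}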