path: root/uts/common/dtrace
author    Martin Matuska <mm@FreeBSD.org>    2012-07-01 14:55:35 +0000
committer Martin Matuska <mm@FreeBSD.org>    2012-07-01 14:55:35 +0000
commit    5a27a66a81254f7b8eadd92ceac8ff50132e971c (patch)
tree      a7fdb0498552fa601ffec363d7a91a3095209b8e /uts/common/dtrace
parent    d661fdff24a3141883f5058d89a5f044c736ef56 (diff)
Diffstat (limited to 'uts/common/dtrace')
-rw-r--r--  uts/common/dtrace/dcpc.c      1218
-rw-r--r--  uts/common/dtrace/dtrace.c     863
-rw-r--r--  uts/common/dtrace/fasttrap.c    48
-rw-r--r--  uts/common/dtrace/lockstat.c    10
-rw-r--r--  uts/common/dtrace/profile.c     34
-rw-r--r--  uts/common/dtrace/sdt_subr.c   311
-rw-r--r--  uts/common/dtrace/systrace.c    11
7 files changed, 2292 insertions, 203 deletions
diff --git a/uts/common/dtrace/dcpc.c b/uts/common/dtrace/dcpc.c
new file mode 100644
index 000000000000..8fd96cc24c6c
--- /dev/null
+++ b/uts/common/dtrace/dcpc.c
@@ -0,0 +1,1218 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/errno.h>
+#include <sys/cpuvar.h>
+#include <sys/stat.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ksynch.h>
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/kcpc.h>
+#include <sys/cap_util.h>
+#include <sys/cpc_pcbe.h>
+#include <sys/cpc_impl.h>
+#include <sys/dtrace_impl.h>
+
+/*
+ * DTrace CPU Performance Counter Provider
+ * ---------------------------------------
+ *
+ * The DTrace cpc provider allows DTrace consumers to access the CPU
+ * performance counter overflow mechanism of a CPU. The configuration
+ * presented in a probe specification is programmed into the performance
+ * counter hardware of all available CPUs on a system. Programming the
+ * hardware causes a counter on each CPU to begin counting events of the
+ * given type. When the specified number of events have occurred, an overflow
+ * interrupt will be generated and the probe is fired.
+ *
+ * The required configuration for the performance counter is encoded into
+ * the probe specification and this includes the performance counter event
+ * name, processor mode, overflow rate and an optional unit mask.
+ *
+ * Most processors provide several counters (PICs) which can count all or a
+ * subset of the events available for a given CPU. However, when overflow
+ * profiling is being used, not all CPUs can detect which counter generated the
+ * overflow interrupt. In this case we cannot reliably determine which counter
+ * overflowed and we therefore only allow such CPUs to configure one event at
+ * a time. Processors that can determine the counter which overflowed are
+ * allowed to program as many events at one time as possible (in theory up to
+ * the number of instrumentation counters supported by that platform).
+ * Therefore, multiple consumers can enable multiple probes at the same time
+ * on such platforms. Platforms which cannot determine the source of an
+ * overflow interrupt are only allowed to program a single event at one time.
+ *
+ * The performance counter hardware is made available to consumers on a
+ * first-come, first-served basis. Only a finite amount of hardware resource
+ * is available and, while we make every attempt to accommodate requests from
+ * consumers, we must deny requests when hardware resources have been exhausted.
+ * A consumer will fail to enable probes when resources are currently in use.
+ *
+ * The cpc provider contends for shared hardware resources along with other
+ * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)).
+ * Only one such consumer can use the performance counters at any one time and
+ * counters are made available on a first-come, first-served basis. As with
+ * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
+ * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
+ * counter contexts to be invalidated.
+ */
+
+typedef struct dcpc_probe {
+ char dcpc_event_name[CPC_MAX_EVENT_LEN];
+ int dcpc_flag; /* flags (USER/SYS) */
+ uint32_t dcpc_ovfval; /* overflow value */
+ int64_t dcpc_umask; /* umask/emask for this event */
+ int dcpc_picno; /* pic this event is programmed in */
+ int dcpc_enabled; /* probe is actually enabled? */
+ int dcpc_disabling; /* probe is currently being disabled */
+ dtrace_id_t dcpc_id; /* probeid this request is enabling */
+ int dcpc_actv_req_idx; /* idx into dcpc_actv_reqs[] */
+} dcpc_probe_t;
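
As an editor's illustration (not part of this commit): applying the format
described in the overview comment, a probe specification such as
cpc:::PAPI_tot_ins-all-10000 is decoded by dcpc_provide() and
dcpc_create_probe() below into roughly the following request state. The
field values shown are an assumption based on that parsing code
(dcpc_create_probe() additionally ORs CPC_OVF_NOTIFY_EMT into dcpc_flag).

/* Hypothetical sketch: "PAPI_tot_ins-all-10000" as a dcpc_probe_t. */
dcpc_probe_t example = {
	.dcpc_event_name = "PAPI_tot_ins",		/* event name */
	.dcpc_flag = CPC_COUNT_USER | CPC_COUNT_SYSTEM,	/* mode "all" */
	.dcpc_ovfval = 10000,				/* overflow rate */
	.dcpc_umask = -1,				/* no unit mask given */
	.dcpc_picno = -1,				/* not yet programmed */
};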
+
+static dev_info_t *dcpc_devi;
+static dtrace_provider_id_t dcpc_pid;
+static dcpc_probe_t **dcpc_actv_reqs;
+static uint32_t dcpc_enablings = 0;
+static int dcpc_ovf_mask = 0;
+static int dcpc_mult_ovf_cap = 0;
+static int dcpc_mask_type = 0;
+
+/*
+ * When the dcpc provider is loaded, dcpc_min_overflow is set to either
+ * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
+ * the dcpc.conf file. Decrease this value to set probes with smaller
+ * overflow values. Remember that very small values could render a system
+ * unusable with frequently occurring events.
+ */
+#define DCPC_MIN_OVF_DEFAULT 5000
+static uint32_t dcpc_min_overflow;
+
+static int dcpc_aframes = 0; /* override for artificial frame setting */
+#if defined(__x86)
+#define DCPC_ARTIFICIAL_FRAMES 8
+#elif defined(__sparc)
+#define DCPC_ARTIFICIAL_FRAMES 2
+#endif
+
+/*
+ * Called from the platform overflow interrupt handler. 'bitmap' is a mask
+ * which contains the pic(s) that have overflowed.
+ */
+static void
+dcpc_fire(uint64_t bitmap)
+{
+ int i;
+
+ /*
+ * No counter was marked as overflowing. Shout about it and get out.
+ */
+ if ((bitmap & dcpc_ovf_mask) == 0) {
+ cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
+ return;
+ }
+
+ /*
+ * This is the common case of a processor that doesn't support
+ * multiple overflow events. Such systems are only allowed a single
+ * enabling and therefore we just look for the first entry in
+ * the active request array.
+ */
+ if (!dcpc_mult_ovf_cap) {
+ for (i = 0; i < cpc_ncounters; i++) {
+ if (dcpc_actv_reqs[i] != NULL) {
+ dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
+ CPU->cpu_cpcprofile_pc,
+ CPU->cpu_cpcprofile_upc, 0, 0, 0);
+ return;
+ }
+ }
+ return;
+ }
+
+ /*
+ * This is a processor capable of handling multiple overflow events.
+ * Iterate over the array of active requests and locate the counters
+ * that overflowed (note: it is possible for more than one counter to
+ * have overflowed at the same time).
+ */
+ for (i = 0; i < cpc_ncounters; i++) {
+ if (dcpc_actv_reqs[i] != NULL &&
+ (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
+ dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
+ CPU->cpu_cpcprofile_pc,
+ CPU->cpu_cpcprofile_upc, 0, 0, 0);
+ }
+ }
+}
+
+static void
+dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
+ char *eventname, int64_t umask, uint32_t ovfval, char flag)
+{
+ dcpc_probe_t *pp;
+ int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();
+
+ if (dcpc_aframes)
+ nr_frames = dcpc_aframes;
+
+ if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
+ return;
+
+ pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
+ (void) strncpy(pp->dcpc_event_name, eventname,
+ sizeof (pp->dcpc_event_name) - 1);
+ pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
+ pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
+ pp->dcpc_ovfval = ovfval;
+ pp->dcpc_umask = umask;
+ pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;
+
+ pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
+ nr_frames, pp);
+}
+
+/*ARGSUSED*/
+static void
+dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
+{
+ /*
+ * The format of a probe is:
+ *
+ * event_name-mode-{optional_umask}-overflow_rate
+ * e.g.
+ * DC_refill_from_system-user-0x1e-50000, or,
+ * DC_refill_from_system-all-10000
+ *
+ */
+ char *str, *end, *p;
+ int i, flag = 0;
+ char event[CPC_MAX_EVENT_LEN];
+ long umask = -1, val = 0;
+ size_t evlen, len;
+
+ /*
+ * The 'cpc' provider offers no probes by default.
+ */
+ if (desc == NULL)
+ return;
+
+ len = strlen(desc->dtpd_name);
+ p = str = kmem_alloc(len + 1, KM_SLEEP);
+ (void) strcpy(str, desc->dtpd_name);
+
+ /*
+ * We have a poor man's strtok() going on here. Replace any hyphens
+ * in the probe name with NULL characters in order to make it
+ * easy to parse the string with regular string functions.
+ */
+ for (i = 0; i < len; i++) {
+ if (str[i] == '-')
+ str[i] = '\0';
+ }
+
+ /*
+ * The first part of the string must be either a platform event
+ * name or a generic event name.
+ */
+ evlen = strlen(p);
+ (void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
+ event[CPC_MAX_EVENT_LEN - 1] = '\0';
+
+ /*
+ * The next part of the name is the mode specification. Valid
+ * settings are "user", "kernel" or "all".
+ */
+ p += evlen + 1;
+
+ if (strcmp(p, "user") == 0)
+ flag |= CPC_COUNT_USER;
+ else if (strcmp(p, "kernel") == 0)
+ flag |= CPC_COUNT_SYSTEM;
+ else if (strcmp(p, "all") == 0)
+ flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
+ else
+ goto err;
+
+ /*
+ * Next we either have a mask specification followed by an overflow
+ * rate or just an overflow rate on its own.
+ */
+ p += strlen(p) + 1;
+ if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ /*
+ * A unit mask can only be specified if:
+ * 1) this performance counter back end supports masks.
+ * 2) the specified event is platform specific.
+ * 3) a valid hex number is converted.
+ * 4) no extraneous characters follow the mask specification.
+ */
+ if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
+ ddi_strtol(p, &end, 16, &umask) == 0 &&
+ end == p + strlen(p)) {
+ p += strlen(p) + 1;
+ } else {
+ goto err;
+ }
+ }
+
+ /*
+ * This final part must be an overflow value which has to be greater
+ * than the minimum permissible overflow rate.
+ */
+ if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
+ val < dcpc_min_overflow)
+ goto err;
+
+ /*
+ * Validate the event and create the probe.
+ */
+ for (i = 0; i < cpc_ncounters; i++) {
+ char *events, *cp, *p, *end;
+ int found = 0, j;
+ size_t llen;
+
+ if ((events = kcpc_list_events(i)) == NULL)
+ goto err;
+
+ llen = strlen(events);
+ p = cp = ddi_strdup(events, KM_NOSLEEP);
+ end = cp + llen;
+
+ for (j = 0; j < llen; j++) {
+ if (cp[j] == ',')
+ cp[j] = '\0';
+ }
+
+ while (p < end && found == 0) {
+ if (strcmp(p, event) == 0) {
+ dcpc_create_probe(dcpc_pid, desc->dtpd_name,
+ event, umask, (uint32_t)val, flag);
+ found = 1;
+ }
+ p += strlen(p) + 1;
+ }
+ kmem_free(cp, llen + 1);
+
+ if (found)
+ break;
+ }
+
+err:
+ kmem_free(str, len + 1);
+}
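
The hyphen-to-NUL tokenization used above is easy to reproduce outside the
kernel. A minimal user-space sketch (editor's example, standard C library
only):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[] = "PAPI_tot_ins-all-10000";	/* sample probe name */
	char *p = buf, *end = buf + strlen(buf);
	size_t i;

	/* Replace every hyphen with a NUL, as dcpc_provide() does. */
	for (i = 0; buf[i] != '\0'; i++) {
		if (buf[i] == '-')
			buf[i] = '\0';
	}

	/* Walk the tokens: prints "PAPI_tot_ins", "all", "10000". */
	while (p < end) {
		printf("token: %s\n", p);
		p += strlen(p) + 1;
	}
	return (0);
}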
+
+/*ARGSUSED*/
+static void
+dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+ dcpc_probe_t *pp = parg;
+
+ ASSERT(pp->dcpc_enabled == 0);
+ kmem_free(pp, sizeof (dcpc_probe_t));
+}
+
+/*ARGSUSED*/
+static int
+dcpc_mode(void *arg, dtrace_id_t id, void *parg)
+{
+ if (CPU->cpu_cpcprofile_pc == 0) {
+ return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER);
+ } else {
+ return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL);
+ }
+}
+
+static void
+dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
+{
+ kcpc_set_t *oset;
+ int i;
+
+ (void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
+ CPC_MAX_EVENT_LEN);
+ set->ks_req[reqno].kr_config = NULL;
+ set->ks_req[reqno].kr_index = reqno;
+ set->ks_req[reqno].kr_picnum = -1;
+ set->ks_req[reqno].kr_flags = pp->dcpc_flag;
+
+ /*
+ * If a unit mask has been specified then detect which attribute
+ * the platform needs. For now, it's either "umask" or "emask".
+ */
+ if (pp->dcpc_umask >= 0) {
+ set->ks_req[reqno].kr_attr =
+ kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
+ set->ks_req[reqno].kr_nattrs = 1;
+ if (dcpc_mask_type & DCPC_UMASK)
+ (void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
+ "umask", 5);
+ else
+ (void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
+ "emask", 5);
+ set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
+ } else {
+ set->ks_req[reqno].kr_attr = NULL;
+ set->ks_req[reqno].kr_nattrs = 0;
+ }
+
+ /*
+ * If this probe is enabled, obtain its current countdown value
+ * and use that. The CPU's cpc context might not exist yet if we
+ * are dealing with a CPU that is just coming online.
+ */
+ if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
+ oset = c->cpu_cpc_ctx->kc_set;
+
+ for (i = 0; i < oset->ks_nreqs; i++) {
+ if (strcmp(oset->ks_req[i].kr_event,
+ set->ks_req[reqno].kr_event) == 0) {
+ set->ks_req[reqno].kr_preset =
+ *(oset->ks_req[i].kr_data);
+ }
+ }
+ } else {
+ set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
+ }
+
+ set->ks_nreqs++;
+}
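
The kr_preset computed in the fallback branch above relies on the counters
counting upward and interrupting on wrap: priming a counter at
UINT64_MAX - ovfval makes it overflow after roughly ovfval further events.
An editor's user-space sketch of the arithmetic (the wrap is simulated; no
real hardware counter is involved):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t ovfval = 10000;
	uint64_t counter = UINT64_MAX - ovfval;	/* the preset value */
	uint64_t events = 0;

	/* Count up until the counter wraps to zero ("overflows"). */
	while (counter != 0) {
		counter++;
		events++;
	}

	/* Prints 10001: the interrupt fires on the wrapping increment. */
	printf("overflowed after %llu events\n", (unsigned long long)events);
	return (0);
}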
+
+
+/*
+ * Create a fresh request set for the enablings represented in the
+ * 'dcpc_actv_reqs' array which contains the probes we want to be
+ * in the set. This can be called for several reasons:
+ *
+ * 1) We are on a single or multi overflow platform and we have no
+ * current events so we can just create the set and initialize it.
+ * 2) We are on a multi-overflow platform and we already have one or
+ * more existing events and we are adding a new enabling. Create a
+ * new set and copy old requests in and then add the new request.
+ * 3) We are on a multi-overflow platform and we have just removed an
+ * enabling but we still have enablings which are valid. Create a new
+ * set and copy in still valid requests.
+ */
+static kcpc_set_t *
+dcpc_create_set(cpu_t *c)
+{
+ int i, reqno = 0;
+ int active_requests = 0;
+ kcpc_set_t *set;
+
+ /*
+ * First get a count of the number of currently active requests.
+ * Note that dcpc_actv_reqs[] should always reflect which requests
+ * we want to be in the set that is to be created. It is the
+ * responsibility of the caller of dcpc_create_set() to adjust that
+ * array accordingly beforehand.
+ */
+ for (i = 0; i < cpc_ncounters; i++) {
+ if (dcpc_actv_reqs[i] != NULL)
+ active_requests++;
+ }
+
+ set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);
+
+ set->ks_req =
+ kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);
+
+ set->ks_data =
+ kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);
+
+ /*
+ * Look for valid entries in the active requests array and populate
+ * the request set for any entries found.
+ */
+ for (i = 0; i < cpc_ncounters; i++) {
+ if (dcpc_actv_reqs[i] != NULL) {
+ dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
+ reqno++;
+ }
+ }
+
+ return (set);
+}
+
+static int
+dcpc_program_cpu_event(cpu_t *c)
+{
+ int i, j, subcode;
+ kcpc_ctx_t *ctx, *octx;
+ kcpc_set_t *set;
+
+ set = dcpc_create_set(c);
+
+ set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
+ ctx->kc_set = set;
+ ctx->kc_cpuid = c->cpu_id;
+
+ if (kcpc_assign_reqs(set, ctx) != 0)
+ goto err;
+
+ if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
+ goto err;
+
+ for (i = 0; i < set->ks_nreqs; i++) {
+ for (j = 0; j < cpc_ncounters; j++) {
+ if (dcpc_actv_reqs[j] != NULL &&
+ strcmp(set->ks_req[i].kr_event,
+ dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
+ dcpc_actv_reqs[j]->dcpc_picno =
+ set->ks_req[i].kr_picnum;
+ }
+ }
+ }
+
+ /*
+ * If we already have an active enabling then save the current cpc
+ * context away.
+ */
+ octx = c->cpu_cpc_ctx;
+
+ kcpc_cpu_program(c, ctx);
+
+ if (octx != NULL) {
+ kcpc_set_t *oset = octx->kc_set;
+ kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
+ kcpc_free_configs(oset);
+ kcpc_free_set(oset);
+ kcpc_ctx_free(octx);
+ }
+
+ return (0);
+
+err:
+ /*
+ * We failed to configure this request, so free things up and
+ * get out.
+ */
+ kcpc_free_configs(set);
+ kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
+ kcpc_free_set(set);
+ kcpc_ctx_free(ctx);
+
+ return (-1);
+}
+
+static void
+dcpc_disable_cpu(cpu_t *c)
+{
+ kcpc_ctx_t *ctx;
+ kcpc_set_t *set;
+
+ /*
+ * Leave this CPU alone if it's already offline.
+ */
+ if (c->cpu_flags & CPU_OFFLINE)
+ return;
+
+ /*
+ * Grab the CPU's CPC context before kcpc_cpu_stop() stops counters and
+ * changes it.
+ */
+ ctx = c->cpu_cpc_ctx;
+
+ kcpc_cpu_stop(c, B_FALSE);
+
+ set = ctx->kc_set;
+
+ kcpc_free_configs(set);
+ kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
+ kcpc_free_set(set);
+ kcpc_ctx_free(ctx);
+}
+
+/*
+ * The dcpc_*_interrupts() routines are responsible for manipulating the
+ * per-CPU dcpc interrupt state byte. The purpose of the state byte is to
+ * synchronize processing of hardware overflow interrupts with configuration
+ * changes made to the CPU performance counter subsystem by the dcpc provider.
+ *
+ * The dcpc provider claims ownership of the overflow interrupt mechanism
+ * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the
+ * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the
+ * overflow mechanism and interrupts may be processed). Before modifying
+ * a CPU's configuration state, the state byte is transitioned from
+ * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state).
+ * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process
+ * an interrupt when a configuration is not in process (i.e. the state is
+ * marked as free). During interrupt processing the state is set to
+ * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based
+ * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate
+ * the dcpc provider is no longer interested in overflow interrupts.
+ */
+static void
+dcpc_block_interrupts(void)
+{
+ cpu_t *c = cpu_list;
+ uint8_t *state;
+
+ ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);
+
+ do {
+ state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;
+
+ while (atomic_cas_8(state, DCPC_INTR_FREE,
+ DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
+ continue;
+
+ } while ((c = c->cpu_next) != cpu_list);
+}
+
+/*
+ * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
+ * overflow interrupts can be processed safely.
+ */
+static void
+dcpc_release_interrupts(void)
+{
+ cpu_t *c = cpu_list;
+
+ ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);
+
+ do {
+ cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
+ membar_producer();
+ } while ((c = c->cpu_next) != cpu_list);
+}
+
+/*
+ * Transition all CPUs' dcpc interrupt state from DCPC_INTR_INACTIVE
+ * to DCPC_INTR_FREE. This indicates that the dcpc provider is now
+ * responsible for handling all overflow interrupt activity. Should only be
+ * called before enabling the first dcpc based probe.
+ */
+static void
+dcpc_claim_interrupts(void)
+{
+ cpu_t *c = cpu_list;
+
+ ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE);
+
+ do {
+ cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
+ membar_producer();
+ } while ((c = c->cpu_next) != cpu_list);
+}
+
+/*
+ * Set all CPUs' dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that
+ * the dcpc provider is no longer processing overflow interrupts. Only called
+ * during removal of the last dcpc based enabling.
+ */
+static void
+dcpc_surrender_interrupts(void)
+{
+ cpu_t *c = cpu_list;
+
+ ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);
+
+ do {
+ cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE;
+ membar_producer();
+ } while ((c = c->cpu_next) != cpu_list);
+}
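
The four routines above implement a small per-CPU state machine. Below is an
editor's user-space sketch of the same protocol using C11 atomics in place of
atomic_cas_8(); the state names mirror the DCPC_INTR_* values described
earlier, but the numeric values are assumptions:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors DCPC_INTR_INACTIVE/FREE/CONFIG/PROCESSING (values assumed). */
enum { INTR_INACTIVE, INTR_FREE, INTR_CONFIG, INTR_PROCESSING };

static _Atomic uint8_t intr_state = INTR_INACTIVE;

/* Claim: INACTIVE -> FREE (the provider takes ownership of overflows). */
static void
intr_claim(void)
{
	atomic_store(&intr_state, INTR_FREE);
}

/* Block: FREE -> CONFIG; spins while an interrupt is being processed. */
static void
intr_block(void)
{
	uint8_t expect;

	do {
		expect = INTR_FREE;
	} while (!atomic_compare_exchange_strong(&intr_state, &expect,
	    INTR_CONFIG));
}

/* Handler side: FREE -> PROCESSING, or refuse if a config is under way. */
static int
intr_try_process(void)
{
	uint8_t expect = INTR_FREE;

	return (atomic_compare_exchange_strong(&intr_state, &expect,
	    INTR_PROCESSING));
}

int
main(void)
{
	intr_claim();
	intr_block();		/* state is now INTR_CONFIG */
	printf("handler may run: %d\n", intr_try_process());	/* 0 */
	atomic_store(&intr_state, INTR_FREE);	/* configuration done */
	printf("handler may run: %d\n", intr_try_process());	/* 1 */
	return (0);
}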
+
+/*
+ * dcpc_program_event() can be called owing to a new enabling or if a
+ * multi-overflow platform has disabled a request but needs to program the
+ * requests that are still valid.
+ *
+ * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
+ * and a new request set which contains the new enabling and any old enablings
+ * which are still valid (possible with multi-overflow platforms).
+ */
+static int
+dcpc_program_event(dcpc_probe_t *pp)
+{
+ cpu_t *c;
+ int ret = 0;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ kpreempt_disable();
+
+ dcpc_block_interrupts();
+
+ c = cpu_list;
+
+ do {
+ /*
+ * Skip CPUs that are currently offline.
+ */
+ if (c->cpu_flags & CPU_OFFLINE)
+ continue;
+
+ /*
+ * Stop counters but preserve existing DTrace CPC context
+ * if there is one.
+ *
+ * If we come here when the first event is programmed for a CPU,
+ * there should be no DTrace CPC context installed. In this
+ * case, kcpc_cpu_stop() will ensure that there is no other
+ * context on the CPU.
+ *
+ * If we add new enabling to the original one, the CPU should
+ * have the old DTrace CPC context which we need to keep around
+ * since dcpc_program_event() will add to it.
+ */
+ if (c->cpu_cpc_ctx != NULL)
+ kcpc_cpu_stop(c, B_TRUE);
+ } while ((c = c->cpu_next) != cpu_list);
+
+ dcpc_release_interrupts();
+
+ /*
+ * If this enabling is being removed (in the case of a multi event
+ * capable system with more than one active enabling), we can now
+ * update the active request array to reflect the enablings that need
+ * to be reprogrammed.
+ */
+ if (pp->dcpc_disabling == 1)
+ dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
+
+ do {
+ /*
+ * Skip CPUs that are currently offline.
+ */
+ if (c->cpu_flags & CPU_OFFLINE)
+ continue;
+
+ ret = dcpc_program_cpu_event(c);
+ } while ((c = c->cpu_next) != cpu_list && ret == 0);
+
+ /*
+ * If dcpc_program_cpu_event() fails then it is because we couldn't
+ * configure the requests in the set for the CPU and not because of
+ * an error programming the hardware. If we have a failure here then
+ * we assume no CPUs have been programmed in the above step as they
+ * are all configured identically.
+ */
+ if (ret != 0) {
+ pp->dcpc_enabled = 0;
+ kpreempt_enable();
+ return (-1);
+ }
+
+ if (pp->dcpc_disabling != 1)
+ pp->dcpc_enabled = 1;
+
+ kpreempt_enable();
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+dcpc_enable(void *arg, dtrace_id_t id, void *parg)
+{
+ dcpc_probe_t *pp = parg;
+ int i, found = 0;
+ cpu_t *c;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Bail out if the counters are being used by a libcpc consumer.
+ */
+ rw_enter(&kcpc_cpuctx_lock, RW_READER);
+ if (kcpc_cpuctx > 0) {
+ rw_exit(&kcpc_cpuctx_lock);
+ return (-1);
+ }
+
+ dtrace_cpc_in_use++;
+ rw_exit(&kcpc_cpuctx_lock);
+
+ /*
+ * Locate this enabling in the first free entry of the active
+ * request array.
+ */
+ for (i = 0; i < cpc_ncounters; i++) {
+ if (dcpc_actv_reqs[i] == NULL) {
+ dcpc_actv_reqs[i] = pp;
+ pp->dcpc_actv_req_idx = i;
+ found = 1;
+ break;
+ }
+ }
+
+ /*
+ * If we couldn't find a slot for this probe then there is no
+ * room at the inn.
+ */
+ if (!found) {
+ dtrace_cpc_in_use--;
+ return (-1);
+ }
+
+ ASSERT(pp->dcpc_actv_req_idx >= 0);
+
+ /*
+ * DTrace is taking over CPC contexts, so stop collecting
+ * capacity/utilization data for all CPUs.
+ */
+ if (dtrace_cpc_in_use == 1)
+ cu_disable();
+
+ /*
+ * The following must hold true if we are to (attempt to) enable
+ * this request:
+ *
+ * 1) No enablings currently exist. We allow all platforms to
+ * proceed if this is true.
+ *
+ * OR
+ *
+ * 2) If the platform is multi-overflow capable and there are
+ * fewer valid enablings than there are counters. There is no
+ * guarantee that a platform can accommodate as many events as
+ * it has counters for but we will at least try to program
+ * up to that many requests.
+ *
+ * The 'dcpc_enablings' variable is implicitly protected by locking
+ * provided by the DTrace framework and the cpu management framework.
+ */
+ if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
+ dcpc_enablings < cpc_ncounters)) {
+ /*
+ * Before attempting to program the first enabling we need to
+ * invalidate any lwp-based contexts and lay claim to the
+ * overflow interrupt mechanism.
+ */
+ if (dcpc_enablings == 0) {
+ kcpc_invalidate_all();
+ dcpc_claim_interrupts();
+ }
+
+ if (dcpc_program_event(pp) == 0) {
+ dcpc_enablings++;
+ return (0);
+ }
+ }
+
+ /*
+ * If active enablings existed before we failed to enable this probe
+ * on a multi event capable platform then we need to restart counters
+ * as they will have been stopped in the attempted configuration. The
+ * context should now just contain the request prior to this failed
+ * enabling.
+ */
+ if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
+ c = cpu_list;
+
+ ASSERT(dcpc_mult_ovf_cap == 1);
+ do {
+ /*
+ * Skip CPUs that are currently offline.
+ */
+ if (c->cpu_flags & CPU_OFFLINE)
+ continue;
+
+ kcpc_cpu_program(c, c->cpu_cpc_ctx);
+ } while ((c = c->cpu_next) != cpu_list);
+ }
+
+ /*
+ * Give up any claim to the overflow interrupt mechanism if no
+ * dcpc based enablings exist.
+ */
+ if (dcpc_enablings == 0)
+ dcpc_surrender_interrupts();
+
+ dtrace_cpc_in_use--;
+ dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
+ pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;
+
+ /*
+ * If all probes are removed, enable capacity/utilization data
+ * collection for every CPU.
+ */
+ if (dtrace_cpc_in_use == 0)
+ cu_enable();
+
+ return (-1);
+}
+
+/*
+ * If only one enabling is active then remove the context and free
+ * everything up. If there are multiple enablings active then remove this
+ * one, its associated meta-data and re-program the hardware.
+ */
+/*ARGSUSED*/
+static void
+dcpc_disable(void *arg, dtrace_id_t id, void *parg)
+{
+ cpu_t *c;
+ dcpc_probe_t *pp = parg;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ kpreempt_disable();
+
+ /*
+ * This probe didn't actually make it as far as being fully enabled
+ * so we needn't do anything with it.
+ */
+ if (pp->dcpc_enabled == 0) {
+ /*
+ * If we actually allocated this request a slot in the
+ * request array but failed to enable it, then remove the
+ * entry from the array.
+ */
+ if (pp->dcpc_actv_req_idx >= 0) {
+ dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
+ pp->dcpc_actv_req_idx = pp->dcpc_picno =
+ pp->dcpc_disabling = -1;
+ }
+
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * If this is the only enabling then stop all the counters and
+ * free up the meta-data.
+ */
+ if (dcpc_enablings == 1) {
+ ASSERT(dtrace_cpc_in_use == 1);
+
+ dcpc_block_interrupts();
+
+ c = cpu_list;
+
+ do {
+ dcpc_disable_cpu(c);
+ } while ((c = c->cpu_next) != cpu_list);
+
+ dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
+ dcpc_surrender_interrupts();
+ } else {
+ /*
+ * This platform can support multiple overflow events and
+ * the enabling being disabled is not the last one. Remove this
+ * enabling and re-program the hardware with the new config.
+ */
+ ASSERT(dcpc_mult_ovf_cap);
+ ASSERT(dcpc_enablings > 1);
+
+ pp->dcpc_disabling = 1;
+ (void) dcpc_program_event(pp);
+ }
+
+ kpreempt_enable();
+
+ dcpc_enablings--;
+ dtrace_cpc_in_use--;
+ pp->dcpc_enabled = 0;
+ pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;
+
+ /*
+ * If all probes are removed, enable capacity/utilization data
+ * collection for every CPU.
+ */
+ if (dtrace_cpc_in_use == 0)
+ cu_enable();
+}
+
+/*ARGSUSED*/
+static int
+dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
+{
+ cpu_t *c;
+ uint8_t *state;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ switch (what) {
+ case CPU_OFF:
+ /*
+ * Offline CPUs are not allowed to take part so remove this
+ * CPU if we are actively tracing.
+ */
+ if (dtrace_cpc_in_use) {
+ c = cpu_get(cpu);
+ state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;
+
+ /*
+ * Indicate that a configuration is in process in
+ * order to stop overflow interrupts being processed
+ * on this CPU while we disable it.
+ */
+ while (atomic_cas_8(state, DCPC_INTR_FREE,
+ DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
+ continue;
+
+ dcpc_disable_cpu(c);
+
+ /*
+ * Reset this CPU's interrupt state as the configuration
+ * has ended.
+ */
+ cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
+ DCPC_INTR_FREE;
+ membar_producer();
+ }
+ break;
+
+ case CPU_ON:
+ case CPU_SETUP:
+ /*
+ * This CPU is being initialized or brought online so program
+ * it with the current request set if we are actively tracing.
+ */
+ if (dtrace_cpc_in_use) {
+ c = cpu_get(cpu);
+ (void) dcpc_program_cpu_event(c);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+static dtrace_pattr_t dcpc_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+};
+
+static dtrace_pops_t dcpc_pops = {
+ dcpc_provide,
+ NULL,
+ dcpc_enable,
+ dcpc_disable,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ dcpc_mode,
+ dcpc_destroy
+};
+
+/*ARGSUSED*/
+static int
+dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+ int error;
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)dcpc_devi;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ }
+ return (error);
+}
+
+static int
+dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ if (dtrace_unregister(dcpc_pid) != 0)
+ return (DDI_FAILURE);
+
+ ddi_remove_minor_node(devi, NULL);
+
+ mutex_enter(&cpu_lock);
+ unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+
+ kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));
+
+ kcpc_unregister_dcpc();
+
+ return (DDI_SUCCESS);
+}
+
+static int
+dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+ uint_t caps;
+ char *attrs;
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ break;
+ case DDI_RESUME:
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ if (kcpc_pcbe_loaded() == -1)
+ return (DDI_FAILURE);
+
+ caps = kcpc_pcbe_capabilities();
+
+ if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
+ cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"\
+ " on this processor");
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
+ DDI_PSEUDO, NULL) == DDI_FAILURE ||
+ dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
+ NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
+ ddi_remove_minor_node(devi, NULL);
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&cpu_lock);
+ register_cpu_setup_func(dcpc_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+
+ dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
+ ASSERT(dcpc_ovf_mask != 0);
+
+ if (caps & CPC_CAP_OVERFLOW_PRECISE)
+ dcpc_mult_ovf_cap = 1;
+
+ /*
+ * Determine which, if any, mask attribute the back-end can use.
+ */
+ attrs = kcpc_list_attrs();
+ if (strstr(attrs, "umask") != NULL)
+ dcpc_mask_type |= DCPC_UMASK;
+ else if (strstr(attrs, "emask") != NULL)
+ dcpc_mask_type |= DCPC_EMASK;
+
+ /*
+ * The dcpc_actv_reqs array is used to store the requests that
+ * we currently have programmed. The order of requests in this
+ * array is not necessarily the order that the event appears in
+ * the kcpc_request_t array. Once entered into a slot in the array
+ * the entry is not moved until it's removed.
+ */
+ dcpc_actv_reqs =
+ kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);
+
+ dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
+ DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);
+
+ kcpc_register_dcpc(dcpc_fire);
+
+ ddi_report_dev(devi);
+ dcpc_devi = devi;
+
+ return (DDI_SUCCESS);
+}
+
+static struct cb_ops dcpc_cb_ops = {
+ dcpc_open, /* open */
+ nodev, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ nodev, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops dcpc_ops = {
+ DEVO_REV, /* devo_rev, */
+ 0, /* refcnt */
+ dcpc_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ dcpc_attach, /* attach */
+ dcpc_detach, /* detach */
+ nodev, /* reset */
+ &dcpc_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed /* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type */
+ "DTrace CPC Module", /* name of module */
+ &dcpc_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c
index c721386280f8..0c5e4b3a011a 100644
--- a/uts/common/dtrace/dtrace.c
+++ b/uts/common/dtrace/dtrace.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* DTrace - Dynamic Tracing for Solaris
*
@@ -121,7 +119,7 @@ dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
size_t dtrace_global_maxsize = (16 * 1024);
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
-dtrace_optval_t dtrace_helper_actions_max = 32;
+dtrace_optval_t dtrace_helper_actions_max = 1024;
dtrace_optval_t dtrace_helper_providers_max = 32;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t dtrace_strsize_default = 256;
@@ -146,6 +144,7 @@ int dtrace_err_verbose;
hrtime_t dtrace_deadman_interval = NANOSEC;
hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
+hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
/*
* DTrace External Variables
@@ -186,7 +185,9 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t dtrace_probegen; /* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
+static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
+static int dtrace_dynvar_failclean; /* dynvars failed to clean */
/*
* DTrace Locking
@@ -240,10 +241,16 @@ static void
dtrace_nullop(void)
{}
+static int
+dtrace_enable_nullop(void)
+{
+ return (0);
+}
+
static dtrace_pops_t dtrace_provider_ops = {
(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
(void (*)(void *, struct modctl *))dtrace_nullop,
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
@@ -427,6 +434,7 @@ dtrace_load##bits(uintptr_t addr) \
#define DTRACE_DYNHASH_SINK 1
#define DTRACE_DYNHASH_VALID 2
+#define DTRACE_MATCH_FAIL -1
#define DTRACE_MATCH_NEXT 0
#define DTRACE_MATCH_DONE 1
#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
@@ -453,11 +461,13 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
+static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
+static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
@@ -1098,10 +1108,13 @@ dtrace_priv_proc_common_nocd()
}
static int
-dtrace_priv_proc_destructive(dtrace_state_t *state)
+dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
int action = state->dts_cred.dcr_action;
+ if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
+ goto bad;
+
if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
dtrace_priv_proc_common_zone(state) == 0)
goto bad;
@@ -1123,15 +1136,17 @@ bad:
}
static int
-dtrace_priv_proc_control(dtrace_state_t *state)
+dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
- if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
- return (1);
+ if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
+ if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
+ return (1);
- if (dtrace_priv_proc_common_zone(state) &&
- dtrace_priv_proc_common_user(state) &&
- dtrace_priv_proc_common_nocd())
- return (1);
+ if (dtrace_priv_proc_common_zone(state) &&
+ dtrace_priv_proc_common_user(state) &&
+ dtrace_priv_proc_common_nocd())
+ return (1);
+ }
cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
@@ -1139,9 +1154,10 @@ dtrace_priv_proc_control(dtrace_state_t *state)
}
static int
-dtrace_priv_proc(dtrace_state_t *state)
+dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
- if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
+ if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
+ (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
return (1);
cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
@@ -1172,6 +1188,109 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state)
}
/*
+ * Determine if the dte_cond of the specified ECB allows for processing of
+ * the current probe to continue. Note that this routine may allow continued
+ * processing, but with access(es) stripped from the mstate's dtms_access
+ * field.
+ */
+static int
+dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
+ dtrace_ecb_t *ecb)
+{
+ dtrace_probe_t *probe = ecb->dte_probe;
+ dtrace_provider_t *prov = probe->dtpr_provider;
+ dtrace_pops_t *pops = &prov->dtpv_pops;
+ int mode = DTRACE_MODE_NOPRIV_DROP;
+
+ ASSERT(ecb->dte_cond);
+
+ if (pops->dtps_mode != NULL) {
+ mode = pops->dtps_mode(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg);
+
+ ASSERT((mode & DTRACE_MODE_USER) ||
+ (mode & DTRACE_MODE_KERNEL));
+ ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
+ (mode & DTRACE_MODE_NOPRIV_DROP));
+ }
+
+ /*
+ * If the dte_cond bits indicate that this consumer is only allowed to
+ * see user-mode firings of this probe, call the provider's dtps_mode()
+ * entry point to check that the probe was fired while in a user
+ * context. If that's not the case, use the policy specified by the
+ * provider to determine if we drop the probe or merely restrict
+ * operation.
+ */
+ if (ecb->dte_cond & DTRACE_COND_USERMODE) {
+ ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
+
+ if (!(mode & DTRACE_MODE_USER)) {
+ if (mode & DTRACE_MODE_NOPRIV_DROP)
+ return (0);
+
+ mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
+ }
+ }
+
+ /*
+ * This is more subtle than it looks. We have to be absolutely certain
+ * that CRED() isn't going to change out from under us so it's only
+ * legit to examine that structure if we're in constrained situations.
+ * Currently, the only time we'll do this check is if a non-super-user
+ * has enabled the profile or syscall providers -- providers that
+ * allow visibility of all processes. For the profile case, the check
+ * above will ensure that we're examining a user context.
+ */
+ if (ecb->dte_cond & DTRACE_COND_OWNER) {
+ cred_t *cr;
+ cred_t *s_cr = state->dts_cred.dcr_cred;
+ proc_t *proc;
+
+ ASSERT(s_cr != NULL);
+
+ if ((cr = CRED()) == NULL ||
+ s_cr->cr_uid != cr->cr_uid ||
+ s_cr->cr_uid != cr->cr_ruid ||
+ s_cr->cr_uid != cr->cr_suid ||
+ s_cr->cr_gid != cr->cr_gid ||
+ s_cr->cr_gid != cr->cr_rgid ||
+ s_cr->cr_gid != cr->cr_sgid ||
+ (proc = ttoproc(curthread)) == NULL ||
+ (proc->p_flag & SNOCD)) {
+ if (mode & DTRACE_MODE_NOPRIV_DROP)
+ return (0);
+
+ mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
+ }
+ }
+
+ /*
+ * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
+ * in our zone, check to see if our mode policy is to restrict rather
+ * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
+ * and DTRACE_ACCESS_ARGS.
+ */
+ if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
+ cred_t *cr;
+ cred_t *s_cr = state->dts_cred.dcr_cred;
+
+ ASSERT(s_cr != NULL);
+
+ if ((cr = CRED()) == NULL ||
+ s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
+ if (mode & DTRACE_MODE_NOPRIV_DROP)
+ return (0);
+
+ mstate->dtms_access &=
+ ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
+ }
+ }
+
+ return (1);
+}
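
An editor's sketch of the drop-versus-restrict policy applied above for the
DTRACE_COND_USERMODE case. The flag values are hypothetical stand-ins; the
real DTRACE_MODE_ and DTRACE_ACCESS_ definitions live in <sys/dtrace.h>:

#include <stdio.h>

/* Hypothetical flag values for illustration only. */
#define MODE_USER		0x01
#define MODE_NOPRIV_DROP	0x04
#define ACCESS_ARGS		0x10

/*
 * Drop-vs-restrict decision for a consumer that may only see
 * user-mode firings, per the DTRACE_COND_USERMODE logic above.
 */
static int
check_usermode(int mode, unsigned int *access)
{
	if (!(mode & MODE_USER)) {
		if (mode & MODE_NOPRIV_DROP)
			return (0);		/* drop this firing */
		*access &= ~ACCESS_ARGS;	/* restrict: strip args */
	}
	return (1);				/* continue processing */
}

int
main(void)
{
	unsigned int access = ACCESS_ARGS;

	printf("kernel firing, drop policy: %d\n",
	    check_usermode(MODE_NOPRIV_DROP, &access));		/* 0 */
	printf("user firing: %d\n",
	    check_usermode(MODE_USER | MODE_NOPRIV_DROP, &access)); /* 1 */
	return (0);
}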
+
+/*
* Note: not called from probe context. This function is called
* asynchronously (and at a regular interval) from outside of probe context to
* clean the dirty dynamic variable lists on all CPUs. Dynamic variable
@@ -1182,12 +1301,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
dtrace_dynvar_t *dirty;
dtrace_dstate_percpu_t *dcpu;
- int i, work = 0;
+ dtrace_dynvar_t **rinsep;
+ int i, j, work = 0;
for (i = 0; i < NCPU; i++) {
dcpu = &dstate->dtds_percpu[i];
-
- ASSERT(dcpu->dtdsc_rinsing == NULL);
+ rinsep = &dcpu->dtdsc_rinsing;
/*
* If the dirty list is NULL, there is no dirty work to do.
@@ -1195,14 +1314,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
if (dcpu->dtdsc_dirty == NULL)
continue;
- /*
- * If the clean list is non-NULL, then we're not going to do
- * any work for this CPU -- it means that there has not been
- * a dtrace_dynvar() allocation on this CPU (or from this CPU)
- * since the last time we cleaned house.
- */
- if (dcpu->dtdsc_clean != NULL)
+ if (dcpu->dtdsc_rinsing != NULL) {
+ /*
+ * If the rinsing list is non-NULL, then it is because
+ * this CPU was selected to accept another CPU's
+ * dirty list -- and since that time, dirty buffers
+ * have accumulated. This is a highly unlikely
+ * condition, but we choose to ignore the dirty
+ * buffers -- they'll be picked up by a future cleanse.
+ */
continue;
+ }
+
+ if (dcpu->dtdsc_clean != NULL) {
+ /*
+ * If the clean list is non-NULL, then we're in a
+ * situation where a CPU has done deallocations (we
+ * have a non-NULL dirty list) but no allocations (we
+ * also have a non-NULL clean list). We can't simply
+ * move the dirty list into the clean list on this
+ * CPU, yet we also don't want to allow this condition
+ * to persist, lest a short clean list prevent a
+ * massive dirty list from being cleaned (which in
+ * turn could lead to otherwise avoidable dynamic
+ * drops). To deal with this, we look for some CPU
+ * with a NULL clean list, NULL dirty list, and NULL
+ * rinsing list -- and then we borrow this CPU to
+ * rinse our dirty list.
+ */
+ for (j = 0; j < NCPU; j++) {
+ dtrace_dstate_percpu_t *rinser;
+
+ rinser = &dstate->dtds_percpu[j];
+
+ if (rinser->dtdsc_rinsing != NULL)
+ continue;
+
+ if (rinser->dtdsc_dirty != NULL)
+ continue;
+
+ if (rinser->dtdsc_clean != NULL)
+ continue;
+
+ rinsep = &rinser->dtdsc_rinsing;
+ break;
+ }
+
+ if (j == NCPU) {
+ /*
+ * We were unable to find another CPU that
+ * could accept this dirty list -- we are
+ * therefore unable to clean it now.
+ */
+ dtrace_dynvar_failclean++;
+ continue;
+ }
+ }
work = 1;
@@ -1219,7 +1386,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
* on a hash chain, either the dirty list or the
* rinsing list for some CPU must be non-NULL.)
*/
- dcpu->dtdsc_rinsing = dirty;
+ *rinsep = dirty;
dtrace_membar_producer();
} while (dtrace_casptr(&dcpu->dtdsc_dirty,
dirty, NULL) != dirty);
@@ -1650,7 +1817,7 @@ retry:
ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
/*
- * Now we'll move the clean list to the free list.
+ * Now we'll move the clean list to our free list.
* It's impossible for this to fail: the only way
* the free list can be updated is through this
* code path, and only one CPU can own the clean list.
@@ -1663,6 +1830,7 @@ retry:
* owners of the clean lists out before resetting
* the clean lists.
*/
+ dcpu = &dstate->dtds_percpu[me];
rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
ASSERT(rval == NULL);
goto retry;
@@ -1804,6 +1972,75 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
lquanta[levels + 1] += incr;
}
+static int
+dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
+ uint16_t high, uint16_t nsteps, int64_t value)
+{
+ int64_t this = 1, last, next;
+ int base = 1, order;
+
+ ASSERT(factor <= nsteps);
+ ASSERT(nsteps % factor == 0);
+
+ for (order = 0; order < low; order++)
+ this *= factor;
+
+ /*
+ * If our value is less than our factor taken to the power of the
+ * low order of magnitude, it goes into the zeroth bucket.
+ */
+ if (value < (last = this))
+ return (0);
+
+ for (this *= factor; order <= high; order++) {
+ int nbuckets = this > nsteps ? nsteps : this;
+
+ if ((next = this * factor) < this) {
+ /*
+ * We should not generally get log/linear quantizations
+ * with a high magnitude that allows 64-bits to
+ * overflow, but we nonetheless protect against this
+ * by explicitly checking for overflow, and clamping
+ * our value accordingly.
+ */
+ value = this - 1;
+ }
+
+ if (value < this) {
+ /*
+ * If our value lies within this order of magnitude,
+ * determine its position by taking the offset within
+ * the order of magnitude, dividing by the bucket
+ * width, and adding to our (accumulated) base.
+ */
+ return (base + (value - last) / (this / nbuckets));
+ }
+
+ base += nbuckets - (nbuckets / factor);
+ last = this;
+ this = next;
+ }
+
+ /*
+ * Our value is greater than or equal to our factor taken to the
+ * power of one plus the high magnitude -- return the top bucket.
+ */
+ return (base);
+}
+
+static void
+dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
+{
+ uint64_t arg = *llquanta++;
+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
+ uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
+ uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
+ uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
+
+ llquanta[dtrace_aggregate_llquantize_bucket(factor,
+ low, high, nsteps, nval)] += incr;
+}
+
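
An editor's aside, not part of the diff: the bucket layout produced by
dtrace_aggregate_llquantize_bucket() is easiest to see with concrete
parameters. The sketch below is a user-space copy of the bucket selection
(minus the ASSERTs, with 'this' renamed to 'cur'), evaluated for
factor = 10, low = 0, high = 2, nsteps = 10:

#include <stdint.h>
#include <stdio.h>

/*
 * With factor=10, low=0, high=2, nsteps=10 the buckets are:
 *   bucket 0      : value < 1
 *   buckets 1-9   : width 1   (1 <= value < 10)
 *   buckets 10-18 : width 10  (10 <= value < 100)
 *   buckets 19-27 : width 100 (100 <= value < 1000)
 *   bucket 28     : value >= 1000
 */
static int
llq_bucket(uint16_t factor, uint16_t low, uint16_t high, uint16_t nsteps,
    int64_t value)
{
	int64_t cur = 1, last, next;
	int base = 1, order;

	for (order = 0; order < low; order++)
		cur *= factor;

	if (value < (last = cur))
		return (0);

	for (cur *= factor; order <= high; order++) {
		int nbuckets = cur > nsteps ? nsteps : cur;

		if ((next = cur * factor) < cur)
			value = cur - 1;	/* clamp on 64-bit overflow */

		if (value < cur)
			return (base + (value - last) / (cur / nbuckets));

		base += nbuckets - (nbuckets / factor);
		last = cur;
		cur = next;
	}

	return (base);
}

int
main(void)
{
	int64_t samples[] = { 0, 7, 42, 650, 5000 };
	int i;

	/* Prints buckets 0, 7, 13, 24 and 28 respectively. */
	for (i = 0; i < 5; i++)
		printf("%lld -> bucket %d\n", (long long)samples[i],
		    llq_bucket(10, 0, 2, 10, samples[i]));
	return (0);
}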
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
@@ -2585,6 +2822,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
switch (v) {
case DIF_VAR_ARGS:
+ if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
+ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
+ CPU_DTRACE_KPRIV;
+ return (0);
+ }
+
ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
if (ndx >= sizeof (mstate->dtms_arg) /
sizeof (mstate->dtms_arg[0])) {
@@ -2620,7 +2863,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
case DIF_VAR_UREGS: {
klwp_t *lwp;
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
if ((lwp = curthread->t_lwp) == NULL) {
@@ -2632,6 +2875,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (dtrace_getreg(lwp->lwp_regs, ndx));
}
+ case DIF_VAR_VMREGS: {
+ uint64_t rval;
+
+ if (!dtrace_priv_kernel(state))
+ return (0);
+
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+
+ rval = dtrace_getvmreg(ndx,
+ &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
+
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+ return (rval);
+ }
+
case DIF_VAR_CURTHREAD:
if (!dtrace_priv_kernel(state))
return (0);
@@ -2684,7 +2943,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (mstate->dtms_stackdepth);
case DIF_VAR_USTACKDEPTH:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
/*
@@ -2739,7 +2998,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (mstate->dtms_caller);
case DIF_VAR_UCALLER:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
@@ -2787,7 +3046,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_PID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2809,7 +3068,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
case DIF_VAR_PPID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2836,7 +3095,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_tid);
case DIF_VAR_EXECNAME:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2856,7 +3115,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_ZONENAME:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2876,7 +3135,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_UID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2897,7 +3156,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
case DIF_VAR_GID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2919,7 +3178,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
case DIF_VAR_ERRNO: {
klwp_t *lwp;
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -3259,7 +3518,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
- dtrace_priv_proc_control(state) &&
+ dtrace_priv_proc_control(state, mstate) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyout(kaddr, uaddr, size, flags);
@@ -3274,7 +3533,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
- dtrace_priv_proc_control(state) &&
+ dtrace_priv_proc_control(state, mstate) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyoutstr(kaddr, uaddr, size, flags);
@@ -3600,7 +3859,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
int64_t index = (int64_t)tupregs[1].dttk_value;
int64_t remaining = (int64_t)tupregs[2].dttk_value;
size_t len = dtrace_strlen((char *)s, size);
- int64_t i = 0;
+ int64_t i;
if (!dtrace_canload(s, len + 1, mstate, vstate)) {
regs[rd] = NULL;
@@ -3645,7 +3904,54 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
break;
}
- case DIF_SUBR_GETMAJOR:
+ case DIF_SUBR_TOUPPER:
+ case DIF_SUBR_TOLOWER: {
+ uintptr_t s = tupregs[0].dttk_value;
+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
+ char *dest = (char *)mstate->dtms_scratch_ptr, c;
+ size_t len = dtrace_strlen((char *)s, size);
+ char lower, upper, convert;
+ int64_t i;
+
+ if (subr == DIF_SUBR_TOUPPER) {
+ lower = 'a';
+ upper = 'z';
+ convert = 'A';
+ } else {
+ lower = 'A';
+ upper = 'Z';
+ convert = 'a';
+ }
+
+ if (!dtrace_canload(s, len + 1, mstate, vstate)) {
+ regs[rd] = NULL;
+ break;
+ }
+
+ if (!DTRACE_INSCRATCH(mstate, size)) {
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
+ regs[rd] = NULL;
+ break;
+ }
+
+ for (i = 0; i < size - 1; i++) {
+ if ((c = dtrace_load8(s + i)) == '\0')
+ break;
+
+ if (c >= lower && c <= upper)
+ c = convert + (c - lower);
+
+ dest[i] = c;
+ }
+
+ ASSERT(i < size);
+ dest[i] = '\0';
+ regs[rd] = (uintptr_t)dest;
+ mstate->dtms_scratch_ptr += size;
+ break;
+ }
+
+ case DIF_SUBR_GETMAJOR:
#ifdef _LP64
regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
#else
@@ -3907,9 +4213,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
case DIF_SUBR_LLTOSTR: {
int64_t i = (int64_t)tupregs[0].dttk_value;
- int64_t val = i < 0 ? i * -1 : i;
- uint64_t size = 22; /* enough room for 2^64 in decimal */
+ uint64_t val, digit;
+ uint64_t size = 65; /* enough room for 2^64 in binary */
char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
+ int base = 10;
+
+ if (nargs > 1) {
+ if ((base = tupregs[1].dttk_value) <= 1 ||
+ base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
+ *flags |= CPU_DTRACE_ILLOP;
+ break;
+ }
+ }
+
+ val = (base == 10 && i < 0) ? i * -1 : i;
if (!DTRACE_INSCRATCH(mstate, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
@@ -3917,13 +4234,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
break;
}
- for (*end-- = '\0'; val; val /= 10)
- *end-- = '0' + (val % 10);
+ for (*end-- = '\0'; val; val /= base) {
+ if ((digit = val % base) <= '9' - '0') {
+ *end-- = '0' + digit;
+ } else {
+ *end-- = 'a' + (digit - ('9' - '0') - 1);
+ }
+ }
+
+ if (i == 0 && base == 16)
+ *end-- = '0';
+
+ if (base == 16)
+ *end-- = 'x';
- if (i == 0)
+ if (i == 0 || base == 8 || base == 16)
*end-- = '0';
- if (i < 0)
+ if (i < 0 && base == 10)
*end-- = '-';
regs[rd] = (uintptr_t)end + 1;
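
An editor's user-space analogue of the backward digit emission in the
reworked lltostr() above: digits are written from the end of a scratch
buffer toward the front, then the base-dependent prefixes and sign are
prepended (a sketch, not the kernel code):

#include <stdint.h>
#include <stdio.h>

static const char *
lltostr_base(int64_t i, int base, char *buf, size_t len)
{
	char *end = buf + len - 1;
	uint64_t val = (base == 10 && i < 0) ? (uint64_t)-i : (uint64_t)i;
	uint64_t digit;

	for (*end-- = '\0'; val != 0; val /= base) {
		if ((digit = val % base) <= 9)
			*end-- = (char)('0' + digit);
		else
			*end-- = (char)('a' + (digit - 10));
	}

	/* Mirror the prefix rules from the kernel code above. */
	if (i == 0 && base == 16)
		*end-- = '0';
	if (base == 16)
		*end-- = 'x';
	if (i == 0 || base == 8 || base == 16)
		*end-- = '0';
	if (i < 0 && base == 10)
		*end-- = '-';

	return (end + 1);
}

int
main(void)
{
	char buf[72];

	printf("%s\n", lltostr_base(255, 16, buf, sizeof (buf)));  /* 0xff */
	printf("%s\n", lltostr_base(8, 8, buf, sizeof (buf)));     /* 010 */
	printf("%s\n", lltostr_base(-42, 10, buf, sizeof (buf)));  /* -42 */
	return (0);
}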
@@ -5558,6 +5886,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
dtrace_vstate_t *vstate = &state->dts_vstate;
dtrace_provider_t *prov = probe->dtpr_provider;
+ uint64_t tracememsize = 0;
int committed = 0;
caddr_t tomax;
@@ -5578,6 +5907,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
#endif
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
+ mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
@@ -5615,65 +5945,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
}
}
- if (ecb->dte_cond) {
- /*
- * If the dte_cond bits indicate that this
- * consumer is only allowed to see user-mode firings
- * of this probe, call the provider's dtps_usermode()
- * entry point to check that the probe was fired
- * while in a user context. Skip this ECB if that's
- * not the case.
- */
- if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
- prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg) == 0)
- continue;
-
- /*
- * This is more subtle than it looks. We have to be
- * absolutely certain that CRED() isn't going to
- * change out from under us so it's only legit to
- * examine that structure if we're in constrained
- * situations. Currently, the only times we'll this
- * check is if a non-super-user has enabled the
- * profile or syscall providers -- providers that
- * allow visibility of all processes. For the
- * profile case, the check above will ensure that
- * we're examining a user context.
- */
- if (ecb->dte_cond & DTRACE_COND_OWNER) {
- cred_t *cr;
- cred_t *s_cr =
- ecb->dte_state->dts_cred.dcr_cred;
- proc_t *proc;
-
- ASSERT(s_cr != NULL);
-
- if ((cr = CRED()) == NULL ||
- s_cr->cr_uid != cr->cr_uid ||
- s_cr->cr_uid != cr->cr_ruid ||
- s_cr->cr_uid != cr->cr_suid ||
- s_cr->cr_gid != cr->cr_gid ||
- s_cr->cr_gid != cr->cr_rgid ||
- s_cr->cr_gid != cr->cr_sgid ||
- (proc = ttoproc(curthread)) == NULL ||
- (proc->p_flag & SNOCD))
- continue;
- }
-
- if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
- cred_t *cr;
- cred_t *s_cr =
- ecb->dte_state->dts_cred.dcr_cred;
-
- ASSERT(s_cr != NULL);
-
- if ((cr = CRED()) == NULL ||
- s_cr->cr_zone->zone_id !=
- cr->cr_zone->zone_id)
- continue;
- }
- }
+ if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
+ continue;
if (now - state->dts_alive > dtrace_deadman_timeout) {
/*
@@ -5713,9 +5986,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
mstate.dtms_present |= DTRACE_MSTATE_EPID;
if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
- mstate.dtms_access = DTRACE_ACCESS_KERNEL;
- else
- mstate.dtms_access = 0;
+ mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
if (pred != NULL) {
dtrace_difo_t *dp = pred->dtp_difo;
@@ -5775,7 +6046,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
switch (act->dta_kind) {
case DTRACEACT_STOP:
- if (dtrace_priv_proc_destructive(state))
+ if (dtrace_priv_proc_destructive(state,
+ &mstate))
dtrace_action_stop();
continue;
@@ -5802,7 +6074,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_JSTACK:
case DTRACEACT_USTACK:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, &mstate))
continue;
/*
@@ -5835,6 +6107,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
}
+ /*
+ * Clear the string space, since there's no
+ * helper to do it for us.
+ */
+ if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
+ int depth = DTRACE_USTACK_NFRAMES(
+ rec->dtrd_arg);
+ size_t strsize = DTRACE_USTACK_STRSIZE(
+ rec->dtrd_arg);
+ uint64_t *buf = (uint64_t *)(tomax +
+ valoffs);
+ void *strspace = &buf[depth + 1];
+
+ dtrace_bzero(strspace,
+ MIN(depth, strsize));
+ }
+
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_getupcstack((uint64_t *)
(tomax + valoffs),
@@ -5888,7 +6177,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
case DTRACEACT_RAISE:
- if (dtrace_priv_proc_destructive(state))
+ if (dtrace_priv_proc_destructive(state,
+ &mstate))
dtrace_action_raise(val);
continue;
@@ -5915,6 +6205,11 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_PRINTA:
case DTRACEACT_SYSTEM:
case DTRACEACT_FREOPEN:
+ case DTRACEACT_TRACEMEM:
+ break;
+
+ case DTRACEACT_TRACEMEM_DYNSIZE:
+ tracememsize = val;
break;
case DTRACEACT_SYM:
@@ -5928,7 +6223,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_UADDR: {
struct pid *pid = curthread->t_procp->p_pidp;
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, &mstate))
continue;
DTRACE_STORE(uint64_t, tomax,
@@ -5980,6 +6275,12 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
uintptr_t end = valoffs + size;
+ if (tracememsize != 0 &&
+ valoffs + tracememsize < end) {
+ end = valoffs + tracememsize;
+ tracememsize = 0;
+ }
+
if (!dtrace_vcanload((void *)(uintptr_t)val,
&dp->dtdo_rtype, &mstate, vstate))
continue;
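
[Editor's note: the two tracemem hunks work together. A DTRACEACT_TRACEMEM_DYNSIZE record carries a run-time byte count that clamps the copy window of the DTRACEACT_TRACEMEM record that follows it; the dynamic size can shrink the window but never grow it. Restated:]

    /* Restatement of the clamp above (sketch). */
    uintptr_t end = valoffs + size;         /* static record size */

    if (tracememsize != 0 && valoffs + tracememsize < end)
            end = valoffs + tracememsize;   /* dynamic size wins if smaller */
    tracememsize = 0;                       /* consumed by this record only */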
@@ -6655,7 +6956,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
{
dtrace_probe_t template, *probe;
dtrace_hash_t *hash = NULL;
- int len, best = INT_MAX, nmatched = 0;
+ int len, rc, best = INT_MAX, nmatched = 0;
dtrace_id_t i;
ASSERT(MUTEX_HELD(&dtrace_lock));
@@ -6667,7 +6968,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
if (pkp->dtpk_id != DTRACE_IDNONE) {
if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
- (void) (*matched)(probe, arg);
+ if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
nmatched++;
}
return (nmatched);
@@ -6714,8 +7016,12 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
+ if ((rc = (*matched)(probe, arg)) !=
+ DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
break;
+ }
}
return (nmatched);
@@ -6734,8 +7040,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
+ if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
break;
+ }
}
return (nmatched);
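
[Editor's note: with this change the matched() callback has a three-way contract. A sketch of the caller-side handling, using the constants from the hunks above:]

    /* Sketch of the callback contract introduced above. */
    int rc = (*matched)(probe, arg);

    if (rc == DTRACE_MATCH_FAIL)
            return (DTRACE_MATCH_FAIL);     /* abort: an enabling failed */
    if (rc != DTRACE_MATCH_NEXT)
            break;                          /* DTRACE_MATCH_DONE: stop early */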
@@ -6852,9 +7161,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
if ((priv & DTRACE_PRIV_KERNEL) &&
(priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
- pops->dtps_usermode == NULL) {
+ pops->dtps_mode == NULL) {
cmn_err(CE_WARN, "failed to register provider '%s': need "
- "dtps_usermode() op for given privilege attributes", name);
+ "dtps_mode() op for given privilege attributes", name);
return (EINVAL);
}
@@ -6951,11 +7260,11 @@ dtrace_unregister(dtrace_provider_id_t id)
{
dtrace_provider_t *old = (dtrace_provider_t *)id;
dtrace_provider_t *prev = NULL;
- int i, self = 0;
+ int i, self = 0, noreap = 0;
dtrace_probe_t *probe, *first = NULL;
if (old->dtpv_pops.dtps_enable ==
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
/*
* If DTrace itself is the provider, we're called with locks
* already held.
@@ -7008,14 +7317,31 @@ dtrace_unregister(dtrace_provider_id_t id)
continue;
/*
- * We have at least one ECB; we can't remove this provider.
+ * If we are trying to unregister a defunct provider, and the
+ * provider was made defunct within the interval dictated by
+ * dtrace_unregister_defunct_reap, we'll (asynchronously)
+ * attempt to reap our enablings. To denote that the provider
+ * should reattempt to unregister itself at some point in the
+ * future, we will return a differentiable error code (EAGAIN
+ * instead of EBUSY) in this case.
*/
+ if (dtrace_gethrtime() - old->dtpv_defunct >
+ dtrace_unregister_defunct_reap)
+ noreap = 1;
+
if (!self) {
mutex_exit(&dtrace_lock);
mutex_exit(&mod_lock);
mutex_exit(&dtrace_provider_lock);
}
- return (EBUSY);
+
+ if (noreap)
+ return (EBUSY);
+
+ (void) taskq_dispatch(dtrace_taskq,
+ (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
+
+ return (EAGAIN);
}
/*
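
[Editor's note: callers can now distinguish the two failure modes. A provider-side sketch, mirroring the fasttrap change later in this diff; mark_for_retry() is a hypothetical placeholder:]

    switch (dtrace_unregister(provid)) {
    case 0:
            break;                  /* unregistered; free provider state */
    case EAGAIN:
            mark_for_retry(provid); /* reap dispatched; try again later */
            break;
    case EBUSY:
            break;                  /* ECBs remain and reaping won't help */
    }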
@@ -7101,12 +7427,12 @@ dtrace_invalidate(dtrace_provider_id_t id)
dtrace_provider_t *pvp = (dtrace_provider_t *)id;
ASSERT(pvp->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
- pvp->dtpv_defunct = 1;
+ pvp->dtpv_defunct = dtrace_gethrtime();
mutex_exit(&dtrace_lock);
mutex_exit(&dtrace_provider_lock);
@@ -7142,7 +7468,7 @@ dtrace_condense(dtrace_provider_id_t id)
* Make sure this isn't the dtrace provider itself.
*/
ASSERT(prov->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
@@ -8103,7 +8429,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
break;
default:
- err += efunc(dp->dtdo_len - 1, "bad return size");
+ err += efunc(dp->dtdo_len - 1, "bad return size\n");
}
}
@@ -9096,7 +9422,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
return (ecb);
}
-static void
+static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
dtrace_probe_t *probe = ecb->dte_probe;
@@ -9109,7 +9435,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
/*
* This is the NULL probe -- there's nothing to do.
*/
- return;
+ return (0);
}
if (probe->dtpr_ecb == NULL) {
@@ -9123,8 +9449,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
if (ecb->dte_predicate != NULL)
probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
- prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg);
+ return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg));
} else {
/*
* This probe is already active. Swing the last pointer to
@@ -9137,6 +9463,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
probe->dtpr_predcache = 0;
dtrace_sync();
+ return (0);
}
}
@@ -9312,6 +9639,35 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
break;
}
+ case DTRACEAGG_LLQUANTIZE: {
+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
+ uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
+ uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
+ uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
+ int64_t v;
+
+ agg->dtag_initial = desc->dtad_arg;
+ agg->dtag_aggregate = dtrace_aggregate_llquantize;
+
+ if (factor < 2 || low >= high || nsteps < factor)
+ goto err;
+
+ /*
+ * Now check that the number of steps evenly divides a power
+ * of the factor. (This assures both integer bucket size and
+ * linearity within each magnitude.)
+ */
+ for (v = factor; v < nsteps; v *= factor)
+ continue;
+
+ if ((v % nsteps) || (nsteps % factor))
+ goto err;
+
+ size = (dtrace_aggregate_llquantize_bucket(factor,
+ low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
+ break;
+ }
+
case DTRACEAGG_AVG:
agg->dtag_aggregate = dtrace_aggregate_avg;
size = sizeof (uint64_t) * 2;
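
[Editor's note: a worked pass through the llquantize validation above, for a typical llquantize(x, 10, 0, 6, 20), i.e. factor 10, magnitudes 0 through 6, 20 steps per magnitude:]

    /* factor = 10, low = 0, high = 6, nsteps = 20 */
    int64_t v;

    /* nsteps >= factor and low < high, so the quick checks pass. */
    for (v = 10; v < 20; v *= 10)   /* v: 10 -> 100 */
            continue;
    /* v % nsteps == 100 % 20 == 0, and nsteps % factor == 20 % 10 == 0, */
    /* so 20 steps divide 10^2 evenly: buckets are integral and linear.  */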
@@ -9481,12 +9837,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
case DTRACEACT_PRINTA:
case DTRACEACT_SYSTEM:
case DTRACEACT_FREOPEN:
+ case DTRACEACT_DIFEXPR:
/*
* We know that our arg is a string -- turn it into a
* format.
*/
if (arg == NULL) {
- ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
+ ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
+ desc->dtad_kind == DTRACEACT_DIFEXPR);
format = 0;
} else {
ASSERT(arg != NULL);
@@ -9497,7 +9855,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
/*FALLTHROUGH*/
case DTRACEACT_LIBACT:
- case DTRACEACT_DIFEXPR:
+ case DTRACEACT_TRACEMEM:
+ case DTRACEACT_TRACEMEM_DYNSIZE:
if (dp == NULL)
return (EINVAL);
@@ -9920,7 +10279,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
return (DTRACE_MATCH_DONE);
- dtrace_ecb_enable(ecb);
+ if (dtrace_ecb_enable(ecb) < 0)
+ return (DTRACE_MATCH_FAIL);
+
return (DTRACE_MATCH_NEXT);
}
@@ -9978,6 +10339,7 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
caddr_t tomax = buf->dtb_tomax;
caddr_t xamot = buf->dtb_xamot;
dtrace_icookie_t cookie;
+ hrtime_t now = dtrace_gethrtime();
ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
@@ -9993,6 +10355,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
buf->dtb_drops = 0;
buf->dtb_errors = 0;
buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
+ buf->dtb_interval = now - buf->dtb_switched;
+ buf->dtb_switched = now;
dtrace_interrupt_enable(cookie);
}
@@ -10025,14 +10389,17 @@ dtrace_buffer_activate(dtrace_state_t *state)
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
- processorid_t cpu)
+ processorid_t cpu, int *factor)
{
cpu_t *cp;
dtrace_buffer_t *buf;
+ int allocated = 0, desired = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(MUTEX_HELD(&dtrace_lock));
+ *factor = 1;
+
if (size > dtrace_nonroot_maxsize &&
!PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
return (EFBIG);
@@ -10057,7 +10424,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
ASSERT(buf->dtb_xamot == NULL);
- if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
+ if ((buf->dtb_tomax = kmem_zalloc(size,
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL)
goto err;
buf->dtb_size = size;
@@ -10068,7 +10436,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
if (flags & DTRACEBUF_NOSWITCH)
continue;
- if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
+ if ((buf->dtb_xamot = kmem_zalloc(size,
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL)
goto err;
} while ((cp = cp->cpu_next) != cpu_list);
@@ -10082,16 +10451,19 @@ err:
continue;
buf = &bufs[cp->cpu_id];
+ desired += 2;
if (buf->dtb_xamot != NULL) {
ASSERT(buf->dtb_tomax != NULL);
ASSERT(buf->dtb_size == size);
kmem_free(buf->dtb_xamot, size);
+ allocated++;
}
if (buf->dtb_tomax != NULL) {
ASSERT(buf->dtb_size == size);
kmem_free(buf->dtb_tomax, size);
+ allocated++;
}
buf->dtb_tomax = NULL;
@@ -10099,6 +10471,8 @@ err:
buf->dtb_size = 0;
} while ((cp = cp->cpu_next) != cpu_list);
+ *factor = desired / (allocated > 0 ? allocated : 1);
+
return (ENOMEM);
}
@@ -10400,6 +10774,36 @@ dtrace_buffer_polish(dtrace_buffer_t *buf)
}
}
+/*
+ * This routine determines if data generated at the specified time has likely
+ * been entirely consumed at user-level. It is called to determine
+ * if an ECB on a defunct probe (but for an active enabling) can be safely
+ * disabled and destroyed.
+ */
+static int
+dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
+{
+ int i;
+
+ for (i = 0; i < NCPU; i++) {
+ dtrace_buffer_t *buf = &bufs[i];
+
+ if (buf->dtb_size == 0)
+ continue;
+
+ if (buf->dtb_flags & DTRACEBUF_RING)
+ return (0);
+
+ if (!buf->dtb_switched && buf->dtb_offset != 0)
+ return (0);
+
+ if (buf->dtb_switched - buf->dtb_interval < when)
+ return (0);
+ }
+
+ return (1);
+}
+
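
[Editor's note: restated per-CPU, the test above says data stamped `when` is provably consumed only once the buffer pair has completed a full switch cycle after it; ring buffers can never prove this, so they always fail the test.]

    /* Per-CPU predicate sketch, equivalent to the loop body above. */
    static int
    buffer_consumed_one(dtrace_buffer_t *buf, hrtime_t when)
    {
            if (buf->dtb_flags & DTRACEBUF_RING)
                    return (0);     /* rings are read in place; no proof */
            if (!buf->dtb_switched && buf->dtb_offset != 0)
                    return (0);     /* never switched, data still pending */
            return (buf->dtb_switched - buf->dtb_interval >= when);
    }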
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
@@ -10557,6 +10961,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab)
ASSERT(enab->dten_vstate->dtvs_state != NULL);
ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
enab->dten_vstate->dtvs_state->dts_nretained--;
+ dtrace_retained_gen++;
}
if (enab->dten_prev == NULL) {
@@ -10599,6 +11004,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab)
return (ENOSPC);
state->dts_nretained++;
+ dtrace_retained_gen++;
if (dtrace_retained == NULL) {
dtrace_retained = enab;
@@ -10713,7 +11119,7 @@ static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
int i = 0;
- int matched = 0;
+ int total_matched = 0, matched = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(MUTEX_HELD(&dtrace_lock));
@@ -10724,7 +11130,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
enab->dten_current = ep;
enab->dten_error = 0;
- matched += dtrace_probe_enable(&ep->dted_probe, enab);
+ /*
+ * If a provider failed to enable a probe, get out and
+ * let the consumer know we failed.
+ */
+ if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
+ return (EBUSY);
+
+ total_matched += matched;
if (enab->dten_error != 0) {
/*
@@ -10752,7 +11165,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
enab->dten_probegen = dtrace_probegen;
if (nmatched != NULL)
- *nmatched = matched;
+ *nmatched = total_matched;
return (0);
}
@@ -10766,13 +11179,24 @@ dtrace_enabling_matchall(void)
mutex_enter(&dtrace_lock);
/*
- * Because we can be called after dtrace_detach() has been called, we
- * cannot assert that there are retained enablings. We can safely
- * load from dtrace_retained, however: the taskq_destroy() at the
- * end of dtrace_detach() will block pending our completion.
+ * Iterate over all retained enablings to see if any probes match
+ * against them. We only perform this operation on enablings for which
+ * we have sufficient permissions by virtue of being in the global zone
+ * or in the same zone as the DTrace client. Because we can be called
+ * after dtrace_detach() has been called, we cannot assert that there
+ * are retained enablings. We can safely load from dtrace_retained,
+ * however: the taskq_destroy() at the end of dtrace_detach() will
+ * block pending our completion.
*/
- for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next)
- (void) dtrace_enabling_match(enab, NULL);
+ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
+ dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
+ cred_t *cr = dcr->dcr_cred;
+ zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
+
+ if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
+ (zone == GLOBAL_ZONEID || getzoneid() == zone)))
+ (void) dtrace_enabling_match(enab, NULL);
+ }
mutex_exit(&dtrace_lock);
mutex_exit(&cpu_lock);
@@ -10830,6 +11254,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
{
int i, all = 0;
dtrace_probedesc_t desc;
+ dtrace_genid_t gen;
ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(MUTEX_HELD(&dtrace_provider_lock));
@@ -10840,15 +11265,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
}
do {
- dtrace_enabling_t *enab = dtrace_retained;
+ dtrace_enabling_t *enab;
void *parg = prv->dtpv_arg;
- for (; enab != NULL; enab = enab->dten_next) {
+retry:
+ gen = dtrace_retained_gen;
+ for (enab = dtrace_retained; enab != NULL;
+ enab = enab->dten_next) {
for (i = 0; i < enab->dten_ndesc; i++) {
desc = enab->dten_desc[i]->dted_probe;
mutex_exit(&dtrace_lock);
prv->dtpv_pops.dtps_provide(parg, &desc);
mutex_enter(&dtrace_lock);
+ /*
+ * Process the retained enablings again if
+ * they have changed while we weren't holding
+ * dtrace_lock.
+ */
+ if (gen != dtrace_retained_gen)
+ goto retry;
}
}
} while (all && (prv = prv->dtpv_next) != NULL);
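
[Editor's note: the retry above is a generation-counter pattern. Every mutation of the retained list bumps dtrace_retained_gen (see the dtrace_enabling_retain()/destroy() hunks), so a walker that drops dtrace_lock can detect a stale snapshot on reacquisition. Schematically, with provide_one() as a hypothetical stand-in for the dtps_provide() call:]

    retry:
            gen = dtrace_retained_gen;      /* snapshot under dtrace_lock */
            for (enab = dtrace_retained; enab != NULL;
                enab = enab->dten_next) {
                    mutex_exit(&dtrace_lock);
                    provide_one(enab);      /* may sleep; list may mutate */
                    mutex_enter(&dtrace_lock);
                    if (gen != dtrace_retained_gen)
                            goto retry;     /* stale snapshot: start over */
            }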
@@ -10859,6 +11294,85 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
}
/*
+ * Called to reap ECBs that are attached to probes from defunct providers.
+ */
+static void
+dtrace_enabling_reap(void)
+{
+ dtrace_provider_t *prov;
+ dtrace_probe_t *probe;
+ dtrace_ecb_t *ecb;
+ hrtime_t when;
+ int i;
+
+ mutex_enter(&cpu_lock);
+ mutex_enter(&dtrace_lock);
+
+ for (i = 0; i < dtrace_nprobes; i++) {
+ if ((probe = dtrace_probes[i]) == NULL)
+ continue;
+
+ if (probe->dtpr_ecb == NULL)
+ continue;
+
+ prov = probe->dtpr_provider;
+
+ if ((when = prov->dtpv_defunct) == 0)
+ continue;
+
+ /*
+ * We have ECBs on a defunct provider: we want to reap these
+ * ECBs to allow the provider to unregister. The destruction
+ * of these ECBs must be done carefully: if we destroy the ECB
+ * and the consumer later wishes to consume an EPID that
+ * corresponds to the destroyed ECB (and if the EPID metadata
+ * has not been previously consumed), the consumer will abort
+ * processing on the unknown EPID. To reduce (but not, sadly,
+ * eliminate) the possibility of this, we will only destroy an
+ * ECB for a defunct provider if, for the state that
+ * corresponds to the ECB:
+ *
+ * (a) There is no speculative tracing (which can effectively
+ * cache an EPID for an arbitrary amount of time).
+ *
+ * (b) The principal buffers have been switched twice since the
+ * provider became defunct.
+ *
+ * (c) The aggregation buffers are of zero size or have been
+ * switched twice since the provider became defunct.
+ *
+ * We use dts_speculates to determine (a) and call a function
+ * (dtrace_buffer_consumed()) to determine (b) and (c). Note
+ * that as soon as we've been unable to destroy one of the ECBs
+ * associated with the probe, we quit trying -- reaping is only
+ * fruitful inasmuch as we can destroy all ECBs associated
+ * with the defunct provider's probes.
+ */
+ while ((ecb = probe->dtpr_ecb) != NULL) {
+ dtrace_state_t *state = ecb->dte_state;
+ dtrace_buffer_t *buf = state->dts_buffer;
+ dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
+
+ if (state->dts_speculates)
+ break;
+
+ if (!dtrace_buffer_consumed(buf, when))
+ break;
+
+ if (!dtrace_buffer_consumed(aggbuf, when))
+ break;
+
+ dtrace_ecb_disable(ecb);
+ ASSERT(probe->dtpr_ecb != ecb);
+ dtrace_ecb_destroy(ecb);
+ }
+ }
+
+ mutex_exit(&dtrace_lock);
+ mutex_exit(&cpu_lock);
+}
+
+/*
* DTrace DOF Functions
*/
/*ARGSUSED*/
@@ -10970,7 +11484,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp)
dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
- if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
+ if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
+ dof->dofh_loadsz != hdr.dofh_loadsz) {
kmem_free(dof, hdr.dofh_loadsz);
*errp = EFAULT;
return (NULL);
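
[Editor's note: this is a double-fetch guard. dofh_loadsz was read once from the user header to size the allocation, then read again as part of the full copy; if a racing user thread changed it between the two copyin()s, the request is rejected rather than trusting the second value. The fasttrap ftps_noffs check later in this diff applies the same pattern. In general form, with hdr and header_t as hypothetical names:]

    /* Generic double-fetch guard (sketch). */
    if (copyin(uaddr, &hdr, sizeof (hdr)) != 0)
            return (EFAULT);
    buf = kmem_alloc(hdr.size, KM_SLEEP);           /* size from 1st read */
    if (copyin(uaddr, buf, hdr.size) != 0 ||
        ((header_t *)buf)->size != hdr.size) {      /* re-check 2nd read */
            kmem_free(buf, hdr.size);
            return (EFAULT);
    }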
@@ -11362,15 +11877,20 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
(uintptr_t)sec->dofs_offset + offs);
kind = (dtrace_actkind_t)desc->dofa_kind;
- if (DTRACEACT_ISPRINTFLIKE(kind) &&
+ if ((DTRACEACT_ISPRINTFLIKE(kind) &&
(kind != DTRACEACT_PRINTA ||
+ desc->dofa_strtab != DOF_SECIDX_NONE)) ||
+ (kind == DTRACEACT_DIFEXPR &&
desc->dofa_strtab != DOF_SECIDX_NONE)) {
dof_sec_t *strtab;
char *str, *fmt;
uint64_t i;
/*
- * printf()-like actions must have a format string.
+ * The argument to these actions is an index into the
+ * DOF string table. For printf()-like actions, this
+ * is the format string. For print(), this is the
+ * CTF type of the expression result.
*/
if ((strtab = dtrace_dof_sect(dof,
DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
@@ -11698,6 +12218,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
}
}
+ if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
+ !(sec->dofs_flags & DOF_SECF_LOAD)) {
+ dtrace_dof_error(dof, "loadable section with load "
+ "flag unset");
+ return (-1);
+ }
+
if (!(sec->dofs_flags & DOF_SECF_LOAD))
continue; /* just ignore non-loadable sections */
@@ -11849,7 +12376,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
size = min;
- if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
+ if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
return (ENOMEM);
dstate->dtds_size = size;
@@ -12211,7 +12738,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
dtrace_optval_t *opt = state->dts_options, size;
processorid_t cpu;
- int flags = 0, rval;
+ int flags = 0, rval, factor, divisor = 1;
ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(MUTEX_HELD(&cpu_lock));
@@ -12241,7 +12768,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
flags |= DTRACEBUF_INACTIVE;
}
- for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
+ for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
/*
* The size must be 8-byte aligned. If the size is not 8-byte
* aligned, drop it down by the difference.
@@ -12259,7 +12786,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
return (E2BIG);
}
- rval = dtrace_buffer_alloc(buf, size, flags, cpu);
+ rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
if (rval != ENOMEM) {
opt[which] = size;
@@ -12268,6 +12795,9 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
return (rval);
+
+ for (divisor = 2; divisor < factor; divisor <<= 1)
+ continue;
}
return (ENOMEM);
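
[Editor's note: dtrace_buffer_alloc() now reports how oversubscribed a failed request was (buffers desired over buffers actually obtained), and the retry loop above divides the size by the smallest power of two at or above that factor instead of merely halving. For example, with factor == 5:]

    int divisor;

    for (divisor = 2; divisor < 5; divisor <<= 1)
            continue;               /* 2 -> 4 -> 8; stops at 8 */
    /* the next attempt uses size / 8 rather than size / 2 */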
@@ -12367,7 +12897,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
goto out;
}
- spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);
+ spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
+ KM_NOSLEEP | KM_NORMALPRI);
if (spec == NULL) {
rval = ENOMEM;
@@ -12378,7 +12909,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
state->dts_nspeculations = (int)nspec;
for (i = 0; i < nspec; i++) {
- if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
+ if ((buf = kmem_zalloc(bufsize,
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
rval = ENOMEM;
goto err;
}
@@ -14390,7 +14922,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
* If this wasn't an open with the "helper" minor, then it must be
* the "dtrace" minor.
*/
- ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE);
+ if (getminor(*devp) != DTRACEMNRN_DTRACE)
+ return (ENXIO);
/*
* If no DTRACE_PRIV_* bits are set in the credential, then the
@@ -14427,7 +14960,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
mutex_exit(&cpu_lock);
if (state == NULL) {
- if (--dtrace_opens == 0)
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
mutex_exit(&dtrace_lock);
return (EAGAIN);
@@ -14463,7 +14996,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
dtrace_state_destroy(state);
ASSERT(dtrace_opens > 0);
- if (--dtrace_opens == 0)
+
+ /*
+ * Only relinquish control of the kernel debugger interface when there
+ * are no consumers and no anonymous enablings.
+ */
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
mutex_exit(&dtrace_lock);
@@ -15458,7 +15996,8 @@ static struct dev_ops dtrace_ops = {
nodev, /* reset */
&dtrace_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
static struct modldrv modldrv = {
diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c
index b7ca92f54a59..8cfe4cd33beb 100644
--- a/uts/common/dtrace/fasttrap.c
+++ b/uts/common/dtrace/fasttrap.c
@@ -20,11 +20,13 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ */
#include <sys/atomic.h>
#include <sys/errno.h>
@@ -274,7 +276,7 @@ fasttrap_pid_cleanup_cb(void *data)
fasttrap_provider_t **fpp, *fp;
fasttrap_bucket_t *bucket;
dtrace_provider_id_t provid;
- int i, later;
+ int i, later, rval;
static volatile int in = 0;
ASSERT(in == 0);
@@ -336,9 +338,13 @@ fasttrap_pid_cleanup_cb(void *data)
* clean out the unenabled probes.
*/
provid = fp->ftp_provid;
- if (dtrace_unregister(provid) != 0) {
+ if ((rval = dtrace_unregister(provid)) != 0) {
if (fasttrap_total > fasttrap_max / 2)
(void) dtrace_condense(provid);
+
+ if (rval == EAGAIN)
+ fp->ftp_marked = 1;
+
later += fp->ftp_marked;
fpp = &fp->ftp_next;
} else {
@@ -364,12 +370,16 @@ fasttrap_pid_cleanup_cb(void *data)
* get a chance to do that work if and when the timeout is reenabled
* (if detach fails).
*/
- if (later > 0 && fasttrap_timeout != (timeout_id_t)1)
- fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, hz);
- else if (later > 0)
+ if (later > 0) {
+ if (fasttrap_timeout != (timeout_id_t)1) {
+ fasttrap_timeout =
+ timeout(&fasttrap_pid_cleanup_cb, NULL, hz);
+ }
+
fasttrap_cleanup_work = 1;
- else
+ } else {
fasttrap_timeout = 0;
+ }
mutex_exit(&fasttrap_cleanup_mtx);
in = 0;
@@ -876,7 +886,7 @@ fasttrap_disable_callbacks(void)
}
/*ARGSUSED*/
-static void
+static int
fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
{
fasttrap_probe_t *probe = parg;
@@ -904,7 +914,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
* provider can't go away while we're in this code path.
*/
if (probe->ftp_prov->ftp_retired)
- return;
+ return (0);
/*
* If we can't find the process, it may be that we're in the context of
@@ -913,7 +923,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
*/
if ((p = sprlock(probe->ftp_pid)) == NULL) {
if ((curproc->p_flag & SFORKING) == 0)
- return;
+ return (0);
mutex_enter(&pidlock);
p = prfind(probe->ftp_pid);
@@ -975,7 +985,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
* drop our reference on the trap table entry.
*/
fasttrap_disable_callbacks();
- return;
+ return (0);
}
}
@@ -983,6 +993,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
sprunlock(p);
probe->ftp_enabled = 1;
+ return (0);
}
/*ARGSUSED*/
@@ -1946,7 +1957,8 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
probe = kmem_alloc(size, KM_SLEEP);
- if (copyin(uprobe, probe, size) != 0) {
+ if (copyin(uprobe, probe, size) != 0 ||
+ probe->ftps_noffs != noffs) {
kmem_free(probe, size);
return (EFAULT);
}
@@ -2044,13 +2056,6 @@ err:
tp->ftt_proc->ftpc_acount != 0)
break;
- /*
- * The count of active providers can only be
- * decremented (i.e. to zero) during exec, exit, and
- * removal of a meta provider so it should be
- * impossible to drop the count during this operation().
- */
- ASSERT(tp->ftt_proc->ftpc_acount != 0);
tp = tp->ftt_next;
}
@@ -2346,7 +2351,8 @@ static struct dev_ops fasttrap_ops = {
nodev, /* reset */
&fasttrap_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*
diff --git a/uts/common/dtrace/lockstat.c b/uts/common/dtrace/lockstat.c
index 3eb76a061d32..69c8b7254486 100644
--- a/uts/common/dtrace/lockstat.c
+++ b/uts/common/dtrace/lockstat.c
@@ -19,11 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/param.h>
@@ -84,7 +83,7 @@ static kmutex_t lockstat_test; /* for testing purposes only */
static dtrace_provider_id_t lockstat_id;
/*ARGSUSED*/
-static void
+static int
lockstat_enable(void *arg, dtrace_id_t id, void *parg)
{
lockstat_probe_t *probe = parg;
@@ -103,6 +102,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg)
*/
mutex_enter(&lockstat_test);
mutex_exit(&lockstat_test);
+ return (0);
}
/*ARGSUSED*/
@@ -310,11 +310,13 @@ static struct dev_ops lockstat_ops = {
nulldev, /* reset */
&lockstat_cb_ops, /* cb_ops */
NULL, /* bus_ops */
+ NULL, /* power */
+ ddi_quiesce_not_needed, /* quiesce */
};
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. This one is a driver */
- "Lock Statistics %I%", /* name of module */
+ "Lock Statistics", /* name of module */
&lockstat_ops, /* driver ops */
};
diff --git a/uts/common/dtrace/profile.c b/uts/common/dtrace/profile.c
index 8de919a851a2..fc809d3579a5 100644
--- a/uts/common/dtrace/profile.c
+++ b/uts/common/dtrace/profile.c
@@ -19,11 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ */
#include <sys/errno.h>
#include <sys/stat.h>
@@ -361,7 +363,7 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg)
}
/*ARGSUSED*/
-static void
+static int
profile_enable(void *arg, dtrace_id_t id, void *parg)
{
profile_probe_t *prof = parg;
@@ -391,6 +393,7 @@ profile_enable(void *arg, dtrace_id_t id, void *parg)
} else {
prof->prof_cyclic = cyclic_add_omni(&omni);
}
+ return (0);
}
/*ARGSUSED*/
@@ -408,9 +411,25 @@ profile_disable(void *arg, dtrace_id_t id, void *parg)
/*ARGSUSED*/
static int
-profile_usermode(void *arg, dtrace_id_t id, void *parg)
+profile_mode(void *arg, dtrace_id_t id, void *parg)
{
- return (CPU->cpu_profile_pc == 0);
+ profile_probe_t *prof = parg;
+ int mode;
+
+ if (CPU->cpu_profile_pc != 0) {
+ mode = DTRACE_MODE_KERNEL;
+ } else {
+ mode = DTRACE_MODE_USER;
+ }
+
+ if (prof->prof_kind == PROF_TICK) {
+ mode |= DTRACE_MODE_NOPRIV_RESTRICT;
+ } else {
+ ASSERT(prof->prof_kind == PROF_PROFILE);
+ mode |= DTRACE_MODE_NOPRIV_DROP;
+ }
+
+ return (mode);
}
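
[Editor's note: the old boolean dtps_usermode() ("did this fire in user context?") becomes dtps_mode(), which also tells the framework what to do when a consumer lacks privilege to observe the firing context. A sketch of how the two bit groups presumably combine; priv_kernel is a hypothetical flag meaning "consumer may observe kernel state":]

    int mode = prov->dtpv_pops.dtps_mode(prov->dtpv_arg,
        probe->dtpr_id, probe->dtpr_arg);

    if ((mode & DTRACE_MODE_KERNEL) && !priv_kernel) {
            if (mode & DTRACE_MODE_NOPRIV_DROP)
                    return;         /* profile-n: drop the firing outright */
            /* DTRACE_MODE_NOPRIV_RESTRICT: tick-n still fires, but with */
            /* restricted visibility into the interrupted kernel state.  */
    }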
static dtrace_pattr_t profile_attr = {
@@ -430,7 +449,7 @@ static dtrace_pops_t profile_pops = {
NULL,
NULL,
NULL,
- profile_usermode,
+ profile_mode,
profile_destroy
};
@@ -539,7 +558,8 @@ static struct dev_ops profile_ops = {
nodev, /* reset */
&profile_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*
diff --git a/uts/common/dtrace/sdt_subr.c b/uts/common/dtrace/sdt_subr.c
index 66ff8a92a01b..242185071bb2 100644
--- a/uts/common/dtrace/sdt_subr.c
+++ b/uts/common/dtrace/sdt_subr.c
@@ -19,12 +19,9 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/sdt_impl.h>
static dtrace_pattr_t vtrace_attr = {
@@ -43,6 +40,14 @@ static dtrace_pattr_t info_attr = {
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
+static dtrace_pattr_t fc_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
static dtrace_pattr_t fpu_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
@@ -83,6 +88,14 @@ static dtrace_pattr_t xpv_attr = {
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
};
+static dtrace_pattr_t iscsi_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
sdt_provider_t sdt_providers[] = {
{ "vtrace", "__vtrace_", &vtrace_attr, 0 },
{ "sysinfo", "__cpu_sysinfo_", &info_attr, 0 },
@@ -91,11 +104,17 @@ sdt_provider_t sdt_providers[] = {
{ "sched", "__sched_", &stab_attr, 0 },
{ "proc", "__proc_", &stab_attr, 0 },
{ "io", "__io_", &stab_attr, 0 },
+ { "ip", "__ip_", &stab_attr, 0 },
+ { "tcp", "__tcp_", &stab_attr, 0 },
+ { "udp", "__udp_", &stab_attr, 0 },
{ "mib", "__mib_", &stab_attr, 0 },
{ "fsinfo", "__fsinfo_", &fsinfo_attr, 0 },
+ { "iscsi", "__iscsi_", &iscsi_attr, 0 },
{ "nfsv3", "__nfsv3_", &stab_attr, 0 },
{ "nfsv4", "__nfsv4_", &stab_attr, 0 },
{ "xpv", "__xpv_", &xpv_attr, 0 },
+ { "fc", "__fc_", &fc_attr, 0 },
+ { "srp", "__srp_", &fc_attr, 0 },
{ "sysevent", "__sysevent_", &stab_attr, 0 },
{ "sdt", NULL, &sdt_attr, 0 },
{ NULL }
@@ -169,6 +188,73 @@ sdt_argdesc_t sdt_args[] = {
{ "fsinfo", NULL, 0, 0, "vnode_t *", "fileinfo_t *" },
{ "fsinfo", NULL, 1, 1, "int", "int" },
+ { "iscsi", "async-send", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "async-send", 1, 1, "iscsi_async_evt_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "login-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "login-command", 1, 1, "iscsi_login_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "login-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "login-response", 1, 1, "iscsi_login_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "logout-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "logout-command", 1, 1, "iscsi_logout_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "logout-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "logout-response", 1, 1, "iscsi_logout_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "data-request", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "data-request", 1, 1, "iscsi_rtt_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "data-send", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "data-send", 1, 1, "iscsi_data_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "data-receive", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "data-receive", 1, 1, "iscsi_data_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "nop-send", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "nop-send", 1, 1, "iscsi_nop_in_hdr_t *", "iscsiinfo_t *" },
+ { "iscsi", "nop-receive", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "nop-receive", 1, 1, "iscsi_nop_out_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "scsi-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "scsi-command", 1, 1, "iscsi_scsi_cmd_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "scsi-command", 2, 2, "scsi_task_t *", "scsicmd_t *" },
+ { "iscsi", "scsi-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "scsi-response", 1, 1, "iscsi_scsi_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "task-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "task-command", 1, 1, "iscsi_scsi_task_mgt_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "task-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "task-response", 1, 1, "iscsi_scsi_task_mgt_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "text-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "text-command", 1, 1, "iscsi_text_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "text-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "text-response", 1, 1, "iscsi_text_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "xfer-start", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "xfer-start", 1, 0, "idm_conn_t *", "iscsiinfo_t *" },
+ { "iscsi", "xfer-start", 2, 1, "uintptr_t", "xferinfo_t *" },
+ { "iscsi", "xfer-start", 3, 2, "uint32_t"},
+ { "iscsi", "xfer-start", 4, 3, "uintptr_t"},
+ { "iscsi", "xfer-start", 5, 4, "uint32_t"},
+ { "iscsi", "xfer-start", 6, 5, "uint32_t"},
+ { "iscsi", "xfer-start", 7, 6, "uint32_t"},
+ { "iscsi", "xfer-start", 8, 7, "int"},
+ { "iscsi", "xfer-done", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "xfer-done", 1, 0, "idm_conn_t *", "iscsiinfo_t *" },
+ { "iscsi", "xfer-done", 2, 1, "uintptr_t", "xferinfo_t *" },
+ { "iscsi", "xfer-done", 3, 2, "uint32_t"},
+ { "iscsi", "xfer-done", 4, 3, "uintptr_t"},
+ { "iscsi", "xfer-done", 5, 4, "uint32_t"},
+ { "iscsi", "xfer-done", 6, 5, "uint32_t"},
+ { "iscsi", "xfer-done", 7, 6, "uint32_t"},
+ { "iscsi", "xfer-done", 8, 7, "int"},
+
{ "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *",
"conninfo_t *" },
{ "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *",
@@ -788,6 +874,75 @@ sdt_argdesc_t sdt_args[] = {
"nfsv4cbinfo_t *" },
{ "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *" },
+ { "ip", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "ip", "send", 1, 1, "conn_t *", "csinfo_t *" },
+ { "ip", "send", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },
+ { "ip", "send", 4, 4, "ipha_t *", "ipv4info_t *" },
+ { "ip", "send", 5, 5, "ip6_t *", "ipv6info_t *" },
+ { "ip", "send", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */
+ { "ip", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "ip", "receive", 1, 1, "conn_t *", "csinfo_t *" },
+ { "ip", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "ip", "receive", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },
+ { "ip", "receive", 4, 4, "ipha_t *", "ipv4info_t *" },
+ { "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" },
+ { "ip", "receive", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */
+
+ { "tcp", "connect-established", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "connect-established", 1, 1, "ip_xmit_attr_t *",
+ "csinfo_t *" },
+ { "tcp", "connect-established", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "connect-established", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "connect-established", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "connect-refused", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "connect-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "connect-refused", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "connect-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "connect-refused", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "connect-request", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "connect-request", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "connect-request", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "connect-request", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "connect-request", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "accept-established", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "accept-established", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "accept-established", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "accept-established", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "accept-established", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "accept-refused", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "accept-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "accept-refused", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "accept-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "accept-refused", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "state-change", 0, 0, "void", "void" },
+ { "tcp", "state-change", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "state-change", 2, 2, "void", "void" },
+ { "tcp", "state-change", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "state-change", 4, 4, "void", "void" },
+ { "tcp", "state-change", 5, 5, "int32_t", "tcplsinfo_t *" },
+ { "tcp", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "send", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" },
+ { "tcp", "send", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "send", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" },
+ { "tcp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "receive", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" },
+ { "tcp", "receive", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "receive", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" },
+
+ { "udp", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "udp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "udp", "send", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "udp", "send", 3, 3, "udp_t *", "udpsinfo_t *" },
+ { "udp", "send", 4, 4, "udpha_t *", "udpinfo_t *" },
+ { "udp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "udp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "udp", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "udp", "receive", 3, 3, "udp_t *", "udpsinfo_t *" },
+ { "udp", "receive", 4, 4, "udpha_t *", "udpinfo_t *" },
+
{ "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" },
{ "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" },
@@ -848,6 +1003,154 @@ sdt_argdesc_t sdt_args[] = {
{ "xpv", "setvcpucontext-end", 0, 0, "int" },
{ "xpv", "setvcpucontext-start", 0, 0, "domid_t" },
{ "xpv", "setvcpucontext-start", 1, 1, "vcpu_guest_context_t *" },
+
+ { "srp", "service-up", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "service-up", 1, 0, "srpt_session_t *", "srp_portinfo_t *" },
+ { "srp", "service-down", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "service-down", 1, 0, "srpt_session_t *",
+ "srp_portinfo_t *" },
+ { "srp", "login-command", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "login-command", 1, 0, "srpt_session_t *",
+ "srp_portinfo_t *" },
+ { "srp", "login-command", 2, 1, "srp_login_req_t *",
+ "srp_logininfo_t *" },
+ { "srp", "login-response", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "login-response", 1, 0, "srpt_session_t *",
+ "srp_portinfo_t *" },
+ { "srp", "login-response", 2, 1, "srp_login_rsp_t *",
+ "srp_logininfo_t *" },
+ { "srp", "login-response", 3, 2, "srp_login_rej_t *" },
+ { "srp", "logout-command", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "logout-command", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "task-command", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "task-command", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "task-command", 2, 1, "srp_cmd_req_t *", "srp_taskinfo_t *" },
+ { "srp", "task-response", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "task-response", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "task-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" },
+ { "srp", "task-response", 3, 2, "scsi_task_t *" },
+ { "srp", "task-response", 4, 3, "int8_t" },
+ { "srp", "scsi-command", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "scsi-command", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "scsi-command", 2, 1, "scsi_task_t *", "scsicmd_t *" },
+ { "srp", "scsi-command", 3, 2, "srp_cmd_req_t *", "srp_taskinfo_t *" },
+ { "srp", "scsi-response", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "scsi-response", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "scsi-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" },
+ { "srp", "scsi-response", 3, 2, "scsi_task_t *" },
+ { "srp", "scsi-response", 4, 3, "int8_t" },
+ { "srp", "xfer-start", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "xfer-start", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "xfer-start", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" },
+ { "srp", "xfer-start", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" },
+ { "srp", "xfer-start", 4, 3, "ibt_send_wr_t *"},
+ { "srp", "xfer-start", 5, 4, "uint32_t" },
+ { "srp", "xfer-start", 6, 5, "uint32_t" },
+ { "srp", "xfer-start", 7, 6, "uint32_t" },
+ { "srp", "xfer-start", 8, 7, "uint32_t" },
+ { "srp", "xfer-done", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "xfer-done", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "xfer-done", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" },
+ { "srp", "xfer-done", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" },
+ { "srp", "xfer-done", 4, 3, "ibt_send_wr_t *"},
+ { "srp", "xfer-done", 5, 4, "uint32_t" },
+ { "srp", "xfer-done", 6, 5, "uint32_t" },
+ { "srp", "xfer-done", 7, 6, "uint32_t" },
+ { "srp", "xfer-done", 8, 7, "uint32_t" },
+
+ { "fc", "link-up", 0, 0, "fct_i_local_port_t *", "conninfo_t *" },
+ { "fc", "link-down", 0, 0, "fct_i_local_port_t *", "conninfo_t *" },
+ { "fc", "fabric-login-start", 0, 0, "fct_i_local_port_t *",
+ "conninfo_t *" },
+ { "fc", "fabric-login-start", 1, 0, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "fabric-login-end", 0, 0, "fct_i_local_port_t *",
+ "conninfo_t *" },
+ { "fc", "fabric-login-end", 1, 0, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-start", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-login-start", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-start", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-start", 3, 3, "int", "int" },
+ { "fc", "rport-login-end", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-login-end", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-end", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-end", 3, 3, "int", "int" },
+ { "fc", "rport-login-end", 4, 4, "int", "int" },
+ { "fc", "rport-logout-start", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-logout-start", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-start", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-start", 3, 3, "int", "int" },
+ { "fc", "rport-logout-end", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-logout-end", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-end", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-end", 3, 3, "int", "int" },
+ { "fc", "scsi-command", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "scsi-command", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "scsi-command", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "scsi-command", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "scsi-response", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "scsi-response", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "scsi-response", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "scsi-response", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-start", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "xfer-start", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-start", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "xfer-start", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-start", 4, 4, "stmf_data_buf_t *",
+ "fc_xferinfo_t *" },
+ { "fc", "xfer-done", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "xfer-done", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-done", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "xfer-done", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-done", 4, 4, "stmf_data_buf_t *",
+ "fc_xferinfo_t *" },
+ { "fc", "rscn-receive", 0, 0, "fct_i_local_port_t *",
+ "conninfo_t *" },
+ { "fc", "rscn-receive", 1, 1, "int", "int"},
+ { "fc", "abts-receive", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "abts-receive", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "abts-receive", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+
{ NULL }
};
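
[Editor's note: each sdt_args[] entry reads { provider, probe (NULL matches every probe of the provider), consumer-visible arg number, native arg number, native type, optional translated type }. Two readings from the table above:]

    /* ip:::send arg3: native arg 3 is an ill_t (tagged for the          */
    /* translator as __dtrace_ipsr_ill_t *), presented in D as ifinfo_t *. */
    { "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },

    /* srp xfer-start arg1: a second view (srp_portinfo_t *) of the same */
    /* native arg 0, which is why the mapping-arg column repeats 0.      */
    { "srp", "xfer-start", 1, 0, "srpt_channel_t *", "srp_portinfo_t *" },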
diff --git a/uts/common/dtrace/systrace.c b/uts/common/dtrace/systrace.c
index be14660b04c0..b864041c450d 100644
--- a/uts/common/dtrace/systrace.c
+++ b/uts/common/dtrace/systrace.c
@@ -19,11 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dtrace.h>
#include <sys/systrace.h>
@@ -141,7 +140,7 @@ systrace_destroy(void *arg, dtrace_id_t id, void *parg)
}
/*ARGSUSED*/
-static void
+static int
systrace_enable(void *arg, dtrace_id_t id, void *parg)
{
int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
@@ -162,7 +161,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
if (enabled) {
ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
- return;
+ return (0);
}
(void) casptr(&sysent[sysnum].sy_callc,
@@ -173,6 +172,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
(void *)systrace_sysent32[sysnum].stsy_underlying,
(void *)dtrace_systrace_syscall32);
#endif
+ return (0);
}
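
[Editor's note: the handler swap uses casptr() so concurrent syscall dispatch observes either the original sy_callc or the DTrace shim, never a torn pointer; disable performs the mirror-image swap. A generic C11 analogue of the idiom, with slot, underlying, and shim as illustrative names:]

    #include <stdatomic.h>

    /* slot plays the role of sysent[sysnum].sy_callc (sketch). */
    static _Atomic(void *) slot;

    void
    interpose(void *underlying, void *shim)
    {
            void *expected = underlying;

            /* Succeeds only if nobody else swapped the slot first. */
            (void) atomic_compare_exchange_strong(&slot, &expected, shim);
    }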
/*ARGSUSED*/
@@ -336,7 +336,8 @@ static struct dev_ops systrace_ops = {
nodev, /* reset */
&systrace_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*