Diffstat (limited to 'uts/common/dtrace/dtrace.c')
-rw-r--r--	uts/common/dtrace/dtrace.c	863
1 file changed, 701 insertions(+), 162 deletions(-)
diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c
index c721386280f8..0c5e4b3a011a 100644
--- a/uts/common/dtrace/dtrace.c
+++ b/uts/common/dtrace/dtrace.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* DTrace - Dynamic Tracing for Solaris
*
@@ -121,7 +119,7 @@ dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
size_t dtrace_global_maxsize = (16 * 1024);
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
-dtrace_optval_t dtrace_helper_actions_max = 32;
+dtrace_optval_t dtrace_helper_actions_max = 1024;
dtrace_optval_t dtrace_helper_providers_max = 32;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t dtrace_strsize_default = 256;
@@ -146,6 +144,7 @@ int dtrace_err_verbose;
hrtime_t dtrace_deadman_interval = NANOSEC;
hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
+hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
/*
* DTrace External Variables
@@ -186,7 +185,9 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t dtrace_probegen; /* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
+static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
+static int dtrace_dynvar_failclean; /* dynvars failed to clean */
/*
* DTrace Locking
@@ -240,10 +241,16 @@ static void
dtrace_nullop(void)
{}
+static int
+dtrace_enable_nullop(void)
+{
+ return (0);
+}
+
static dtrace_pops_t dtrace_provider_ops = {
(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
(void (*)(void *, struct modctl *))dtrace_nullop,
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
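
The dtps_enable entry point now returns an int so that a provider can refuse
an enabling. A minimal sketch of a provider adopting the new contract (all
"myprov_" names are hypothetical, not part of this change):

    static int
    myprov_enable(void *arg, dtrace_id_t id, void *parg)
    {
            /*
             * Under the new signature, returning nonzero indicates that
             * the enabling failed; dtrace_ecb_enable() propagates this as
             * DTRACE_MATCH_FAIL and the consumer ultimately sees EBUSY.
             */
            if (myprov_attach_tracepoint(parg) != 0)
                    return (-1);

            return (0);
    }

Providers with nothing to do on enable now point this slot at something like
dtrace_enable_nullop above rather than dtrace_nullop, keeping the ops table
type-correct.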
@@ -427,6 +434,7 @@ dtrace_load##bits(uintptr_t addr) \
#define DTRACE_DYNHASH_SINK 1
#define DTRACE_DYNHASH_VALID 2
+#define DTRACE_MATCH_FAIL -1
#define DTRACE_MATCH_NEXT 0
#define DTRACE_MATCH_DONE 1
#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
@@ -453,11 +461,13 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
+static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
+static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
@@ -1098,10 +1108,13 @@ dtrace_priv_proc_common_nocd()
}
static int
-dtrace_priv_proc_destructive(dtrace_state_t *state)
+dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
int action = state->dts_cred.dcr_action;
+ if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
+ goto bad;
+
if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
dtrace_priv_proc_common_zone(state) == 0)
goto bad;
@@ -1123,15 +1136,17 @@ bad:
}
static int
-dtrace_priv_proc_control(dtrace_state_t *state)
+dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
- if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
- return (1);
+ if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
+ if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
+ return (1);
- if (dtrace_priv_proc_common_zone(state) &&
- dtrace_priv_proc_common_user(state) &&
- dtrace_priv_proc_common_nocd())
- return (1);
+ if (dtrace_priv_proc_common_zone(state) &&
+ dtrace_priv_proc_common_user(state) &&
+ dtrace_priv_proc_common_nocd())
+ return (1);
+ }
cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
@@ -1139,9 +1154,10 @@ dtrace_priv_proc_control(dtrace_state_t *state)
}
static int
-dtrace_priv_proc(dtrace_state_t *state)
+dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
{
- if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
+ if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
+ (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
return (1);
cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
@@ -1172,6 +1188,109 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state)
}
/*
+ * Determine if the dte_cond of the specified ECB allows for processing of
+ * the current probe to continue. Note that this routine may allow continued
+ * processing, but with access(es) stripped from the mstate's dtms_access
+ * field.
+ */
+static int
+dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
+ dtrace_ecb_t *ecb)
+{
+ dtrace_probe_t *probe = ecb->dte_probe;
+ dtrace_provider_t *prov = probe->dtpr_provider;
+ dtrace_pops_t *pops = &prov->dtpv_pops;
+ int mode = DTRACE_MODE_NOPRIV_DROP;
+
+ ASSERT(ecb->dte_cond);
+
+ if (pops->dtps_mode != NULL) {
+ mode = pops->dtps_mode(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg);
+
+ ASSERT((mode & DTRACE_MODE_USER) ||
+ (mode & DTRACE_MODE_KERNEL));
+ ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
+ (mode & DTRACE_MODE_NOPRIV_DROP));
+ }
+
+ /*
+ * If the dte_cond bits indicate that this consumer is only allowed to
+ * see user-mode firings of this probe, call the provider's dtps_mode()
+ * entry point to check that the probe was fired while in a user
+ * context. If that's not the case, use the policy specified by the
+ * provider to determine if we drop the probe or merely restrict
+ * operation.
+ */
+ if (ecb->dte_cond & DTRACE_COND_USERMODE) {
+ ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
+
+ if (!(mode & DTRACE_MODE_USER)) {
+ if (mode & DTRACE_MODE_NOPRIV_DROP)
+ return (0);
+
+ mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
+ }
+ }
+
+ /*
+ * This is more subtle than it looks. We have to be absolutely certain
+ * that CRED() isn't going to change out from under us so it's only
+ * legit to examine that structure if we're in constrained situations.
+ * legit to examine that structure if we're in constrained situations.
+ * Currently, the only time we'll perform this check is if a non-super-user
+ * has enabled the profile or syscall providers -- providers that
+ * allow visibility of all processes. For the profile case, the check
+ * above will ensure that we're examining a user context.
+ */
+ if (ecb->dte_cond & DTRACE_COND_OWNER) {
+ cred_t *cr;
+ cred_t *s_cr = state->dts_cred.dcr_cred;
+ proc_t *proc;
+
+ ASSERT(s_cr != NULL);
+
+ if ((cr = CRED()) == NULL ||
+ s_cr->cr_uid != cr->cr_uid ||
+ s_cr->cr_uid != cr->cr_ruid ||
+ s_cr->cr_uid != cr->cr_suid ||
+ s_cr->cr_gid != cr->cr_gid ||
+ s_cr->cr_gid != cr->cr_rgid ||
+ s_cr->cr_gid != cr->cr_sgid ||
+ (proc = ttoproc(curthread)) == NULL ||
+ (proc->p_flag & SNOCD)) {
+ if (mode & DTRACE_MODE_NOPRIV_DROP)
+ return (0);
+
+ mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
+ }
+ }
+
+ /*
+ * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
+ * in our zone, check to see if our mode policy is to restrict rather
+ * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
+ * and DTRACE_ACCESS_ARGS.
+ */
+ if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
+ cred_t *cr;
+ cred_t *s_cr = state->dts_cred.dcr_cred;
+
+ ASSERT(s_cr != NULL);
+
+ if ((cr = CRED()) == NULL ||
+ s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
+ if (mode & DTRACE_MODE_NOPRIV_DROP)
+ return (0);
+
+ mstate->dtms_access &=
+ ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
+ }
+ }
+
+ return (1);
+}
+
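+
A provider participates in this policy through the dtps_mode entry point
that dtrace_priv_probe() consults. A sketch of what such an entry point
might look like (hypothetical provider, with myprov_fired_in_usermode()
standing in for a real context test):

    static int
    myprov_mode(void *arg, dtrace_id_t id, void *parg)
    {
            int mode = myprov_fired_in_usermode(parg) ?
                DTRACE_MODE_USER : DTRACE_MODE_KERNEL;

            /*
             * NOPRIV_RESTRICT: an under-privileged consumer still sees
             * the firing, but with DTRACE_ACCESS_ARGS/_PROC stripped
             * from dtms_access; DTRACE_MODE_NOPRIV_DROP would instead
             * suppress the firing entirely.
             */
            return (mode | DTRACE_MODE_NOPRIV_RESTRICT);
    }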
+/*
* Note: not called from probe context. This function is called
* asynchronously (and at a regular interval) from outside of probe context to
* clean the dirty dynamic variable lists on all CPUs. Dynamic variable
@@ -1182,12 +1301,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
dtrace_dynvar_t *dirty;
dtrace_dstate_percpu_t *dcpu;
- int i, work = 0;
+ dtrace_dynvar_t **rinsep;
+ int i, j, work = 0;
for (i = 0; i < NCPU; i++) {
dcpu = &dstate->dtds_percpu[i];
-
- ASSERT(dcpu->dtdsc_rinsing == NULL);
+ rinsep = &dcpu->dtdsc_rinsing;
/*
* If the dirty list is NULL, there is no dirty work to do.
@@ -1195,14 +1314,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
if (dcpu->dtdsc_dirty == NULL)
continue;
- /*
- * If the clean list is non-NULL, then we're not going to do
- * any work for this CPU -- it means that there has not been
- * a dtrace_dynvar() allocation on this CPU (or from this CPU)
- * since the last time we cleaned house.
- */
- if (dcpu->dtdsc_clean != NULL)
+ if (dcpu->dtdsc_rinsing != NULL) {
+ /*
+ * If the rinsing list is non-NULL, then it is because
+ * this CPU was selected to accept another CPU's
+ * dirty list -- and since that time, dirty buffers
+ * have accumulated. This is a highly unlikely
+ * condition, but we choose to ignore the dirty
+ * buffers -- they'll be picked up on a future cleanse.
+ */
continue;
+ }
+
+ if (dcpu->dtdsc_clean != NULL) {
+ /*
+ * If the clean list is non-NULL, then we're in a
+ * situation where a CPU has done deallocations (we
+ * have a non-NULL dirty list) but no allocations (we
+ * also have a non-NULL clean list). We can't simply
+ * move the dirty list into the clean list on this
+ * CPU, yet we also don't want to allow this condition
+ * to persist, lest a short clean list prevent a
+ * massive dirty list from being cleaned (which in
+ * turn could lead to otherwise avoidable dynamic
+ * drops). To deal with this, we look for some CPU
+ * with a NULL clean list, NULL dirty list, and NULL
+ * rinsing list -- and then we borrow this CPU to
+ * rinse our dirty list.
+ */
+ for (j = 0; j < NCPU; j++) {
+ dtrace_dstate_percpu_t *rinser;
+
+ rinser = &dstate->dtds_percpu[j];
+
+ if (rinser->dtdsc_rinsing != NULL)
+ continue;
+
+ if (rinser->dtdsc_dirty != NULL)
+ continue;
+
+ if (rinser->dtdsc_clean != NULL)
+ continue;
+
+ rinsep = &rinser->dtdsc_rinsing;
+ break;
+ }
+
+ if (j == NCPU) {
+ /*
+ * We were unable to find another CPU that
+ * could accept this dirty list -- we are
+ * therefore unable to clean it now.
+ */
+ dtrace_dynvar_failclean++;
+ continue;
+ }
+ }
work = 1;
@@ -1219,7 +1386,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
* on a hash chain, either the dirty list or the
* rinsing list for some CPU must be non-NULL.)
*/
- dcpu->dtdsc_rinsing = dirty;
+ *rinsep = dirty;
dtrace_membar_producer();
} while (dtrace_casptr(&dcpu->dtdsc_dirty,
dirty, NULL) != dirty);
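
The detach itself is lock-free: the dirty list is published on a rinsing
slot (possibly a borrowed CPU's, per the loop above) before being CASed
away, so a probe walking a hash chain always finds live entries on either
the dirty or the rinsing list. A user-land model of the hand-off, assuming
C11 atomics (illustrative names only):

    #include <stdatomic.h>

    typedef struct dynvar dynvar_t;
    struct dynvar { dynvar_t *dtdv_next; };

    static dynvar_t *
    detach_dirty(_Atomic(dynvar_t *) *dirtyp, dynvar_t *volatile *rinsep)
    {
            dynvar_t *dirty;

            do {
                    if ((dirty = atomic_load(dirtyp)) == NULL)
                            return (NULL);

                    /* publish before detaching, as in the loop above */
                    *rinsep = dirty;
                    atomic_thread_fence(memory_order_release);
            } while (!atomic_compare_exchange_strong(dirtyp, &dirty, NULL));

            return (dirty);
    }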
@@ -1650,7 +1817,7 @@ retry:
ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
/*
- * Now we'll move the clean list to the free list.
+ * Now we'll move the clean list to our free list.
* It's impossible for this to fail: the only way
* the free list can be updated is through this
* code path, and only one CPU can own the clean list.
@@ -1663,6 +1830,7 @@ retry:
* owners of the clean lists out before resetting
* the clean lists.
*/
+ dcpu = &dstate->dtds_percpu[me];
rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
ASSERT(rval == NULL);
goto retry;
@@ -1804,6 +1972,75 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
lquanta[levels + 1] += incr;
}
+static int
+dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
+ uint16_t high, uint16_t nsteps, int64_t value)
+{
+ int64_t this = 1, last, next;
+ int base = 1, order;
+
+ ASSERT(factor <= nsteps);
+ ASSERT(nsteps % factor == 0);
+
+ for (order = 0; order < low; order++)
+ this *= factor;
+
+ /*
+ * If our value is less than our factor taken to the power of the
+ * low order of magnitude, it goes into the zeroth bucket.
+ */
+ if (value < (last = this))
+ return (0);
+
+ for (this *= factor; order <= high; order++) {
+ int nbuckets = this > nsteps ? nsteps : this;
+
+ if ((next = this * factor) < this) {
+ /*
+ * We should not generally get log/linear quantizations
+ * with a high magnitude that allows 64-bits to
+ * overflow, but we nonetheless protect against this
+ * by explicitly checking for overflow, and clamping
+ * our value accordingly.
+ */
+ value = this - 1;
+ }
+
+ if (value < this) {
+ /*
+ * If our value lies within this order of magnitude,
+ * determine its position by taking the offset within
+ * the order of magnitude, dividing by the bucket
+ * width, and adding to our (accumulated) base.
+ */
+ return (base + (value - last) / (this / nbuckets));
+ }
+
+ base += nbuckets - (nbuckets / factor);
+ last = this;
+ this = next;
+ }
+
+ /*
+ * Our value is greater than or equal to our factor taken to the
+ * power of one plus the high magnitude -- return the top bucket.
+ */
+ return (base);
+}
+
+static void
+dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
+{
+ uint64_t arg = *llquanta++;
+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
+ uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
+ uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
+ uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
+
+ llquanta[dtrace_aggregate_llquantize_bucket(factor,
+ low, high, nsteps, nval)] += incr;
+}
+
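
The layout this produces is easiest to see with concrete numbers. A quick
user-land check, assuming the bucket logic above with factor=10, low=0,
high=2, nsteps=10: one underflow bucket (index 0), nine buckets per decade
for [1,10), [10,100) and [100,1000) -- value 42, for example, lands at
10 + (42 - 10) / 10 = bucket 13 -- and one overflow bucket (index 28), so
the DTRACEAGG_LLQUANTIZE code below sizes the aggregation at 28 + 2 = 30
words: indices 0..28 plus the leading encoded-argument word.

    #include <stdio.h>

    int
    main(void)
    {
            int factor = 10, low = 0, high = 2, nsteps = 10;
            int order, base = 1;
            long long this = 1;

            for (order = 0; order < low; order++)
                    this *= factor;

            for (this *= factor; order <= high; order++) {
                    int nbuckets = this > nsteps ? nsteps : (int)this;

                    base += nbuckets - (nbuckets / factor);
                    this *= factor;
            }

            /* prints "top bucket: 28 (30 words including header)" */
            printf("top bucket: %d (%d words including header)\n",
                base, base + 2);
            return (0);
    }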
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
@@ -2585,6 +2822,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
switch (v) {
case DIF_VAR_ARGS:
+ if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
+ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
+ CPU_DTRACE_KPRIV;
+ return (0);
+ }
+
ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
if (ndx >= sizeof (mstate->dtms_arg) /
sizeof (mstate->dtms_arg[0])) {
@@ -2620,7 +2863,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
case DIF_VAR_UREGS: {
klwp_t *lwp;
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
if ((lwp = curthread->t_lwp) == NULL) {
@@ -2632,6 +2875,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (dtrace_getreg(lwp->lwp_regs, ndx));
}
+ case DIF_VAR_VMREGS: {
+ uint64_t rval;
+
+ if (!dtrace_priv_kernel(state))
+ return (0);
+
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+
+ rval = dtrace_getvmreg(ndx,
+ &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
+
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+ return (rval);
+ }
+
case DIF_VAR_CURTHREAD:
if (!dtrace_priv_kernel(state))
return (0);
@@ -2684,7 +2943,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (mstate->dtms_stackdepth);
case DIF_VAR_USTACKDEPTH:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
/*
@@ -2739,7 +2998,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (mstate->dtms_caller);
case DIF_VAR_UCALLER:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
@@ -2787,7 +3046,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_PID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2809,7 +3068,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
case DIF_VAR_PPID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2836,7 +3095,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_tid);
case DIF_VAR_EXECNAME:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2856,7 +3115,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_ZONENAME:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2876,7 +3135,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_UID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2897,7 +3156,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
case DIF_VAR_GID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -2919,7 +3178,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
case DIF_VAR_ERRNO: {
klwp_t *lwp;
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
/*
@@ -3259,7 +3518,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
- dtrace_priv_proc_control(state) &&
+ dtrace_priv_proc_control(state, mstate) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyout(kaddr, uaddr, size, flags);
@@ -3274,7 +3533,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
- dtrace_priv_proc_control(state) &&
+ dtrace_priv_proc_control(state, mstate) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyoutstr(kaddr, uaddr, size, flags);
@@ -3600,7 +3859,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
int64_t index = (int64_t)tupregs[1].dttk_value;
int64_t remaining = (int64_t)tupregs[2].dttk_value;
size_t len = dtrace_strlen((char *)s, size);
- int64_t i = 0;
+ int64_t i;
if (!dtrace_canload(s, len + 1, mstate, vstate)) {
regs[rd] = NULL;
@@ -3645,7 +3904,54 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
break;
}
- case DIF_SUBR_GETMAJOR:
+ case DIF_SUBR_TOUPPER:
+ case DIF_SUBR_TOLOWER: {
+ uintptr_t s = tupregs[0].dttk_value;
+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
+ char *dest = (char *)mstate->dtms_scratch_ptr, c;
+ size_t len = dtrace_strlen((char *)s, size);
+ char lower, upper, convert;
+ int64_t i;
+
+ if (subr == DIF_SUBR_TOUPPER) {
+ lower = 'a';
+ upper = 'z';
+ convert = 'A';
+ } else {
+ lower = 'A';
+ upper = 'Z';
+ convert = 'a';
+ }
+
+ if (!dtrace_canload(s, len + 1, mstate, vstate)) {
+ regs[rd] = NULL;
+ break;
+ }
+
+ if (!DTRACE_INSCRATCH(mstate, size)) {
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
+ regs[rd] = NULL;
+ break;
+ }
+
+ for (i = 0; i < size - 1; i++) {
+ if ((c = dtrace_load8(s + i)) == '\0')
+ break;
+
+ if (c >= lower && c <= upper)
+ c = convert + (c - lower);
+
+ dest[i] = c;
+ }
+
+ ASSERT(i < size);
+ dest[i] = '\0';
+ regs[rd] = (uintptr_t)dest;
+ mstate->dtms_scratch_ptr += size;
+ break;
+ }
+
+ case DIF_SUBR_GETMAJOR:
#ifdef _LP64
regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
#else
@@ -3907,9 +4213,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
case DIF_SUBR_LLTOSTR: {
int64_t i = (int64_t)tupregs[0].dttk_value;
- int64_t val = i < 0 ? i * -1 : i;
- uint64_t size = 22; /* enough room for 2^64 in decimal */
+ uint64_t val, digit;
+ uint64_t size = 65; /* enough room for 2^64 in binary */
char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
+ int base = 10;
+
+ if (nargs > 1) {
+ if ((base = tupregs[1].dttk_value) <= 1 ||
+ base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
+ *flags |= CPU_DTRACE_ILLOP;
+ break;
+ }
+ }
+
+ val = (base == 10 && i < 0) ? i * -1 : i;
if (!DTRACE_INSCRATCH(mstate, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
@@ -3917,13 +4234,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
break;
}
- for (*end-- = '\0'; val; val /= 10)
- *end-- = '0' + (val % 10);
+ for (*end-- = '\0'; val; val /= base) {
+ if ((digit = val % base) <= '9' - '0') {
+ *end-- = '0' + digit;
+ } else {
+ *end-- = 'a' + (digit - ('9' - '0') - 1);
+ }
+ }
+
+ if (i == 0 && base == 16)
+ *end-- = '0';
+
+ if (base == 16)
+ *end-- = 'x';
- if (i == 0)
+ if (i == 0 || base == 8 || base == 16)
*end-- = '0';
- if (i < 0)
+ if (i < 0 && base == 10)
*end-- = '-';
regs[rd] = (uintptr_t)end + 1;
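
With the optional base argument, lltostr() now renders hexadecimal and
octal values with the conventional "0x" and "0" prefixes, built backwards
from the end of scratch space. A user-land walk-through of the same
formatting (a sketch, not the kernel code; LLONG_MIN is ignored here):

    #include <stdio.h>

    static const char *
    lltostr(long long i, int base, char *buf, int size)
    {
            unsigned long long val = (base == 10 && i < 0) ? -i : i;
            char *end = buf + size - 1;
            int digit;

            for (*end-- = '\0'; val; val /= base) {
                    if ((digit = val % base) <= 9)
                            *end-- = '0' + digit;
                    else
                            *end-- = 'a' + digit - 10;
            }

            if (i == 0 && base == 16)
                    *end-- = '0';
            if (base == 16)
                    *end-- = 'x';
            if (i == 0 || base == 8 || base == 16)
                    *end-- = '0';
            if (i < 0 && base == 10)
                    *end-- = '-';

            return (end + 1);
    }

    int
    main(void)
    {
            char buf[65];

            /* prints: 0xff 0377 -255 0x0 */
            printf("%s ", lltostr(255, 16, buf, sizeof (buf)));
            printf("%s ", lltostr(255, 8, buf, sizeof (buf)));
            printf("%s ", lltostr(-255, 10, buf, sizeof (buf)));
            printf("%s\n", lltostr(0, 16, buf, sizeof (buf)));
            return (0);
    }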
@@ -5558,6 +5886,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
dtrace_vstate_t *vstate = &state->dts_vstate;
dtrace_provider_t *prov = probe->dtpr_provider;
+ uint64_t tracememsize = 0;
int committed = 0;
caddr_t tomax;
@@ -5578,6 +5907,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
#endif
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
+ mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
@@ -5615,65 +5945,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
}
}
- if (ecb->dte_cond) {
- /*
- * If the dte_cond bits indicate that this
- * consumer is only allowed to see user-mode firings
- * of this probe, call the provider's dtps_usermode()
- * entry point to check that the probe was fired
- * while in a user context. Skip this ECB if that's
- * not the case.
- */
- if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
- prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg) == 0)
- continue;
-
- /*
- * This is more subtle than it looks. We have to be
- * absolutely certain that CRED() isn't going to
- * change out from under us so it's only legit to
- * examine that structure if we're in constrained
- * situations. Currently, the only times we'll this
- * check is if a non-super-user has enabled the
- * profile or syscall providers -- providers that
- * allow visibility of all processes. For the
- * profile case, the check above will ensure that
- * we're examining a user context.
- */
- if (ecb->dte_cond & DTRACE_COND_OWNER) {
- cred_t *cr;
- cred_t *s_cr =
- ecb->dte_state->dts_cred.dcr_cred;
- proc_t *proc;
-
- ASSERT(s_cr != NULL);
-
- if ((cr = CRED()) == NULL ||
- s_cr->cr_uid != cr->cr_uid ||
- s_cr->cr_uid != cr->cr_ruid ||
- s_cr->cr_uid != cr->cr_suid ||
- s_cr->cr_gid != cr->cr_gid ||
- s_cr->cr_gid != cr->cr_rgid ||
- s_cr->cr_gid != cr->cr_sgid ||
- (proc = ttoproc(curthread)) == NULL ||
- (proc->p_flag & SNOCD))
- continue;
- }
-
- if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
- cred_t *cr;
- cred_t *s_cr =
- ecb->dte_state->dts_cred.dcr_cred;
-
- ASSERT(s_cr != NULL);
-
- if ((cr = CRED()) == NULL ||
- s_cr->cr_zone->zone_id !=
- cr->cr_zone->zone_id)
- continue;
- }
- }
+ if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
+ continue;
if (now - state->dts_alive > dtrace_deadman_timeout) {
/*
@@ -5713,9 +5986,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
mstate.dtms_present |= DTRACE_MSTATE_EPID;
if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
- mstate.dtms_access = DTRACE_ACCESS_KERNEL;
- else
- mstate.dtms_access = 0;
+ mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
if (pred != NULL) {
dtrace_difo_t *dp = pred->dtp_difo;
@@ -5775,7 +6046,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
switch (act->dta_kind) {
case DTRACEACT_STOP:
- if (dtrace_priv_proc_destructive(state))
+ if (dtrace_priv_proc_destructive(state,
+ &mstate))
dtrace_action_stop();
continue;
@@ -5802,7 +6074,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_JSTACK:
case DTRACEACT_USTACK:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, &mstate))
continue;
/*
@@ -5835,6 +6107,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
}
+ /*
+ * Clear the string space, since there's no
+ * helper to do it for us.
+ */
+ if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
+ int depth = DTRACE_USTACK_NFRAMES(
+ rec->dtrd_arg);
+ size_t strsize = DTRACE_USTACK_STRSIZE(
+ rec->dtrd_arg);
+ uint64_t *buf = (uint64_t *)(tomax +
+ valoffs);
+ void *strspace = &buf[depth + 1];
+
+ dtrace_bzero(strspace,
+ MIN(depth, strsize));
+ }
+
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_getupcstack((uint64_t *)
(tomax + valoffs),
@@ -5888,7 +6177,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
case DTRACEACT_RAISE:
- if (dtrace_priv_proc_destructive(state))
+ if (dtrace_priv_proc_destructive(state,
+ &mstate))
dtrace_action_raise(val);
continue;
@@ -5915,6 +6205,11 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_PRINTA:
case DTRACEACT_SYSTEM:
case DTRACEACT_FREOPEN:
+ case DTRACEACT_TRACEMEM:
+ break;
+
+ case DTRACEACT_TRACEMEM_DYNSIZE:
+ tracememsize = val;
break;
case DTRACEACT_SYM:
@@ -5928,7 +6223,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_UADDR: {
struct pid *pid = curthread->t_procp->p_pidp;
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc(state, &mstate))
continue;
DTRACE_STORE(uint64_t, tomax,
@@ -5980,6 +6275,12 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
uintptr_t end = valoffs + size;
+ if (tracememsize != 0 &&
+ valoffs + tracememsize < end) {
+ end = valoffs + tracememsize;
+ tracememsize = 0;
+ }
+
if (!dtrace_vcanload((void *)(uintptr_t)val,
&dp->dtdo_rtype, &mstate, vstate))
continue;
@@ -6655,7 +6956,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
{
dtrace_probe_t template, *probe;
dtrace_hash_t *hash = NULL;
- int len, best = INT_MAX, nmatched = 0;
+ int len, rc, best = INT_MAX, nmatched = 0;
dtrace_id_t i;
ASSERT(MUTEX_HELD(&dtrace_lock));
@@ -6667,7 +6968,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
if (pkp->dtpk_id != DTRACE_IDNONE) {
if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
- (void) (*matched)(probe, arg);
+ if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
nmatched++;
}
return (nmatched);
@@ -6714,8 +7016,12 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
+ if ((rc = (*matched)(probe, arg)) !=
+ DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
break;
+ }
}
return (nmatched);
@@ -6734,8 +7040,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
+ if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
break;
+ }
}
return (nmatched);
@@ -6852,9 +7161,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
if ((priv & DTRACE_PRIV_KERNEL) &&
(priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
- pops->dtps_usermode == NULL) {
+ pops->dtps_mode == NULL) {
cmn_err(CE_WARN, "failed to register provider '%s': need "
- "dtps_usermode() op for given privilege attributes", name);
+ "dtps_mode() op for given privilege attributes", name);
return (EINVAL);
}
@@ -6951,11 +7260,11 @@ dtrace_unregister(dtrace_provider_id_t id)
{
dtrace_provider_t *old = (dtrace_provider_t *)id;
dtrace_provider_t *prev = NULL;
- int i, self = 0;
+ int i, self = 0, noreap = 0;
dtrace_probe_t *probe, *first = NULL;
if (old->dtpv_pops.dtps_enable ==
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
/*
* If DTrace itself is the provider, we're called with locks
* already held.
@@ -7008,14 +7317,31 @@ dtrace_unregister(dtrace_provider_id_t id)
continue;
/*
- * We have at least one ECB; we can't remove this provider.
+ * If we are trying to unregister a defunct provider, and the
+ * provider was made defunct within the interval dictated by
+ * dtrace_unregister_defunct_reap, we'll (asynchronously)
+ * attempt to reap our enablings. To denote that the provider
+ * should reattempt to unregister itself at some point in the
+ * future, we will return a differentiable error code (EAGAIN
+ * instead of EBUSY) in this case.
*/
+ if (dtrace_gethrtime() - old->dtpv_defunct >
+ dtrace_unregister_defunct_reap)
+ noreap = 1;
+
if (!self) {
mutex_exit(&dtrace_lock);
mutex_exit(&mod_lock);
mutex_exit(&dtrace_provider_lock);
}
- return (EBUSY);
+
+ if (noreap)
+ return (EBUSY);
+
+ (void) taskq_dispatch(dtrace_taskq,
+ (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
+
+ return (EAGAIN);
}
/*
@@ -7101,12 +7427,12 @@ dtrace_invalidate(dtrace_provider_id_t id)
dtrace_provider_t *pvp = (dtrace_provider_t *)id;
ASSERT(pvp->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
- pvp->dtpv_defunct = 1;
+ pvp->dtpv_defunct = dtrace_gethrtime();
mutex_exit(&dtrace_lock);
mutex_exit(&dtrace_provider_lock);
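
dtpv_defunct changes from a flag to the hrtime at which the provider was
invalidated, which is what dtrace_unregister() measures against above. A
sketch of the retry contract a provider's teardown path might follow under
the new semantics (hypothetical "myprov_" names; a real driver might defer
to a later detach attempt instead of polling):

    static dtrace_provider_id_t myprov_id;

    static int
    myprov_detach(void)
    {
            int rval;

            dtrace_invalidate(myprov_id);   /* stamps dtpv_defunct = now */

            /*
             * EAGAIN now means "defunct enablings are being reaped
             * asynchronously; try again"; EBUSY remains a hard failure.
             */
            while ((rval = dtrace_unregister(myprov_id)) == EAGAIN)
                    delay(hz);

            return (rval == 0 ? DDI_SUCCESS : DDI_FAILURE);
    }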
@@ -7142,7 +7468,7 @@ dtrace_condense(dtrace_provider_id_t id)
* Make sure this isn't the dtrace provider itself.
*/
ASSERT(prov->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
@@ -8103,7 +8429,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
break;
default:
- err += efunc(dp->dtdo_len - 1, "bad return size");
+ err += efunc(dp->dtdo_len - 1, "bad return size\n");
}
}
@@ -9096,7 +9422,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
return (ecb);
}
-static void
+static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
dtrace_probe_t *probe = ecb->dte_probe;
@@ -9109,7 +9435,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
/*
* This is the NULL probe -- there's nothing to do.
*/
- return;
+ return (0);
}
if (probe->dtpr_ecb == NULL) {
@@ -9123,8 +9449,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
if (ecb->dte_predicate != NULL)
probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
- prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg);
+ return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg));
} else {
/*
* This probe is already active. Swing the last pointer to
@@ -9137,6 +9463,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
probe->dtpr_predcache = 0;
dtrace_sync();
+ return (0);
}
}
@@ -9312,6 +9639,35 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
break;
}
+ case DTRACEAGG_LLQUANTIZE: {
+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
+ uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
+ uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
+ uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
+ int64_t v;
+
+ agg->dtag_initial = desc->dtad_arg;
+ agg->dtag_aggregate = dtrace_aggregate_llquantize;
+
+ if (factor < 2 || low >= high || nsteps < factor)
+ goto err;
+
+ /*
+ * Now check that the number of steps evenly divides a power
+ * of the factor. (This assures both integer bucket size and
+ * linearity within each magnitude.)
+ */
+ for (v = factor; v < nsteps; v *= factor)
+ continue;
+
+ if ((v % nsteps) || (nsteps % factor))
+ goto err;
+
+ size = (dtrace_aggregate_llquantize_bucket(factor,
+ low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
+ break;
+ }
+
case DTRACEAGG_AVG:
agg->dtag_aggregate = dtrace_aggregate_avg;
size = sizeof (uint64_t) * 2;
@@ -9481,12 +9837,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
case DTRACEACT_PRINTA:
case DTRACEACT_SYSTEM:
case DTRACEACT_FREOPEN:
+ case DTRACEACT_DIFEXPR:
/*
* We know that our arg is a string -- turn it into a
* format.
*/
if (arg == NULL) {
- ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
+ ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
+ desc->dtad_kind == DTRACEACT_DIFEXPR);
format = 0;
} else {
ASSERT(arg != NULL);
@@ -9497,7 +9855,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
/*FALLTHROUGH*/
case DTRACEACT_LIBACT:
- case DTRACEACT_DIFEXPR:
+ case DTRACEACT_TRACEMEM:
+ case DTRACEACT_TRACEMEM_DYNSIZE:
if (dp == NULL)
return (EINVAL);
@@ -9920,7 +10279,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
return (DTRACE_MATCH_DONE);
- dtrace_ecb_enable(ecb);
+ if (dtrace_ecb_enable(ecb) < 0)
+ return (DTRACE_MATCH_FAIL);
+
return (DTRACE_MATCH_NEXT);
}
@@ -9978,6 +10339,7 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
caddr_t tomax = buf->dtb_tomax;
caddr_t xamot = buf->dtb_xamot;
dtrace_icookie_t cookie;
+ hrtime_t now = dtrace_gethrtime();
ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
@@ -9993,6 +10355,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
buf->dtb_drops = 0;
buf->dtb_errors = 0;
buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
+ buf->dtb_interval = now - buf->dtb_switched;
+ buf->dtb_switched = now;
dtrace_interrupt_enable(cookie);
}
@@ -10025,14 +10389,17 @@ dtrace_buffer_activate(dtrace_state_t *state)
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
- processorid_t cpu)
+ processorid_t cpu, int *factor)
{
cpu_t *cp;
dtrace_buffer_t *buf;
+ int allocated = 0, desired = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(MUTEX_HELD(&dtrace_lock));
+ *factor = 1;
+
if (size > dtrace_nonroot_maxsize &&
!PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
return (EFBIG);
@@ -10057,7 +10424,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
ASSERT(buf->dtb_xamot == NULL);
- if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
+ if ((buf->dtb_tomax = kmem_zalloc(size,
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL)
goto err;
buf->dtb_size = size;
@@ -10068,7 +10436,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
if (flags & DTRACEBUF_NOSWITCH)
continue;
- if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
+ if ((buf->dtb_xamot = kmem_zalloc(size,
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL)
goto err;
} while ((cp = cp->cpu_next) != cpu_list);
@@ -10082,16 +10451,19 @@ err:
continue;
buf = &bufs[cp->cpu_id];
+ desired += 2;
if (buf->dtb_xamot != NULL) {
ASSERT(buf->dtb_tomax != NULL);
ASSERT(buf->dtb_size == size);
kmem_free(buf->dtb_xamot, size);
+ allocated++;
}
if (buf->dtb_tomax != NULL) {
ASSERT(buf->dtb_size == size);
kmem_free(buf->dtb_tomax, size);
+ allocated++;
}
buf->dtb_tomax = NULL;
@@ -10099,6 +10471,8 @@ err:
buf->dtb_size = 0;
} while ((cp = cp->cpu_next) != cpu_list);
+ *factor = desired / (allocated > 0 ? allocated : 1);
+
return (ENOMEM);
}
@@ -10400,6 +10774,36 @@ dtrace_buffer_polish(dtrace_buffer_t *buf)
}
}
+/*
+ * This routine determines if data generated at the specified time has likely
+ * been entirely consumed at user-level. This routine is called to determine
+ * if an ECB on a defunct probe (but for an active enabling) can be safely
+ * disabled and destroyed.
+ */
+static int
+dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
+{
+ int i;
+
+ for (i = 0; i < NCPU; i++) {
+ dtrace_buffer_t *buf = &bufs[i];
+
+ if (buf->dtb_size == 0)
+ continue;
+
+ if (buf->dtb_flags & DTRACEBUF_RING)
+ return (0);
+
+ if (!buf->dtb_switched && buf->dtb_offset != 0)
+ return (0);
+
+ if (buf->dtb_switched - buf->dtb_interval < when)
+ return (0);
+ }
+
+ return (1);
+}
+
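
A worked instance of the test above, with illustrative numbers: if the
provider went defunct at when = 60s, the most recent switch happened at
dtb_switched = 100s, and the previous interval was dtb_interval = 50s,
then the switch before last was at 50s, which precedes `when' -- records
traced at 60s may still sit unconsumed in the inactive buffer, so the
data is reported as not consumed:

    #include <stdio.h>

    typedef long long hrtime_t;

    static int
    consumed(hrtime_t switched, hrtime_t interval, hrtime_t when)
    {
            /* the switch before last must be at or after `when' */
            return (switched - interval >= when);
    }

    int
    main(void)
    {
            /* prints "0 1": 60s data unconsumed; 40s data consumed */
            printf("%d %d\n", consumed(100, 50, 60),
                consumed(100, 50, 40));
            return (0);
    }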
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
@@ -10557,6 +10961,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab)
ASSERT(enab->dten_vstate->dtvs_state != NULL);
ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
enab->dten_vstate->dtvs_state->dts_nretained--;
+ dtrace_retained_gen++;
}
if (enab->dten_prev == NULL) {
@@ -10599,6 +11004,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab)
return (ENOSPC);
state->dts_nretained++;
+ dtrace_retained_gen++;
if (dtrace_retained == NULL) {
dtrace_retained = enab;
@@ -10713,7 +11119,7 @@ static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
int i = 0;
- int matched = 0;
+ int total_matched = 0, matched = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(MUTEX_HELD(&dtrace_lock));
@@ -10724,7 +11130,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
enab->dten_current = ep;
enab->dten_error = 0;
- matched += dtrace_probe_enable(&ep->dted_probe, enab);
+ /*
+ * If a provider failed to enable a probe, then get out and
+ * let the consumer know we failed.
+ */
+ if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
+ return (EBUSY);
+
+ total_matched += matched;
if (enab->dten_error != 0) {
/*
@@ -10752,7 +11165,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
enab->dten_probegen = dtrace_probegen;
if (nmatched != NULL)
- *nmatched = matched;
+ *nmatched = total_matched;
return (0);
}
@@ -10766,13 +11179,24 @@ dtrace_enabling_matchall(void)
mutex_enter(&dtrace_lock);
/*
- * Because we can be called after dtrace_detach() has been called, we
- * cannot assert that there are retained enablings. We can safely
- * load from dtrace_retained, however: the taskq_destroy() at the
- * end of dtrace_detach() will block pending our completion.
+ * Iterate over all retained enablings to see if any probes match
+ * against them. We only perform this operation on enablings for which
+ * we have sufficient permissions by virtue of being in the global zone
+ * or in the same zone as the DTrace client. Because we can be called
+ * after dtrace_detach() has been called, we cannot assert that there
+ * are retained enablings. We can safely load from dtrace_retained,
+ * however: the taskq_destroy() at the end of dtrace_detach() will
+ * block pending our completion.
*/
- for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next)
- (void) dtrace_enabling_match(enab, NULL);
+ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
+ dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
+ cred_t *cr = dcr->dcr_cred;
+ zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
+
+ if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
+ (zone == GLOBAL_ZONEID || getzoneid() == zone)))
+ (void) dtrace_enabling_match(enab, NULL);
+ }
mutex_exit(&dtrace_lock);
mutex_exit(&cpu_lock);
@@ -10830,6 +11254,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
{
int i, all = 0;
dtrace_probedesc_t desc;
+ dtrace_genid_t gen;
ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(MUTEX_HELD(&dtrace_provider_lock));
@@ -10840,15 +11265,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
}
do {
- dtrace_enabling_t *enab = dtrace_retained;
+ dtrace_enabling_t *enab;
void *parg = prv->dtpv_arg;
- for (; enab != NULL; enab = enab->dten_next) {
+retry:
+ gen = dtrace_retained_gen;
+ for (enab = dtrace_retained; enab != NULL;
+ enab = enab->dten_next) {
for (i = 0; i < enab->dten_ndesc; i++) {
desc = enab->dten_desc[i]->dted_probe;
mutex_exit(&dtrace_lock);
prv->dtpv_pops.dtps_provide(parg, &desc);
mutex_enter(&dtrace_lock);
+ /*
+ * Process the retained enablings again if
+ * they have changed while we weren't holding
+ * dtrace_lock.
+ */
+ if (gen != dtrace_retained_gen)
+ goto retry;
}
}
} while (all && (prv = prv->dtpv_next) != NULL);
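
The retry above is a generation-counter pattern: because dtrace_lock must
be dropped around the dtps_provide() callout, any mutation of the retained
list in that window is detected by comparing dtrace_retained_gen (bumped
by dtrace_enabling_retain() and dtrace_enabling_destroy()) and restarting
the walk. The same idea in isolation (generic sketch, hypothetical list_*
names):

    static void
    walk_list(void)
    {
            node_t *n;
            int gen;

            mutex_enter(&list_lock);
    retry:
            gen = list_gen;
            for (n = list_head; n != NULL; n = n->next) {
                    mutex_exit(&list_lock);
                    do_callout(n);          /* may mutate the list */
                    mutex_enter(&list_lock);

                    if (gen != list_gen)
                            goto retry;     /* n may be stale */
            }
            mutex_exit(&list_lock);
    }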
@@ -10859,6 +11294,85 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
}
/*
+ * Called to reap ECBs that are attached to probes from defunct providers.
+ */
+static void
+dtrace_enabling_reap(void)
+{
+ dtrace_provider_t *prov;
+ dtrace_probe_t *probe;
+ dtrace_ecb_t *ecb;
+ hrtime_t when;
+ int i;
+
+ mutex_enter(&cpu_lock);
+ mutex_enter(&dtrace_lock);
+
+ for (i = 0; i < dtrace_nprobes; i++) {
+ if ((probe = dtrace_probes[i]) == NULL)
+ continue;
+
+ if (probe->dtpr_ecb == NULL)
+ continue;
+
+ prov = probe->dtpr_provider;
+
+ if ((when = prov->dtpv_defunct) == 0)
+ continue;
+
+ /*
+ * We have ECBs on a defunct provider: we want to reap these
+ * ECBs to allow the provider to unregister. The destruction
+ * of these ECBs must be done carefully: if we destroy the ECB
+ * and the consumer later wishes to consume an EPID that
+ * corresponds to the destroyed ECB (and if the EPID metadata
+ * has not been previously consumed), the consumer will abort
+ * processing on the unknown EPID. To reduce (but not, sadly,
+ * eliminate) the possibility of this, we will only destroy an
+ * ECB for a defunct provider if, for the state that
+ * corresponds to the ECB:
+ *
+ * (a) There is no speculative tracing (which can effectively
+ * cache an EPID for an arbitrary amount of time).
+ *
+ * (b) The principal buffers have been switched twice since the
+ * provider became defunct.
+ *
+ * (c) The aggregation buffers are of zero size or have been
+ * switched twice since the provider became defunct.
+ *
+ * We use dts_speculates to determine (a) and call a function
+ * (dtrace_buffer_consumed()) to determine (b) and (c). Note
+ * that as soon as we've been unable to destroy one of the ECBs
+ * associated with the probe, we quit trying -- reaping is only
+ * fruitful in as much as we can destroy all ECBs associated
+ * with the defunct provider's probes.
+ */
+ while ((ecb = probe->dtpr_ecb) != NULL) {
+ dtrace_state_t *state = ecb->dte_state;
+ dtrace_buffer_t *buf = state->dts_buffer;
+ dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
+
+ if (state->dts_speculates)
+ break;
+
+ if (!dtrace_buffer_consumed(buf, when))
+ break;
+
+ if (!dtrace_buffer_consumed(aggbuf, when))
+ break;
+
+ dtrace_ecb_disable(ecb);
+ ASSERT(probe->dtpr_ecb != ecb);
+ dtrace_ecb_destroy(ecb);
+ }
+ }
+
+ mutex_exit(&dtrace_lock);
+ mutex_exit(&cpu_lock);
+}
+
+/*
* DTrace DOF Functions
*/
/*ARGSUSED*/
@@ -10970,7 +11484,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp)
dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
- if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
+ if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
+ dof->dofh_loadsz != hdr.dofh_loadsz) {
kmem_free(dof, hdr.dofh_loadsz);
*errp = EFAULT;
return (NULL);
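
The added dofh_loadsz recheck closes a classic double-fetch window: the
header is validated from a first copyin(), but user memory can change
before the full second copyin(), so any size consulted afterward must be
re-verified against the validated value. The pattern in isolation (generic
sketch; hdr_t, MAX_SIZE and the field names are hypothetical):

    static void *
    fetch_user_buf(uintptr_t uarg, size_t *sizep)
    {
            hdr_t hdr;
            void *buf;

            if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0 ||
                hdr.size < sizeof (hdr) || hdr.size > MAX_SIZE)
                    return (NULL);

            buf = kmem_alloc(hdr.size, KM_SLEEP);

            /* re-verify: the user may have raced the two copyins */
            if (copyin((void *)uarg, buf, hdr.size) != 0 ||
                ((hdr_t *)buf)->size != hdr.size) {
                    kmem_free(buf, hdr.size);
                    return (NULL);
            }

            *sizep = hdr.size;
            return (buf);
    }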
@@ -11362,15 +11877,20 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
(uintptr_t)sec->dofs_offset + offs);
kind = (dtrace_actkind_t)desc->dofa_kind;
- if (DTRACEACT_ISPRINTFLIKE(kind) &&
+ if ((DTRACEACT_ISPRINTFLIKE(kind) &&
(kind != DTRACEACT_PRINTA ||
+ desc->dofa_strtab != DOF_SECIDX_NONE)) ||
+ (kind == DTRACEACT_DIFEXPR &&
desc->dofa_strtab != DOF_SECIDX_NONE)) {
dof_sec_t *strtab;
char *str, *fmt;
uint64_t i;
/*
- * printf()-like actions must have a format string.
+ * The argument to these actions is an index into the
+ * DOF string table. For printf()-like actions, this
+ * is the format string. For print(), this is the
+ * CTF type of the expression result.
*/
if ((strtab = dtrace_dof_sect(dof,
DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
@@ -11698,6 +12218,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
}
}
+ if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
+ !(sec->dofs_flags & DOF_SECF_LOAD)) {
+ dtrace_dof_error(dof, "loadable section with load "
+ "flag unset");
+ return (-1);
+ }
+
if (!(sec->dofs_flags & DOF_SECF_LOAD))
continue; /* just ignore non-loadable sections */
@@ -11849,7 +12376,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
size = min;
- if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
+ if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
return (ENOMEM);
dstate->dtds_size = size;
@@ -12211,7 +12738,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
dtrace_optval_t *opt = state->dts_options, size;
processorid_t cpu;
- int flags = 0, rval;
+ int flags = 0, rval, factor, divisor = 1;
ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(MUTEX_HELD(&cpu_lock));
@@ -12241,7 +12768,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
flags |= DTRACEBUF_INACTIVE;
}
- for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
+ for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
/*
* The size must be 8-byte aligned. If the size is not 8-byte
* aligned, drop it down by the difference.
@@ -12259,7 +12786,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
return (E2BIG);
}
- rval = dtrace_buffer_alloc(buf, size, flags, cpu);
+ rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
if (rval != ENOMEM) {
opt[which] = size;
@@ -12268,6 +12795,9 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
return (rval);
+
+ for (divisor = 2; divisor < factor; divisor <<= 1)
+ continue;
}
return (ENOMEM);
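
The shortfall factor computed by dtrace_buffer_alloc() now drives a
geometric back-off instead of a fixed halving. For example, if buffers
were desired on 8 CPUs (desired = 16, counting tomax and xamot) but only
2 allocations succeeded before kmem gave out, factor comes back as 8 and
the loop above doubles the divisor 2 -> 4 -> 8, so the next pass tries
size / 8 rather than creeping down one power of two at a time:

    #include <stdio.h>

    int
    main(void)
    {
            int desired = 16, allocated = 2;
            int factor = desired / (allocated > 0 ? allocated : 1);
            int divisor;

            for (divisor = 2; divisor < factor; divisor <<= 1)
                    continue;

            /* prints "factor 8 -> next size = size/8" */
            printf("factor %d -> next size = size/%d\n", factor, divisor);
            return (0);
    }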
@@ -12367,7 +12897,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
goto out;
}
- spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);
+ spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
+ KM_NOSLEEP | KM_NORMALPRI);
if (spec == NULL) {
rval = ENOMEM;
@@ -12378,7 +12909,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
state->dts_nspeculations = (int)nspec;
for (i = 0; i < nspec; i++) {
- if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
+ if ((buf = kmem_zalloc(bufsize,
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
rval = ENOMEM;
goto err;
}
@@ -14390,7 +14922,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
* If this wasn't an open with the "helper" minor, then it must be
* the "dtrace" minor.
*/
- ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE);
+ if (getminor(*devp) != DTRACEMNRN_DTRACE)
+ return (ENXIO);
/*
* If no DTRACE_PRIV_* bits are set in the credential, then the
@@ -14427,7 +14960,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
mutex_exit(&cpu_lock);
if (state == NULL) {
- if (--dtrace_opens == 0)
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
mutex_exit(&dtrace_lock);
return (EAGAIN);
@@ -14463,7 +14996,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
dtrace_state_destroy(state);
ASSERT(dtrace_opens > 0);
- if (--dtrace_opens == 0)
+
+ /*
+ * Only relinquish control of the kernel debugger interface when there
+ * are no consumers and no anonymous enablings.
+ */
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
mutex_exit(&dtrace_lock);
@@ -15458,7 +15996,8 @@ static struct dev_ops dtrace_ops = {
nodev, /* reset */
&dtrace_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
static struct modldrv modldrv = {