diff options
Diffstat (limited to 'uts/common/dtrace/dtrace.c')
| -rw-r--r-- | uts/common/dtrace/dtrace.c | 863 |
1 files changed, 701 insertions, 162 deletions
diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index c721386280f8..0c5e4b3a011a 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * DTrace - Dynamic Tracing for Solaris * @@ -121,7 +119,7 @@ dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); size_t dtrace_global_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; -dtrace_optval_t dtrace_helper_actions_max = 32; +dtrace_optval_t dtrace_helper_actions_max = 1024; dtrace_optval_t dtrace_helper_providers_max = 32; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; @@ -146,6 +144,7 @@ int dtrace_err_verbose; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; +hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; /* * DTrace External Variables @@ -186,7 +185,9 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ +static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ +static int dtrace_dynvar_failclean; /* dynvars failed to clean */ /* * DTrace Locking @@ -240,10 +241,16 @@ static void dtrace_nullop(void) {} +static int +dtrace_enable_nullop(void) +{ + return (0); +} + static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, struct modctl *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, @@ -427,6 +434,7 @@ dtrace_load##bits(uintptr_t addr) \ #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 +#define DTRACE_MATCH_FAIL -1 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') @@ -453,11 +461,13 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); +static void dtrace_enabling_reap(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); static void dtrace_buffer_drop(dtrace_buffer_t *); +static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, @@ -1098,10 +1108,13 @@ dtrace_priv_proc_common_nocd() } static int -dtrace_priv_proc_destructive(dtrace_state_t *state) +dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate) { int action = state->dts_cred.dcr_action; + if (!(mstate->dtms_access & DTRACE_ACCESS_PROC)) + goto bad; + if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) goto bad; @@ -1123,15 +1136,17 @@ bad: } static int -dtrace_priv_proc_control(dtrace_state_t *state) +dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate) { - if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) - return (1); + if (mstate->dtms_access & DTRACE_ACCESS_PROC) { + if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) + return (1); - if (dtrace_priv_proc_common_zone(state) && - dtrace_priv_proc_common_user(state) && - dtrace_priv_proc_common_nocd()) - return (1); + if (dtrace_priv_proc_common_zone(state) && + dtrace_priv_proc_common_user(state) && + dtrace_priv_proc_common_nocd()) + return (1); + } cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1139,9 +1154,10 @@ dtrace_priv_proc_control(dtrace_state_t *state) } static int -dtrace_priv_proc(dtrace_state_t *state) +dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate) { - if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) + if ((mstate->dtms_access & DTRACE_ACCESS_PROC) && + (state->dts_cred.dcr_action & DTRACE_CRA_PROC)) return (1); cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1172,6 +1188,109 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) } /* + * Determine if the dte_cond of the specified ECB allows for processing of + * the current probe to continue. Note that this routine may allow continued + * processing, but with access(es) stripped from the mstate's dtms_access + * field. + */ +static int +dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, + dtrace_ecb_t *ecb) +{ + dtrace_probe_t *probe = ecb->dte_probe; + dtrace_provider_t *prov = probe->dtpr_provider; + dtrace_pops_t *pops = &prov->dtpv_pops; + int mode = DTRACE_MODE_NOPRIV_DROP; + + ASSERT(ecb->dte_cond); + + if (pops->dtps_mode != NULL) { + mode = pops->dtps_mode(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg); + + ASSERT((mode & DTRACE_MODE_USER) || + (mode & DTRACE_MODE_KERNEL)); + ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || + (mode & DTRACE_MODE_NOPRIV_DROP)); + } + + /* + * If the dte_cond bits indicate that this consumer is only allowed to + * see user-mode firings of this probe, call the provider's dtps_mode() + * entry point to check that the probe was fired while in a user + * context. If that's not the case, use the policy specified by the + * provider to determine if we drop the probe or merely restrict + * operation. + */ + if (ecb->dte_cond & DTRACE_COND_USERMODE) { + ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); + + if (!(mode & DTRACE_MODE_USER)) { + if (mode & DTRACE_MODE_NOPRIV_DROP) + return (0); + + mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; + } + } + + /* + * This is more subtle than it looks. We have to be absolutely certain + * that CRED() isn't going to change out from under us so it's only + * legit to examine that structure if we're in constrained situations. + * Currently, the only times we'll this check is if a non-super-user + * has enabled the profile or syscall providers -- providers that + * allow visibility of all processes. For the profile case, the check + * above will ensure that we're examining a user context. + */ + if (ecb->dte_cond & DTRACE_COND_OWNER) { + cred_t *cr; + cred_t *s_cr = state->dts_cred.dcr_cred; + proc_t *proc; + + ASSERT(s_cr != NULL); + + if ((cr = CRED()) == NULL || + s_cr->cr_uid != cr->cr_uid || + s_cr->cr_uid != cr->cr_ruid || + s_cr->cr_uid != cr->cr_suid || + s_cr->cr_gid != cr->cr_gid || + s_cr->cr_gid != cr->cr_rgid || + s_cr->cr_gid != cr->cr_sgid || + (proc = ttoproc(curthread)) == NULL || + (proc->p_flag & SNOCD)) { + if (mode & DTRACE_MODE_NOPRIV_DROP) + return (0); + + mstate->dtms_access &= ~DTRACE_ACCESS_PROC; + } + } + + /* + * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not + * in our zone, check to see if our mode policy is to restrict rather + * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC + * and DTRACE_ACCESS_ARGS + */ + if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { + cred_t *cr; + cred_t *s_cr = state->dts_cred.dcr_cred; + + ASSERT(s_cr != NULL); + + if ((cr = CRED()) == NULL || + s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { + if (mode & DTRACE_MODE_NOPRIV_DROP) + return (0); + + mstate->dtms_access &= + ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); + } + } + + return (1); +} + +/* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) from outside of probe context to * clean the dirty dynamic variable lists on all CPUs. Dynamic variable @@ -1182,12 +1301,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; - int i, work = 0; + dtrace_dynvar_t **rinsep; + int i, j, work = 0; for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; - - ASSERT(dcpu->dtdsc_rinsing == NULL); + rinsep = &dcpu->dtdsc_rinsing; /* * If the dirty list is NULL, there is no dirty work to do. @@ -1195,14 +1314,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) if (dcpu->dtdsc_dirty == NULL) continue; - /* - * If the clean list is non-NULL, then we're not going to do - * any work for this CPU -- it means that there has not been - * a dtrace_dynvar() allocation on this CPU (or from this CPU) - * since the last time we cleaned house. - */ - if (dcpu->dtdsc_clean != NULL) + if (dcpu->dtdsc_rinsing != NULL) { + /* + * If the rinsing list is non-NULL, then it is because + * this CPU was selected to accept another CPU's + * dirty list -- and since that time, dirty buffers + * have accumulated. This is a highly unlikely + * condition, but we choose to ignore the dirty + * buffers -- they'll be picked up a future cleanse. + */ continue; + } + + if (dcpu->dtdsc_clean != NULL) { + /* + * If the clean list is non-NULL, then we're in a + * situation where a CPU has done deallocations (we + * have a non-NULL dirty list) but no allocations (we + * also have a non-NULL clean list). We can't simply + * move the dirty list into the clean list on this + * CPU, yet we also don't want to allow this condition + * to persist, lest a short clean list prevent a + * massive dirty list from being cleaned (which in + * turn could lead to otherwise avoidable dynamic + * drops). To deal with this, we look for some CPU + * with a NULL clean list, NULL dirty list, and NULL + * rinsing list -- and then we borrow this CPU to + * rinse our dirty list. + */ + for (j = 0; j < NCPU; j++) { + dtrace_dstate_percpu_t *rinser; + + rinser = &dstate->dtds_percpu[j]; + + if (rinser->dtdsc_rinsing != NULL) + continue; + + if (rinser->dtdsc_dirty != NULL) + continue; + + if (rinser->dtdsc_clean != NULL) + continue; + + rinsep = &rinser->dtdsc_rinsing; + break; + } + + if (j == NCPU) { + /* + * We were unable to find another CPU that + * could accept this dirty list -- we are + * therefore unable to clean it now. + */ + dtrace_dynvar_failclean++; + continue; + } + } work = 1; @@ -1219,7 +1386,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * on a hash chain, either the dirty list or the * rinsing list for some CPU must be non-NULL.) */ - dcpu->dtdsc_rinsing = dirty; + *rinsep = dirty; dtrace_membar_producer(); } while (dtrace_casptr(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); @@ -1650,7 +1817,7 @@ retry: ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); /* - * Now we'll move the clean list to the free list. + * Now we'll move the clean list to our free list. * It's impossible for this to fail: the only way * the free list can be updated is through this * code path, and only one CPU can own the clean list. @@ -1663,6 +1830,7 @@ retry: * owners of the clean lists out before resetting * the clean lists. */ + dcpu = &dstate->dtds_percpu[me]; rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); ASSERT(rval == NULL); goto retry; @@ -1804,6 +1972,75 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) lquanta[levels + 1] += incr; } +static int +dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, + uint16_t high, uint16_t nsteps, int64_t value) +{ + int64_t this = 1, last, next; + int base = 1, order; + + ASSERT(factor <= nsteps); + ASSERT(nsteps % factor == 0); + + for (order = 0; order < low; order++) + this *= factor; + + /* + * If our value is less than our factor taken to the power of the + * low order of magnitude, it goes into the zeroth bucket. + */ + if (value < (last = this)) + return (0); + + for (this *= factor; order <= high; order++) { + int nbuckets = this > nsteps ? nsteps : this; + + if ((next = this * factor) < this) { + /* + * We should not generally get log/linear quantizations + * with a high magnitude that allows 64-bits to + * overflow, but we nonetheless protect against this + * by explicitly checking for overflow, and clamping + * our value accordingly. + */ + value = this - 1; + } + + if (value < this) { + /* + * If our value lies within this order of magnitude, + * determine its position by taking the offset within + * the order of magnitude, dividing by the bucket + * width, and adding to our (accumulated) base. + */ + return (base + (value - last) / (this / nbuckets)); + } + + base += nbuckets - (nbuckets / factor); + last = this; + this = next; + } + + /* + * Our value is greater than or equal to our factor taken to the + * power of one plus the high magnitude -- return the top bucket. + */ + return (base); +} + +static void +dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) +{ + uint64_t arg = *llquanta++; + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + + llquanta[dtrace_aggregate_llquantize_bucket(factor, + low, high, nsteps, nval)] += incr; +} + /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) @@ -2585,6 +2822,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, switch (v) { case DIF_VAR_ARGS: + if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) { + cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= + CPU_DTRACE_KPRIV; + return (0); + } + ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { @@ -2620,7 +2863,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_UREGS: { klwp_t *lwp; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); if ((lwp = curthread->t_lwp) == NULL) { @@ -2632,6 +2875,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(lwp->lwp_regs, ndx)); } + case DIF_VAR_VMREGS: { + uint64_t rval; + + if (!dtrace_priv_kernel(state)) + return (0); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + + rval = dtrace_getvmreg(ndx, + &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); + + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (rval); + } + case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); @@ -2684,7 +2943,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_stackdepth); case DIF_VAR_USTACKDEPTH: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { /* @@ -2739,7 +2998,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_caller); case DIF_VAR_UCALLER: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { @@ -2787,7 +3046,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_PID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2809,7 +3068,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_pidp->pid_id); case DIF_VAR_PPID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2836,7 +3095,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_tid); case DIF_VAR_EXECNAME: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2856,7 +3115,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2876,7 +3135,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_UID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2897,7 +3156,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_cred->cr_uid); case DIF_VAR_GID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -2919,7 +3178,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_ERRNO: { klwp_t *lwp; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, mstate)) return (0); /* @@ -3259,7 +3518,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && + dtrace_priv_proc_control(state, mstate) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); @@ -3274,7 +3533,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && + dtrace_priv_proc_control(state, mstate) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, size, flags); @@ -3600,7 +3859,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int64_t index = (int64_t)tupregs[1].dttk_value; int64_t remaining = (int64_t)tupregs[2].dttk_value; size_t len = dtrace_strlen((char *)s, size); - int64_t i = 0; + int64_t i; if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = NULL; @@ -3645,7 +3904,54 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - case DIF_SUBR_GETMAJOR: + case DIF_SUBR_TOUPPER: + case DIF_SUBR_TOLOWER: { + uintptr_t s = tupregs[0].dttk_value; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + char *dest = (char *)mstate->dtms_scratch_ptr, c; + size_t len = dtrace_strlen((char *)s, size); + char lower, upper, convert; + int64_t i; + + if (subr == DIF_SUBR_TOUPPER) { + lower = 'a'; + upper = 'z'; + convert = 'A'; + } else { + lower = 'A'; + upper = 'Z'; + convert = 'a'; + } + + if (!dtrace_canload(s, len + 1, mstate, vstate)) { + regs[rd] = NULL; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = NULL; + break; + } + + for (i = 0; i < size - 1; i++) { + if ((c = dtrace_load8(s + i)) == '\0') + break; + + if (c >= lower && c <= upper) + c = convert + (c - lower); + + dest[i] = c; + } + + ASSERT(i < size); + dest[i] = '\0'; + regs[rd] = (uintptr_t)dest; + mstate->dtms_scratch_ptr += size; + break; + } + +case DIF_SUBR_GETMAJOR: #ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else @@ -3907,9 +4213,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; - int64_t val = i < 0 ? i * -1 : i; - uint64_t size = 22; /* enough room for 2^64 in decimal */ + uint64_t val, digit; + uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } + + val = (base == 10 && i < 0) ? i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -3917,13 +4234,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - for (*end-- = '\0'; val; val /= 10) - *end-- = '0' + (val % 10); + for (*end-- = '\0'; val; val /= base) { + if ((digit = val % base) <= '9' - '0') { + *end-- = '0' + digit; + } else { + *end-- = 'a' + (digit - ('9' - '0') - 1); + } + } + + if (i == 0 && base == 16) + *end-- = '0'; + + if (base == 16) + *end-- = 'x'; - if (i == 0) + if (i == 0 || base == 8 || base == 16) *end-- = '0'; - if (i < 0) + if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; @@ -5558,6 +5886,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; + uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; @@ -5578,6 +5907,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, #endif mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; + mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC; *flags &= ~CPU_DTRACE_ERROR; if (prov == dtrace_provider) { @@ -5615,65 +5945,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, } } - if (ecb->dte_cond) { - /* - * If the dte_cond bits indicate that this - * consumer is only allowed to see user-mode firings - * of this probe, call the provider's dtps_usermode() - * entry point to check that the probe was fired - * while in a user context. Skip this ECB if that's - * not the case. - */ - if ((ecb->dte_cond & DTRACE_COND_USERMODE) && - prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg) == 0) - continue; - - /* - * This is more subtle than it looks. We have to be - * absolutely certain that CRED() isn't going to - * change out from under us so it's only legit to - * examine that structure if we're in constrained - * situations. Currently, the only times we'll this - * check is if a non-super-user has enabled the - * profile or syscall providers -- providers that - * allow visibility of all processes. For the - * profile case, the check above will ensure that - * we're examining a user context. - */ - if (ecb->dte_cond & DTRACE_COND_OWNER) { - cred_t *cr; - cred_t *s_cr = - ecb->dte_state->dts_cred.dcr_cred; - proc_t *proc; - - ASSERT(s_cr != NULL); - - if ((cr = CRED()) == NULL || - s_cr->cr_uid != cr->cr_uid || - s_cr->cr_uid != cr->cr_ruid || - s_cr->cr_uid != cr->cr_suid || - s_cr->cr_gid != cr->cr_gid || - s_cr->cr_gid != cr->cr_rgid || - s_cr->cr_gid != cr->cr_sgid || - (proc = ttoproc(curthread)) == NULL || - (proc->p_flag & SNOCD)) - continue; - } - - if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { - cred_t *cr; - cred_t *s_cr = - ecb->dte_state->dts_cred.dcr_cred; - - ASSERT(s_cr != NULL); - - if ((cr = CRED()) == NULL || - s_cr->cr_zone->zone_id != - cr->cr_zone->zone_id) - continue; - } - } + if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb)) + continue; if (now - state->dts_alive > dtrace_deadman_timeout) { /* @@ -5713,9 +5986,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, mstate.dtms_present |= DTRACE_MSTATE_EPID; if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) - mstate.dtms_access = DTRACE_ACCESS_KERNEL; - else - mstate.dtms_access = 0; + mstate.dtms_access |= DTRACE_ACCESS_KERNEL; if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; @@ -5775,7 +6046,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, switch (act->dta_kind) { case DTRACEACT_STOP: - if (dtrace_priv_proc_destructive(state)) + if (dtrace_priv_proc_destructive(state, + &mstate)) dtrace_action_stop(); continue; @@ -5802,7 +6074,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_JSTACK: case DTRACEACT_USTACK: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, &mstate)) continue; /* @@ -5835,6 +6107,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, continue; } + /* + * Clear the string space, since there's no + * helper to do it for us. + */ + if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) { + int depth = DTRACE_USTACK_NFRAMES( + rec->dtrd_arg); + size_t strsize = DTRACE_USTACK_STRSIZE( + rec->dtrd_arg); + uint64_t *buf = (uint64_t *)(tomax + + valoffs); + void *strspace = &buf[depth + 1]; + + dtrace_bzero(strspace, + MIN(depth, strsize)); + } + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack((uint64_t *) (tomax + valoffs), @@ -5888,7 +6177,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, continue; case DTRACEACT_RAISE: - if (dtrace_priv_proc_destructive(state)) + if (dtrace_priv_proc_destructive(state, + &mstate)) dtrace_action_raise(val); continue; @@ -5915,6 +6205,11 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_TRACEMEM: + break; + + case DTRACEACT_TRACEMEM_DYNSIZE: + tracememsize = val; break; case DTRACEACT_SYM: @@ -5928,7 +6223,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_UADDR: { struct pid *pid = curthread->t_procp->p_pidp; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc(state, &mstate)) continue; DTRACE_STORE(uint64_t, tomax, @@ -5980,6 +6275,12 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { uintptr_t end = valoffs + size; + if (tracememsize != 0 && + valoffs + tracememsize < end) { + end = valoffs + tracememsize; + tracememsize = 0; + } + if (!dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, &mstate, vstate)) continue; @@ -6655,7 +6956,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; - int len, best = INT_MAX, nmatched = 0; + int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -6667,7 +6968,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - (void) (*matched)(probe, arg); + if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); @@ -6714,8 +7016,12 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) + if ((rc = (*matched)(probe, arg)) != + DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); break; + } } return (nmatched); @@ -6734,8 +7040,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) + if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); break; + } } return (nmatched); @@ -6852,9 +7161,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, if ((priv & DTRACE_PRIV_KERNEL) && (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && - pops->dtps_usermode == NULL) { + pops->dtps_mode == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': need " - "dtps_usermode() op for given privilege attributes", name); + "dtps_mode() op for given privilege attributes", name); return (EINVAL); } @@ -6951,11 +7260,11 @@ dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; - int i, self = 0; + int i, self = 0, noreap = 0; dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. @@ -7008,14 +7317,31 @@ dtrace_unregister(dtrace_provider_id_t id) continue; /* - * We have at least one ECB; we can't remove this provider. + * If we are trying to unregister a defunct provider, and the + * provider was made defunct within the interval dictated by + * dtrace_unregister_defunct_reap, we'll (asynchronously) + * attempt to reap our enablings. To denote that the provider + * should reattempt to unregister itself at some point in the + * future, we will return a differentiable error code (EAGAIN + * instead of EBUSY) in this case. */ + if (dtrace_gethrtime() - old->dtpv_defunct > + dtrace_unregister_defunct_reap) + noreap = 1; + if (!self) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); } - return (EBUSY); + + if (noreap) + return (EBUSY); + + (void) taskq_dispatch(dtrace_taskq, + (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); + + return (EAGAIN); } /* @@ -7101,12 +7427,12 @@ dtrace_invalidate(dtrace_provider_id_t id) dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); - pvp->dtpv_defunct = 1; + pvp->dtpv_defunct = dtrace_gethrtime(); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); @@ -7142,7 +7468,7 @@ dtrace_condense(dtrace_provider_id_t id) * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); @@ -8103,7 +8429,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; default: - err += efunc(dp->dtdo_len - 1, "bad return size"); + err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } @@ -9096,7 +9422,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) return (ecb); } -static void +static int dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; @@ -9109,7 +9435,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) /* * This is the NULL probe -- there's nothing to do. */ - return; + return (0); } if (probe->dtpr_ecb == NULL) { @@ -9123,8 +9449,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; - prov->dtpv_pops.dtps_enable(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg); + return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg)); } else { /* * This probe is already active. Swing the last pointer to @@ -9137,6 +9463,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) probe->dtpr_predcache = 0; dtrace_sync(); + return (0); } } @@ -9312,6 +9639,35 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; } + case DTRACEAGG_LLQUANTIZE: { + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); + int64_t v; + + agg->dtag_initial = desc->dtad_arg; + agg->dtag_aggregate = dtrace_aggregate_llquantize; + + if (factor < 2 || low >= high || nsteps < factor) + goto err; + + /* + * Now check that the number of steps evenly divides a power + * of the factor. (This assures both integer bucket size and + * linearity within each magnitude.) + */ + for (v = factor; v < nsteps; v *= factor) + continue; + + if ((v % nsteps) || (nsteps % factor)) + goto err; + + size = (dtrace_aggregate_llquantize_bucket(factor, + low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); + break; + } + case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; @@ -9481,12 +9837,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. */ if (arg == NULL) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || + desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != NULL); @@ -9497,7 +9855,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) /*FALLTHROUGH*/ case DTRACEACT_LIBACT: - case DTRACEACT_DIFEXPR: + case DTRACEACT_TRACEMEM: + case DTRACEACT_TRACEMEM_DYNSIZE: if (dp == NULL) return (EINVAL); @@ -9920,7 +10279,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); - dtrace_ecb_enable(ecb); + if (dtrace_ecb_enable(ecb) < 0) + return (DTRACE_MATCH_FAIL); + return (DTRACE_MATCH_NEXT); } @@ -9978,6 +10339,7 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; + hrtime_t now = dtrace_gethrtime(); ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); @@ -9993,6 +10355,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); + buf->dtb_interval = now - buf->dtb_switched; + buf->dtb_switched = now; dtrace_interrupt_enable(cookie); } @@ -10025,14 +10389,17 @@ dtrace_buffer_activate(dtrace_state_t *state) static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, - processorid_t cpu) + processorid_t cpu, int *factor) { cpu_t *cp; dtrace_buffer_t *buf; + int allocated = 0, desired = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); + *factor = 1; + if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); @@ -10057,7 +10424,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, ASSERT(buf->dtb_xamot == NULL); - if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL) + if ((buf->dtb_tomax = kmem_zalloc(size, + KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; @@ -10068,7 +10436,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, if (flags & DTRACEBUF_NOSWITCH) continue; - if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL) + if ((buf->dtb_xamot = kmem_zalloc(size, + KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } while ((cp = cp->cpu_next) != cpu_list); @@ -10082,16 +10451,19 @@ err: continue; buf = &bufs[cp->cpu_id]; + desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); + allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); + allocated++; } buf->dtb_tomax = NULL; @@ -10099,6 +10471,8 @@ err: buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); + *factor = desired / (allocated > 0 ? allocated : 1); + return (ENOMEM); } @@ -10400,6 +10774,36 @@ dtrace_buffer_polish(dtrace_buffer_t *buf) } } +/* + * This routine determines if data generated at the specified time has likely + * been entirely consumed at user-level. This routine is called to determine + * if an ECB on a defunct probe (but for an active enabling) can be safely + * disabled and destroyed. + */ +static int +dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) +{ + int i; + + for (i = 0; i < NCPU; i++) { + dtrace_buffer_t *buf = &bufs[i]; + + if (buf->dtb_size == 0) + continue; + + if (buf->dtb_flags & DTRACEBUF_RING) + return (0); + + if (!buf->dtb_switched && buf->dtb_offset != 0) + return (0); + + if (buf->dtb_switched - buf->dtb_interval < when) + return (0); + } + + return (1); +} + static void dtrace_buffer_free(dtrace_buffer_t *bufs) { @@ -10557,6 +10961,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; + dtrace_retained_gen++; } if (enab->dten_prev == NULL) { @@ -10599,6 +11004,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) return (ENOSPC); state->dts_nretained++; + dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; @@ -10713,7 +11119,7 @@ static int dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) { int i = 0; - int matched = 0; + int total_matched = 0, matched = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -10724,7 +11130,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_current = ep; enab->dten_error = 0; - matched += dtrace_probe_enable(&ep->dted_probe, enab); + /* + * If a provider failed to enable a probe then get out and + * let the consumer know we failed. + */ + if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) + return (EBUSY); + + total_matched += matched; if (enab->dten_error != 0) { /* @@ -10752,7 +11165,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) - *nmatched = matched; + *nmatched = total_matched; return (0); } @@ -10766,13 +11179,24 @@ dtrace_enabling_matchall(void) mutex_enter(&dtrace_lock); /* - * Because we can be called after dtrace_detach() has been called, we - * cannot assert that there are retained enablings. We can safely - * load from dtrace_retained, however: the taskq_destroy() at the - * end of dtrace_detach() will block pending our completion. + * Iterate over all retained enablings to see if any probes match + * against them. We only perform this operation on enablings for which + * we have sufficient permissions by virtue of being in the global zone + * or in the same zone as the DTrace client. Because we can be called + * after dtrace_detach() has been called, we cannot assert that there + * are retained enablings. We can safely load from dtrace_retained, + * however: the taskq_destroy() at the end of dtrace_detach() will + * block pending our completion. */ - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) - (void) dtrace_enabling_match(enab, NULL); + for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { + dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred; + cred_t *cr = dcr->dcr_cred; + zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0; + + if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL && + (zone == GLOBAL_ZONEID || getzoneid() == zone))) + (void) dtrace_enabling_match(enab, NULL); + } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); @@ -10830,6 +11254,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; + dtrace_genid_t gen; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&dtrace_provider_lock)); @@ -10840,15 +11265,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } do { - dtrace_enabling_t *enab = dtrace_retained; + dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; - for (; enab != NULL; enab = enab->dten_next) { +retry: + gen = dtrace_retained_gen; + for (enab = dtrace_retained; enab != NULL; + enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; mutex_exit(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); mutex_enter(&dtrace_lock); + /* + * Process the retained enablings again if + * they have changed while we weren't holding + * dtrace_lock. + */ + if (gen != dtrace_retained_gen) + goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); @@ -10859,6 +11294,85 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } /* + * Called to reap ECBs that are attached to probes from defunct providers. + */ +static void +dtrace_enabling_reap(void) +{ + dtrace_provider_t *prov; + dtrace_probe_t *probe; + dtrace_ecb_t *ecb; + hrtime_t when; + int i; + + mutex_enter(&cpu_lock); + mutex_enter(&dtrace_lock); + + for (i = 0; i < dtrace_nprobes; i++) { + if ((probe = dtrace_probes[i]) == NULL) + continue; + + if (probe->dtpr_ecb == NULL) + continue; + + prov = probe->dtpr_provider; + + if ((when = prov->dtpv_defunct) == 0) + continue; + + /* + * We have ECBs on a defunct provider: we want to reap these + * ECBs to allow the provider to unregister. The destruction + * of these ECBs must be done carefully: if we destroy the ECB + * and the consumer later wishes to consume an EPID that + * corresponds to the destroyed ECB (and if the EPID metadata + * has not been previously consumed), the consumer will abort + * processing on the unknown EPID. To reduce (but not, sadly, + * eliminate) the possibility of this, we will only destroy an + * ECB for a defunct provider if, for the state that + * corresponds to the ECB: + * + * (a) There is no speculative tracing (which can effectively + * cache an EPID for an arbitrary amount of time). + * + * (b) The principal buffers have been switched twice since the + * provider became defunct. + * + * (c) The aggregation buffers are of zero size or have been + * switched twice since the provider became defunct. + * + * We use dts_speculates to determine (a) and call a function + * (dtrace_buffer_consumed()) to determine (b) and (c). Note + * that as soon as we've been unable to destroy one of the ECBs + * associated with the probe, we quit trying -- reaping is only + * fruitful in as much as we can destroy all ECBs associated + * with the defunct provider's probes. + */ + while ((ecb = probe->dtpr_ecb) != NULL) { + dtrace_state_t *state = ecb->dte_state; + dtrace_buffer_t *buf = state->dts_buffer; + dtrace_buffer_t *aggbuf = state->dts_aggbuffer; + + if (state->dts_speculates) + break; + + if (!dtrace_buffer_consumed(buf, when)) + break; + + if (!dtrace_buffer_consumed(aggbuf, when)) + break; + + dtrace_ecb_disable(ecb); + ASSERT(probe->dtpr_ecb != ecb); + dtrace_ecb_destroy(ecb); + } + } + + mutex_exit(&dtrace_lock); + mutex_exit(&cpu_lock); +} + +/* * DTrace DOF Functions */ /*ARGSUSED*/ @@ -10970,7 +11484,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { + if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || + dof->dofh_loadsz != hdr.dofh_loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); @@ -11362,15 +11877,20 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if (DTRACEACT_ISPRINTFLIKE(kind) && + if ((DTRACEACT_ISPRINTFLIKE(kind) && (kind != DTRACEACT_PRINTA || + desc->dofa_strtab != DOF_SECIDX_NONE)) || + (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * printf()-like actions must have a format string. + * The argument to these actions is an index into the + * DOF string table. For printf()-like actions, this + * is the format string. For print(), this is the + * CTF type of the expression result. */ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -11698,6 +12218,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, } } + if (DOF_SEC_ISLOADABLE(sec->dofs_type) && + !(sec->dofs_flags & DOF_SECF_LOAD)) { + dtrace_dof_error(dof, "loadable section with load " + "flag unset"); + return (-1); + } + if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* just ignore non-loadable sections */ @@ -11849,7 +12376,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min; - if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL) + if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) return (ENOMEM); dstate->dtds_size = size; @@ -12211,7 +12738,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu; - int flags = 0, rval; + int flags = 0, rval, factor, divisor = 1; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); @@ -12241,7 +12768,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } - for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { + for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. @@ -12259,7 +12786,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) return (E2BIG); } - rval = dtrace_buffer_alloc(buf, size, flags, cpu); + rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); if (rval != ENOMEM) { opt[which] = size; @@ -12268,6 +12795,9 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) return (rval); + + for (divisor = 2; divisor < factor; divisor <<= 1) + continue; } return (ENOMEM); @@ -12367,7 +12897,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) goto out; } - spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP); + spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), + KM_NOSLEEP | KM_NORMALPRI); if (spec == NULL) { rval = ENOMEM; @@ -12378,7 +12909,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) state->dts_nspeculations = (int)nspec; for (i = 0; i < nspec; i++) { - if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) { + if ((buf = kmem_zalloc(bufsize, + KM_NOSLEEP | KM_NORMALPRI)) == NULL) { rval = ENOMEM; goto err; } @@ -14390,7 +14922,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. */ - ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); + if (getminor(*devp) != DTRACEMNRN_DTRACE) + return (ENXIO); /* * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -14427,7 +14960,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) mutex_exit(&cpu_lock); if (state == NULL) { - if (--dtrace_opens == 0) + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); mutex_exit(&dtrace_lock); return (EAGAIN); @@ -14463,7 +14996,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) dtrace_state_destroy(state); ASSERT(dtrace_opens > 0); - if (--dtrace_opens == 0) + + /* + * Only relinquish control of the kernel debugger interface when there + * are no consumers and no anonymous enablings. + */ + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); mutex_exit(&dtrace_lock); @@ -15458,7 +15996,8 @@ static struct dev_ops dtrace_ops = { nodev, /* reset */ &dtrace_cb_ops, /* driver operations */ NULL, /* bus operations */ - nodev /* dev power */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ }; static struct modldrv modldrv = { |
