diff options
| author | Martin Matuska <mm@FreeBSD.org> | 2012-07-01 14:55:35 +0000 | 
|---|---|---|
| committer | Martin Matuska <mm@FreeBSD.org> | 2012-07-01 14:55:35 +0000 | 
| commit | 5a27a66a81254f7b8eadd92ceac8ff50132e971c (patch) | |
| tree | a7fdb0498552fa601ffec363d7a91a3095209b8e /uts/common/dtrace/dtrace.c | |
| parent | d661fdff24a3141883f5058d89a5f044c736ef56 (diff) | |
Notes
Diffstat (limited to 'uts/common/dtrace/dtrace.c')
| -rw-r--r-- | uts/common/dtrace/dtrace.c | 863 | 
1 files changed, 701 insertions, 162 deletions
diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index c721386280f8..0c5e4b3a011a 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -20,12 +20,10 @@   */  /* - * Copyright 2008 Sun Microsystems, Inc.  All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved.   */ -#pragma ident	"%Z%%M%	%I%	%E% SMI" -  /*   * DTrace - Dynamic Tracing for Solaris   * @@ -121,7 +119,7 @@ dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);  size_t		dtrace_global_maxsize = (16 * 1024);  size_t		dtrace_actions_max = (16 * 1024);  size_t		dtrace_retain_max = 1024; -dtrace_optval_t	dtrace_helper_actions_max = 32; +dtrace_optval_t	dtrace_helper_actions_max = 1024;  dtrace_optval_t	dtrace_helper_providers_max = 32;  dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);  size_t		dtrace_strsize_default = 256; @@ -146,6 +144,7 @@ int		dtrace_err_verbose;  hrtime_t	dtrace_deadman_interval = NANOSEC;  hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;  hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC; +hrtime_t	dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;  /*   * DTrace External Variables @@ -186,7 +185,9 @@ static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */  static dtrace_genid_t	dtrace_probegen;	/* current probe generation */  static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */  static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */ +static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */  static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */ +static int		dtrace_dynvar_failclean; /* dynvars failed to clean */  /*   * DTrace Locking @@ -240,10 +241,16 @@ static void  dtrace_nullop(void)  {} +static int +dtrace_enable_nullop(void) +{ +	return (0); +} +  static dtrace_pops_t	dtrace_provider_ops = {  	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,  	(void (*)(void *, struct modctl *))dtrace_nullop, -	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop, +	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,  	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,  	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,  	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop, @@ -427,6 +434,7 @@ dtrace_load##bits(uintptr_t addr)					\  #define	DTRACE_DYNHASH_SINK	1  #define	DTRACE_DYNHASH_VALID	2 +#define	DTRACE_MATCH_FAIL	-1  #define	DTRACE_MATCH_NEXT	0  #define	DTRACE_MATCH_DONE	1  #define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0') @@ -453,11 +461,13 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);  static void dtrace_enabling_provide(dtrace_provider_t *);  static int dtrace_enabling_match(dtrace_enabling_t *, int *);  static void dtrace_enabling_matchall(void); +static void dtrace_enabling_reap(void);  static dtrace_state_t *dtrace_anon_grab(void);  static uint64_t dtrace_helper(int, dtrace_mstate_t *,      dtrace_state_t *, uint64_t, uint64_t);  static dtrace_helpers_t *dtrace_helpers_create(proc_t *);  static void dtrace_buffer_drop(dtrace_buffer_t *); +static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);  static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,      dtrace_state_t *, dtrace_mstate_t *);  static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, @@ -1098,10 +1108,13 @@ dtrace_priv_proc_common_nocd()  }  static int -dtrace_priv_proc_destructive(dtrace_state_t *state) +dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)  {  	int action = state->dts_cred.dcr_action; +	if (!(mstate->dtms_access & DTRACE_ACCESS_PROC)) +		goto bad; +  	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&  	    dtrace_priv_proc_common_zone(state) == 0)  		goto bad; @@ -1123,15 +1136,17 @@ bad:  }  static int -dtrace_priv_proc_control(dtrace_state_t *state) +dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)  { -	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) -		return (1); +	if (mstate->dtms_access & DTRACE_ACCESS_PROC) { +		if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) +			return (1); -	if (dtrace_priv_proc_common_zone(state) && -	    dtrace_priv_proc_common_user(state) && -	    dtrace_priv_proc_common_nocd()) -		return (1); +		if (dtrace_priv_proc_common_zone(state) && +		    dtrace_priv_proc_common_user(state) && +		    dtrace_priv_proc_common_nocd()) +			return (1); +	}  	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1139,9 +1154,10 @@ dtrace_priv_proc_control(dtrace_state_t *state)  }  static int -dtrace_priv_proc(dtrace_state_t *state) +dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)  { -	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) +	if ((mstate->dtms_access & DTRACE_ACCESS_PROC) && +	    (state->dts_cred.dcr_action & DTRACE_CRA_PROC))  		return (1);  	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1172,6 +1188,109 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state)  }  /* + * Determine if the dte_cond of the specified ECB allows for processing of + * the current probe to continue.  Note that this routine may allow continued + * processing, but with access(es) stripped from the mstate's dtms_access + * field. + */ +static int +dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, +    dtrace_ecb_t *ecb) +{ +	dtrace_probe_t *probe = ecb->dte_probe; +	dtrace_provider_t *prov = probe->dtpr_provider; +	dtrace_pops_t *pops = &prov->dtpv_pops; +	int mode = DTRACE_MODE_NOPRIV_DROP; + +	ASSERT(ecb->dte_cond); + +	if (pops->dtps_mode != NULL) { +		mode = pops->dtps_mode(prov->dtpv_arg, +		    probe->dtpr_id, probe->dtpr_arg); + +		ASSERT((mode & DTRACE_MODE_USER) || +		    (mode & DTRACE_MODE_KERNEL)); +		ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || +		    (mode & DTRACE_MODE_NOPRIV_DROP)); +	} + +	/* +	 * If the dte_cond bits indicate that this consumer is only allowed to +	 * see user-mode firings of this probe, call the provider's dtps_mode() +	 * entry point to check that the probe was fired while in a user +	 * context.  If that's not the case, use the policy specified by the +	 * provider to determine if we drop the probe or merely restrict +	 * operation. +	 */ +	if (ecb->dte_cond & DTRACE_COND_USERMODE) { +		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); + +		if (!(mode & DTRACE_MODE_USER)) { +			if (mode & DTRACE_MODE_NOPRIV_DROP) +				return (0); + +			mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; +		} +	} + +	/* +	 * This is more subtle than it looks. We have to be absolutely certain +	 * that CRED() isn't going to change out from under us so it's only +	 * legit to examine that structure if we're in constrained situations. +	 * Currently, the only times we'll this check is if a non-super-user +	 * has enabled the profile or syscall providers -- providers that +	 * allow visibility of all processes. For the profile case, the check +	 * above will ensure that we're examining a user context. +	 */ +	if (ecb->dte_cond & DTRACE_COND_OWNER) { +		cred_t *cr; +		cred_t *s_cr = state->dts_cred.dcr_cred; +		proc_t *proc; + +		ASSERT(s_cr != NULL); + +		if ((cr = CRED()) == NULL || +		    s_cr->cr_uid != cr->cr_uid || +		    s_cr->cr_uid != cr->cr_ruid || +		    s_cr->cr_uid != cr->cr_suid || +		    s_cr->cr_gid != cr->cr_gid || +		    s_cr->cr_gid != cr->cr_rgid || +		    s_cr->cr_gid != cr->cr_sgid || +		    (proc = ttoproc(curthread)) == NULL || +		    (proc->p_flag & SNOCD)) { +			if (mode & DTRACE_MODE_NOPRIV_DROP) +				return (0); + +			mstate->dtms_access &= ~DTRACE_ACCESS_PROC; +		} +	} + +	/* +	 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not +	 * in our zone, check to see if our mode policy is to restrict rather +	 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC +	 * and DTRACE_ACCESS_ARGS +	 */ +	if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { +		cred_t *cr; +		cred_t *s_cr = state->dts_cred.dcr_cred; + +		ASSERT(s_cr != NULL); + +		if ((cr = CRED()) == NULL || +		    s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { +			if (mode & DTRACE_MODE_NOPRIV_DROP) +				return (0); + +			mstate->dtms_access &= +			    ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); +		} +	} + +	return (1); +} + +/*   * Note:  not called from probe context.  This function is called   * asynchronously (and at a regular interval) from outside of probe context to   * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable @@ -1182,12 +1301,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)  {  	dtrace_dynvar_t *dirty;  	dtrace_dstate_percpu_t *dcpu; -	int i, work = 0; +	dtrace_dynvar_t **rinsep; +	int i, j, work = 0;  	for (i = 0; i < NCPU; i++) {  		dcpu = &dstate->dtds_percpu[i]; - -		ASSERT(dcpu->dtdsc_rinsing == NULL); +		rinsep = &dcpu->dtdsc_rinsing;  		/*  		 * If the dirty list is NULL, there is no dirty work to do. @@ -1195,14 +1314,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)  		if (dcpu->dtdsc_dirty == NULL)  			continue; -		/* -		 * If the clean list is non-NULL, then we're not going to do -		 * any work for this CPU -- it means that there has not been -		 * a dtrace_dynvar() allocation on this CPU (or from this CPU) -		 * since the last time we cleaned house. -		 */ -		if (dcpu->dtdsc_clean != NULL) +		if (dcpu->dtdsc_rinsing != NULL) { +			/* +			 * If the rinsing list is non-NULL, then it is because +			 * this CPU was selected to accept another CPU's +			 * dirty list -- and since that time, dirty buffers +			 * have accumulated.  This is a highly unlikely +			 * condition, but we choose to ignore the dirty +			 * buffers -- they'll be picked up a future cleanse. +			 */  			continue; +		} + +		if (dcpu->dtdsc_clean != NULL) { +			/* +			 * If the clean list is non-NULL, then we're in a +			 * situation where a CPU has done deallocations (we +			 * have a non-NULL dirty list) but no allocations (we +			 * also have a non-NULL clean list).  We can't simply +			 * move the dirty list into the clean list on this +			 * CPU, yet we also don't want to allow this condition +			 * to persist, lest a short clean list prevent a +			 * massive dirty list from being cleaned (which in +			 * turn could lead to otherwise avoidable dynamic +			 * drops).  To deal with this, we look for some CPU +			 * with a NULL clean list, NULL dirty list, and NULL +			 * rinsing list -- and then we borrow this CPU to +			 * rinse our dirty list. +			 */ +			for (j = 0; j < NCPU; j++) { +				dtrace_dstate_percpu_t *rinser; + +				rinser = &dstate->dtds_percpu[j]; + +				if (rinser->dtdsc_rinsing != NULL) +					continue; + +				if (rinser->dtdsc_dirty != NULL) +					continue; + +				if (rinser->dtdsc_clean != NULL) +					continue; + +				rinsep = &rinser->dtdsc_rinsing; +				break; +			} + +			if (j == NCPU) { +				/* +				 * We were unable to find another CPU that +				 * could accept this dirty list -- we are +				 * therefore unable to clean it now. +				 */ +				dtrace_dynvar_failclean++; +				continue; +			} +		}  		work = 1; @@ -1219,7 +1386,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)  			 * on a hash chain, either the dirty list or the  			 * rinsing list for some CPU must be non-NULL.)  			 */ -			dcpu->dtdsc_rinsing = dirty; +			*rinsep = dirty;  			dtrace_membar_producer();  		} while (dtrace_casptr(&dcpu->dtdsc_dirty,  		    dirty, NULL) != dirty); @@ -1650,7 +1817,7 @@ retry:  			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);  			/* -			 * Now we'll move the clean list to the free list. +			 * Now we'll move the clean list to our free list.  			 * It's impossible for this to fail:  the only way  			 * the free list can be updated is through this  			 * code path, and only one CPU can own the clean list. @@ -1663,6 +1830,7 @@ retry:  			 * owners of the clean lists out before resetting  			 * the clean lists.  			 */ +			dcpu = &dstate->dtds_percpu[me];  			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);  			ASSERT(rval == NULL);  			goto retry; @@ -1804,6 +1972,75 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)  	lquanta[levels + 1] += incr;  } +static int +dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, +    uint16_t high, uint16_t nsteps, int64_t value) +{ +	int64_t this = 1, last, next; +	int base = 1, order; + +	ASSERT(factor <= nsteps); +	ASSERT(nsteps % factor == 0); + +	for (order = 0; order < low; order++) +		this *= factor; + +	/* +	 * If our value is less than our factor taken to the power of the +	 * low order of magnitude, it goes into the zeroth bucket. +	 */ +	if (value < (last = this)) +		return (0); + +	for (this *= factor; order <= high; order++) { +		int nbuckets = this > nsteps ? nsteps : this; + +		if ((next = this * factor) < this) { +			/* +			 * We should not generally get log/linear quantizations +			 * with a high magnitude that allows 64-bits to +			 * overflow, but we nonetheless protect against this +			 * by explicitly checking for overflow, and clamping +			 * our value accordingly. +			 */ +			value = this - 1; +		} + +		if (value < this) { +			/* +			 * If our value lies within this order of magnitude, +			 * determine its position by taking the offset within +			 * the order of magnitude, dividing by the bucket +			 * width, and adding to our (accumulated) base. +			 */ +			return (base + (value - last) / (this / nbuckets)); +		} + +		base += nbuckets - (nbuckets / factor); +		last = this; +		this = next; +	} + +	/* +	 * Our value is greater than or equal to our factor taken to the +	 * power of one plus the high magnitude -- return the top bucket. +	 */ +	return (base); +} + +static void +dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) +{ +	uint64_t arg = *llquanta++; +	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); +	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); +	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); +	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + +	llquanta[dtrace_aggregate_llquantize_bucket(factor, +	    low, high, nsteps, nval)] += incr; +} +  /*ARGSUSED*/  static void  dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) @@ -2585,6 +2822,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  	switch (v) {  	case DIF_VAR_ARGS: +		if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) { +			cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= +			    CPU_DTRACE_KPRIV; +			return (0); +		} +  		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);  		if (ndx >= sizeof (mstate->dtms_arg) /  		    sizeof (mstate->dtms_arg[0])) { @@ -2620,7 +2863,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  	case DIF_VAR_UREGS: {  		klwp_t *lwp; -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		if ((lwp = curthread->t_lwp) == NULL) { @@ -2632,6 +2875,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		return (dtrace_getreg(lwp->lwp_regs, ndx));  	} +	case DIF_VAR_VMREGS: { +		uint64_t rval; + +		if (!dtrace_priv_kernel(state)) +			return (0); + +		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + +		rval = dtrace_getvmreg(ndx, +		    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); + +		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + +		return (rval); +	} +  	case DIF_VAR_CURTHREAD:  		if (!dtrace_priv_kernel(state))  			return (0); @@ -2684,7 +2943,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		return (mstate->dtms_stackdepth);  	case DIF_VAR_USTACKDEPTH: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {  			/* @@ -2739,7 +2998,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		return (mstate->dtms_caller);  	case DIF_VAR_UCALLER: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { @@ -2787,7 +3046,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		    state, mstate));  	case DIF_VAR_PID: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -2809,7 +3068,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		return ((uint64_t)curthread->t_procp->p_pidp->pid_id);  	case DIF_VAR_PPID: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -2836,7 +3095,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		return ((uint64_t)curthread->t_tid);  	case DIF_VAR_EXECNAME: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -2856,7 +3115,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		    state, mstate));  	case DIF_VAR_ZONENAME: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -2876,7 +3135,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		    state, mstate));  	case DIF_VAR_UID: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -2897,7 +3156,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  		return ((uint64_t)curthread->t_procp->p_cred->cr_uid);  	case DIF_VAR_GID: -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -2919,7 +3178,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,  	case DIF_VAR_ERRNO: {  		klwp_t *lwp; -		if (!dtrace_priv_proc(state)) +		if (!dtrace_priv_proc(state, mstate))  			return (0);  		/* @@ -3259,7 +3518,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,  		uint64_t size = tupregs[2].dttk_value;  		if (!dtrace_destructive_disallow && -		    dtrace_priv_proc_control(state) && +		    dtrace_priv_proc_control(state, mstate) &&  		    !dtrace_istoxic(kaddr, size)) {  			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);  			dtrace_copyout(kaddr, uaddr, size, flags); @@ -3274,7 +3533,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,  		uint64_t size = tupregs[2].dttk_value;  		if (!dtrace_destructive_disallow && -		    dtrace_priv_proc_control(state) && +		    dtrace_priv_proc_control(state, mstate) &&  		    !dtrace_istoxic(kaddr, size)) {  			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);  			dtrace_copyoutstr(kaddr, uaddr, size, flags); @@ -3600,7 +3859,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,  		int64_t index = (int64_t)tupregs[1].dttk_value;  		int64_t remaining = (int64_t)tupregs[2].dttk_value;  		size_t len = dtrace_strlen((char *)s, size); -		int64_t i = 0; +		int64_t i;  		if (!dtrace_canload(s, len + 1, mstate, vstate)) {  			regs[rd] = NULL; @@ -3645,7 +3904,54 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,  		break;  	} -	case DIF_SUBR_GETMAJOR: +	case DIF_SUBR_TOUPPER: +	case DIF_SUBR_TOLOWER: { +		uintptr_t s = tupregs[0].dttk_value; +		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; +		char *dest = (char *)mstate->dtms_scratch_ptr, c; +		size_t len = dtrace_strlen((char *)s, size); +		char lower, upper, convert; +		int64_t i; + +		if (subr == DIF_SUBR_TOUPPER) { +			lower = 'a'; +			upper = 'z'; +			convert = 'A'; +		} else { +			lower = 'A'; +			upper = 'Z'; +			convert = 'a'; +		} + +		if (!dtrace_canload(s, len + 1, mstate, vstate)) { +			regs[rd] = NULL; +			break; +		} + +		if (!DTRACE_INSCRATCH(mstate, size)) { +			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); +			regs[rd] = NULL; +			break; +		} + +		for (i = 0; i < size - 1; i++) { +			if ((c = dtrace_load8(s + i)) == '\0') +				break; + +			if (c >= lower && c <= upper) +				c = convert + (c - lower); + +			dest[i] = c; +		} + +		ASSERT(i < size); +		dest[i] = '\0'; +		regs[rd] = (uintptr_t)dest; +		mstate->dtms_scratch_ptr += size; +		break; +	} + +case DIF_SUBR_GETMAJOR:  #ifdef _LP64  		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;  #else @@ -3907,9 +4213,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,  	case DIF_SUBR_LLTOSTR: {  		int64_t i = (int64_t)tupregs[0].dttk_value; -		int64_t val = i < 0 ? i * -1 : i; -		uint64_t size = 22;	/* enough room for 2^64 in decimal */ +		uint64_t val, digit; +		uint64_t size = 65;	/* enough room for 2^64 in binary */  		char *end = (char *)mstate->dtms_scratch_ptr + size - 1; +		int base = 10; + +		if (nargs > 1) { +			if ((base = tupregs[1].dttk_value) <= 1 || +			    base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { +				*flags |= CPU_DTRACE_ILLOP; +				break; +			} +		} + +		val = (base == 10 && i < 0) ? i * -1 : i;  		if (!DTRACE_INSCRATCH(mstate, size)) {  			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -3917,13 +4234,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,  			break;  		} -		for (*end-- = '\0'; val; val /= 10) -			*end-- = '0' + (val % 10); +		for (*end-- = '\0'; val; val /= base) { +			if ((digit = val % base) <= '9' - '0') { +				*end-- = '0' + digit; +			} else { +				*end-- = 'a' + (digit - ('9' - '0') - 1); +			} +		} + +		if (i == 0 && base == 16) +			*end-- = '0'; + +		if (base == 16) +			*end-- = 'x'; -		if (i == 0) +		if (i == 0 || base == 8 || base == 16)  			*end-- = '0'; -		if (i < 0) +		if (i < 0 && base == 10)  			*end-- = '-';  		regs[rd] = (uintptr_t)end + 1; @@ -5558,6 +5886,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];  		dtrace_vstate_t *vstate = &state->dts_vstate;  		dtrace_provider_t *prov = probe->dtpr_provider; +		uint64_t tracememsize = 0;  		int committed = 0;  		caddr_t tomax; @@ -5578,6 +5907,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  #endif  		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; +		mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;  		*flags &= ~CPU_DTRACE_ERROR;  		if (prov == dtrace_provider) { @@ -5615,65 +5945,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  			}  		} -		if (ecb->dte_cond) { -			/* -			 * If the dte_cond bits indicate that this -			 * consumer is only allowed to see user-mode firings -			 * of this probe, call the provider's dtps_usermode() -			 * entry point to check that the probe was fired -			 * while in a user context. Skip this ECB if that's -			 * not the case. -			 */ -			if ((ecb->dte_cond & DTRACE_COND_USERMODE) && -			    prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, -			    probe->dtpr_id, probe->dtpr_arg) == 0) -				continue; - -			/* -			 * This is more subtle than it looks. We have to be -			 * absolutely certain that CRED() isn't going to -			 * change out from under us so it's only legit to -			 * examine that structure if we're in constrained -			 * situations. Currently, the only times we'll this -			 * check is if a non-super-user has enabled the -			 * profile or syscall providers -- providers that -			 * allow visibility of all processes. For the -			 * profile case, the check above will ensure that -			 * we're examining a user context. -			 */ -			if (ecb->dte_cond & DTRACE_COND_OWNER) { -				cred_t *cr; -				cred_t *s_cr = -				    ecb->dte_state->dts_cred.dcr_cred; -				proc_t *proc; - -				ASSERT(s_cr != NULL); - -				if ((cr = CRED()) == NULL || -				    s_cr->cr_uid != cr->cr_uid || -				    s_cr->cr_uid != cr->cr_ruid || -				    s_cr->cr_uid != cr->cr_suid || -				    s_cr->cr_gid != cr->cr_gid || -				    s_cr->cr_gid != cr->cr_rgid || -				    s_cr->cr_gid != cr->cr_sgid || -				    (proc = ttoproc(curthread)) == NULL || -				    (proc->p_flag & SNOCD)) -					continue; -			} - -			if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { -				cred_t *cr; -				cred_t *s_cr = -				    ecb->dte_state->dts_cred.dcr_cred; - -				ASSERT(s_cr != NULL); - -				if ((cr = CRED()) == NULL || -				    s_cr->cr_zone->zone_id != -				    cr->cr_zone->zone_id) -					continue; -			} -		} +		if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb)) +			continue;  		if (now - state->dts_alive > dtrace_deadman_timeout) {  			/* @@ -5713,9 +5986,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  		mstate.dtms_present |= DTRACE_MSTATE_EPID;  		if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) -			mstate.dtms_access = DTRACE_ACCESS_KERNEL; -		else -			mstate.dtms_access = 0; +			mstate.dtms_access |= DTRACE_ACCESS_KERNEL;  		if (pred != NULL) {  			dtrace_difo_t *dp = pred->dtp_difo; @@ -5775,7 +6046,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  			switch (act->dta_kind) {  			case DTRACEACT_STOP: -				if (dtrace_priv_proc_destructive(state)) +				if (dtrace_priv_proc_destructive(state, +				    &mstate))  					dtrace_action_stop();  				continue; @@ -5802,7 +6074,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  			case DTRACEACT_JSTACK:  			case DTRACEACT_USTACK: -				if (!dtrace_priv_proc(state)) +				if (!dtrace_priv_proc(state, &mstate))  					continue;  				/* @@ -5835,6 +6107,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  					continue;  				} +				/* +				 * Clear the string space, since there's no +				 * helper to do it for us. +				 */ +				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) { +					int depth = DTRACE_USTACK_NFRAMES( +					    rec->dtrd_arg); +					size_t strsize = DTRACE_USTACK_STRSIZE( +					    rec->dtrd_arg); +					uint64_t *buf = (uint64_t *)(tomax + +					    valoffs); +					void *strspace = &buf[depth + 1]; + +					dtrace_bzero(strspace, +					    MIN(depth, strsize)); +				} +  				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);  				dtrace_getupcstack((uint64_t *)  				    (tomax + valoffs), @@ -5888,7 +6177,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  				continue;  			case DTRACEACT_RAISE: -				if (dtrace_priv_proc_destructive(state)) +				if (dtrace_priv_proc_destructive(state, +				    &mstate))  					dtrace_action_raise(val);  				continue; @@ -5915,6 +6205,11 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  			case DTRACEACT_PRINTA:  			case DTRACEACT_SYSTEM:  			case DTRACEACT_FREOPEN: +			case DTRACEACT_TRACEMEM: +				break; + +			case DTRACEACT_TRACEMEM_DYNSIZE: +				tracememsize = val;  				break;  			case DTRACEACT_SYM: @@ -5928,7 +6223,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  			case DTRACEACT_UADDR: {  				struct pid *pid = curthread->t_procp->p_pidp; -				if (!dtrace_priv_proc(state)) +				if (!dtrace_priv_proc(state, &mstate))  					continue;  				DTRACE_STORE(uint64_t, tomax, @@ -5980,6 +6275,12 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,  			if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {  				uintptr_t end = valoffs + size; +				if (tracememsize != 0 && +				    valoffs + tracememsize < end) { +					end = valoffs + tracememsize; +					tracememsize = 0; +				} +  				if (!dtrace_vcanload((void *)(uintptr_t)val,  				    &dp->dtdo_rtype, &mstate, vstate))  					continue; @@ -6655,7 +6956,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,  {  	dtrace_probe_t template, *probe;  	dtrace_hash_t *hash = NULL; -	int len, best = INT_MAX, nmatched = 0; +	int len, rc, best = INT_MAX, nmatched = 0;  	dtrace_id_t i;  	ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -6667,7 +6968,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,  	if (pkp->dtpk_id != DTRACE_IDNONE) {  		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&  		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { -			(void) (*matched)(probe, arg); +			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) +				return (DTRACE_MATCH_FAIL);  			nmatched++;  		}  		return (nmatched); @@ -6714,8 +7016,12 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,  			nmatched++; -			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) +			if ((rc = (*matched)(probe, arg)) != +			    DTRACE_MATCH_NEXT) { +				if (rc == DTRACE_MATCH_FAIL) +					return (DTRACE_MATCH_FAIL);  				break; +			}  		}  		return (nmatched); @@ -6734,8 +7040,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,  		nmatched++; -		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) +		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { +			if (rc == DTRACE_MATCH_FAIL) +				return (DTRACE_MATCH_FAIL);  			break; +		}  	}  	return (nmatched); @@ -6852,9 +7161,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,  	if ((priv & DTRACE_PRIV_KERNEL) &&  	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && -	    pops->dtps_usermode == NULL) { +	    pops->dtps_mode == NULL) {  		cmn_err(CE_WARN, "failed to register provider '%s': need " -		    "dtps_usermode() op for given privilege attributes", name); +		    "dtps_mode() op for given privilege attributes", name);  		return (EINVAL);  	} @@ -6951,11 +7260,11 @@ dtrace_unregister(dtrace_provider_id_t id)  {  	dtrace_provider_t *old = (dtrace_provider_t *)id;  	dtrace_provider_t *prev = NULL; -	int i, self = 0; +	int i, self = 0, noreap = 0;  	dtrace_probe_t *probe, *first = NULL;  	if (old->dtpv_pops.dtps_enable == -	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { +	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {  		/*  		 * If DTrace itself is the provider, we're called with locks  		 * already held. @@ -7008,14 +7317,31 @@ dtrace_unregister(dtrace_provider_id_t id)  			continue;  		/* -		 * We have at least one ECB; we can't remove this provider. +		 * If we are trying to unregister a defunct provider, and the +		 * provider was made defunct within the interval dictated by +		 * dtrace_unregister_defunct_reap, we'll (asynchronously) +		 * attempt to reap our enablings.  To denote that the provider +		 * should reattempt to unregister itself at some point in the +		 * future, we will return a differentiable error code (EAGAIN +		 * instead of EBUSY) in this case.  		 */ +		if (dtrace_gethrtime() - old->dtpv_defunct > +		    dtrace_unregister_defunct_reap) +			noreap = 1; +  		if (!self) {  			mutex_exit(&dtrace_lock);  			mutex_exit(&mod_lock);  			mutex_exit(&dtrace_provider_lock);  		} -		return (EBUSY); + +		if (noreap) +			return (EBUSY); + +		(void) taskq_dispatch(dtrace_taskq, +		    (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); + +		return (EAGAIN);  	}  	/* @@ -7101,12 +7427,12 @@ dtrace_invalidate(dtrace_provider_id_t id)  	dtrace_provider_t *pvp = (dtrace_provider_t *)id;  	ASSERT(pvp->dtpv_pops.dtps_enable != -	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); +	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);  	mutex_enter(&dtrace_provider_lock);  	mutex_enter(&dtrace_lock); -	pvp->dtpv_defunct = 1; +	pvp->dtpv_defunct = dtrace_gethrtime();  	mutex_exit(&dtrace_lock);  	mutex_exit(&dtrace_provider_lock); @@ -7142,7 +7468,7 @@ dtrace_condense(dtrace_provider_id_t id)  	 * Make sure this isn't the dtrace provider itself.  	 */  	ASSERT(prov->dtpv_pops.dtps_enable != -	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); +	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);  	mutex_enter(&dtrace_provider_lock);  	mutex_enter(&dtrace_lock); @@ -8103,7 +8429,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,  			break;  		default: -			err += efunc(dp->dtdo_len - 1, "bad return size"); +			err += efunc(dp->dtdo_len - 1, "bad return size\n");  		}  	} @@ -9096,7 +9422,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)  	return (ecb);  } -static void +static int  dtrace_ecb_enable(dtrace_ecb_t *ecb)  {  	dtrace_probe_t *probe = ecb->dte_probe; @@ -9109,7 +9435,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)  		/*  		 * This is the NULL probe -- there's nothing to do.  		 */ -		return; +		return (0);  	}  	if (probe->dtpr_ecb == NULL) { @@ -9123,8 +9449,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)  		if (ecb->dte_predicate != NULL)  			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; -		prov->dtpv_pops.dtps_enable(prov->dtpv_arg, -		    probe->dtpr_id, probe->dtpr_arg); +		return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, +		    probe->dtpr_id, probe->dtpr_arg));  	} else {  		/*  		 * This probe is already active.  Swing the last pointer to @@ -9137,6 +9463,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)  		probe->dtpr_predcache = 0;  		dtrace_sync(); +		return (0);  	}  } @@ -9312,6 +9639,35 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)  		break;  	} +	case DTRACEAGG_LLQUANTIZE: { +		uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); +		uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); +		uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); +		uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); +		int64_t v; + +		agg->dtag_initial = desc->dtad_arg; +		agg->dtag_aggregate = dtrace_aggregate_llquantize; + +		if (factor < 2 || low >= high || nsteps < factor) +			goto err; + +		/* +		 * Now check that the number of steps evenly divides a power +		 * of the factor.  (This assures both integer bucket size and +		 * linearity within each magnitude.) +		 */ +		for (v = factor; v < nsteps; v *= factor) +			continue; + +		if ((v % nsteps) || (nsteps % factor)) +			goto err; + +		size = (dtrace_aggregate_llquantize_bucket(factor, +		    low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); +		break; +	} +  	case DTRACEAGG_AVG:  		agg->dtag_aggregate = dtrace_aggregate_avg;  		size = sizeof (uint64_t) * 2; @@ -9481,12 +9837,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)  		case DTRACEACT_PRINTA:  		case DTRACEACT_SYSTEM:  		case DTRACEACT_FREOPEN: +		case DTRACEACT_DIFEXPR:  			/*  			 * We know that our arg is a string -- turn it into a  			 * format.  			 */  			if (arg == NULL) { -				ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); +				ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || +				    desc->dtad_kind == DTRACEACT_DIFEXPR);  				format = 0;  			} else {  				ASSERT(arg != NULL); @@ -9497,7 +9855,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)  			/*FALLTHROUGH*/  		case DTRACEACT_LIBACT: -		case DTRACEACT_DIFEXPR: +		case DTRACEACT_TRACEMEM: +		case DTRACEACT_TRACEMEM_DYNSIZE:  			if (dp == NULL)  				return (EINVAL); @@ -9920,7 +10279,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)  	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)  		return (DTRACE_MATCH_DONE); -	dtrace_ecb_enable(ecb); +	if (dtrace_ecb_enable(ecb) < 0) +		return (DTRACE_MATCH_FAIL); +  	return (DTRACE_MATCH_NEXT);  } @@ -9978,6 +10339,7 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)  	caddr_t tomax = buf->dtb_tomax;  	caddr_t xamot = buf->dtb_xamot;  	dtrace_icookie_t cookie; +	hrtime_t now = dtrace_gethrtime();  	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));  	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); @@ -9993,6 +10355,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)  	buf->dtb_drops = 0;  	buf->dtb_errors = 0;  	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); +	buf->dtb_interval = now - buf->dtb_switched; +	buf->dtb_switched = now;  	dtrace_interrupt_enable(cookie);  } @@ -10025,14 +10389,17 @@ dtrace_buffer_activate(dtrace_state_t *state)  static int  dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, -    processorid_t cpu) +    processorid_t cpu, int *factor)  {  	cpu_t *cp;  	dtrace_buffer_t *buf; +	int allocated = 0, desired = 0;  	ASSERT(MUTEX_HELD(&cpu_lock));  	ASSERT(MUTEX_HELD(&dtrace_lock)); +	*factor = 1; +  	if (size > dtrace_nonroot_maxsize &&  	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))  		return (EFBIG); @@ -10057,7 +10424,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,  		ASSERT(buf->dtb_xamot == NULL); -		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL) +		if ((buf->dtb_tomax = kmem_zalloc(size, +		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)  			goto err;  		buf->dtb_size = size; @@ -10068,7 +10436,8 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,  		if (flags & DTRACEBUF_NOSWITCH)  			continue; -		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL) +		if ((buf->dtb_xamot = kmem_zalloc(size, +		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)  			goto err;  	} while ((cp = cp->cpu_next) != cpu_list); @@ -10082,16 +10451,19 @@ err:  			continue;  		buf = &bufs[cp->cpu_id]; +		desired += 2;  		if (buf->dtb_xamot != NULL) {  			ASSERT(buf->dtb_tomax != NULL);  			ASSERT(buf->dtb_size == size);  			kmem_free(buf->dtb_xamot, size); +			allocated++;  		}  		if (buf->dtb_tomax != NULL) {  			ASSERT(buf->dtb_size == size);  			kmem_free(buf->dtb_tomax, size); +			allocated++;  		}  		buf->dtb_tomax = NULL; @@ -10099,6 +10471,8 @@ err:  		buf->dtb_size = 0;  	} while ((cp = cp->cpu_next) != cpu_list); +	*factor = desired / (allocated > 0 ? allocated : 1); +  	return (ENOMEM);  } @@ -10400,6 +10774,36 @@ dtrace_buffer_polish(dtrace_buffer_t *buf)  	}  } +/* + * This routine determines if data generated at the specified time has likely + * been entirely consumed at user-level.  This routine is called to determine + * if an ECB on a defunct probe (but for an active enabling) can be safely + * disabled and destroyed. + */ +static int +dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) +{ +	int i; + +	for (i = 0; i < NCPU; i++) { +		dtrace_buffer_t *buf = &bufs[i]; + +		if (buf->dtb_size == 0) +			continue; + +		if (buf->dtb_flags & DTRACEBUF_RING) +			return (0); + +		if (!buf->dtb_switched && buf->dtb_offset != 0) +			return (0); + +		if (buf->dtb_switched - buf->dtb_interval < when) +			return (0); +	} + +	return (1); +} +  static void  dtrace_buffer_free(dtrace_buffer_t *bufs)  { @@ -10557,6 +10961,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab)  		ASSERT(enab->dten_vstate->dtvs_state != NULL);  		ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);  		enab->dten_vstate->dtvs_state->dts_nretained--; +		dtrace_retained_gen++;  	}  	if (enab->dten_prev == NULL) { @@ -10599,6 +11004,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab)  		return (ENOSPC);  	state->dts_nretained++; +	dtrace_retained_gen++;  	if (dtrace_retained == NULL) {  		dtrace_retained = enab; @@ -10713,7 +11119,7 @@ static int  dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)  {  	int i = 0; -	int matched = 0; +	int total_matched = 0, matched = 0;  	ASSERT(MUTEX_HELD(&cpu_lock));  	ASSERT(MUTEX_HELD(&dtrace_lock)); @@ -10724,7 +11130,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)  		enab->dten_current = ep;  		enab->dten_error = 0; -		matched += dtrace_probe_enable(&ep->dted_probe, enab); +		/* +		 * If a provider failed to enable a probe then get out and +		 * let the consumer know we failed. +		 */ +		if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) +			return (EBUSY); + +		total_matched += matched;  		if (enab->dten_error != 0) {  			/* @@ -10752,7 +11165,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)  	enab->dten_probegen = dtrace_probegen;  	if (nmatched != NULL) -		*nmatched = matched; +		*nmatched = total_matched;  	return (0);  } @@ -10766,13 +11179,24 @@ dtrace_enabling_matchall(void)  	mutex_enter(&dtrace_lock);  	/* -	 * Because we can be called after dtrace_detach() has been called, we -	 * cannot assert that there are retained enablings.  We can safely -	 * load from dtrace_retained, however:  the taskq_destroy() at the -	 * end of dtrace_detach() will block pending our completion. +	 * Iterate over all retained enablings to see if any probes match +	 * against them.  We only perform this operation on enablings for which +	 * we have sufficient permissions by virtue of being in the global zone +	 * or in the same zone as the DTrace client.  Because we can be called +	 * after dtrace_detach() has been called, we cannot assert that there +	 * are retained enablings.  We can safely load from dtrace_retained, +	 * however:  the taskq_destroy() at the end of dtrace_detach() will +	 * block pending our completion.  	 */ -	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) -		(void) dtrace_enabling_match(enab, NULL); +	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { +		dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred; +		cred_t *cr = dcr->dcr_cred; +		zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0; + +		if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL && +		    (zone == GLOBAL_ZONEID || getzoneid() == zone))) +			(void) dtrace_enabling_match(enab, NULL); +	}  	mutex_exit(&dtrace_lock);  	mutex_exit(&cpu_lock); @@ -10830,6 +11254,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv)  {  	int i, all = 0;  	dtrace_probedesc_t desc; +	dtrace_genid_t gen;  	ASSERT(MUTEX_HELD(&dtrace_lock));  	ASSERT(MUTEX_HELD(&dtrace_provider_lock)); @@ -10840,15 +11265,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv)  	}  	do { -		dtrace_enabling_t *enab = dtrace_retained; +		dtrace_enabling_t *enab;  		void *parg = prv->dtpv_arg; -		for (; enab != NULL; enab = enab->dten_next) { +retry: +		gen = dtrace_retained_gen; +		for (enab = dtrace_retained; enab != NULL; +		    enab = enab->dten_next) {  			for (i = 0; i < enab->dten_ndesc; i++) {  				desc = enab->dten_desc[i]->dted_probe;  				mutex_exit(&dtrace_lock);  				prv->dtpv_pops.dtps_provide(parg, &desc);  				mutex_enter(&dtrace_lock); +				/* +				 * Process the retained enablings again if +				 * they have changed while we weren't holding +				 * dtrace_lock. +				 */ +				if (gen != dtrace_retained_gen) +					goto retry;  			}  		}  	} while (all && (prv = prv->dtpv_next) != NULL); @@ -10859,6 +11294,85 @@ dtrace_enabling_provide(dtrace_provider_t *prv)  }  /* + * Called to reap ECBs that are attached to probes from defunct providers. + */ +static void +dtrace_enabling_reap(void) +{ +	dtrace_provider_t *prov; +	dtrace_probe_t *probe; +	dtrace_ecb_t *ecb; +	hrtime_t when; +	int i; + +	mutex_enter(&cpu_lock); +	mutex_enter(&dtrace_lock); + +	for (i = 0; i < dtrace_nprobes; i++) { +		if ((probe = dtrace_probes[i]) == NULL) +			continue; + +		if (probe->dtpr_ecb == NULL) +			continue; + +		prov = probe->dtpr_provider; + +		if ((when = prov->dtpv_defunct) == 0) +			continue; + +		/* +		 * We have ECBs on a defunct provider:  we want to reap these +		 * ECBs to allow the provider to unregister.  The destruction +		 * of these ECBs must be done carefully:  if we destroy the ECB +		 * and the consumer later wishes to consume an EPID that +		 * corresponds to the destroyed ECB (and if the EPID metadata +		 * has not been previously consumed), the consumer will abort +		 * processing on the unknown EPID.  To reduce (but not, sadly, +		 * eliminate) the possibility of this, we will only destroy an +		 * ECB for a defunct provider if, for the state that +		 * corresponds to the ECB: +		 * +		 *  (a)	There is no speculative tracing (which can effectively +		 *	cache an EPID for an arbitrary amount of time). +		 * +		 *  (b)	The principal buffers have been switched twice since the +		 *	provider became defunct. +		 * +		 *  (c)	The aggregation buffers are of zero size or have been +		 *	switched twice since the provider became defunct. +		 * +		 * We use dts_speculates to determine (a) and call a function +		 * (dtrace_buffer_consumed()) to determine (b) and (c).  Note +		 * that as soon as we've been unable to destroy one of the ECBs +		 * associated with the probe, we quit trying -- reaping is only +		 * fruitful in as much as we can destroy all ECBs associated +		 * with the defunct provider's probes. +		 */ +		while ((ecb = probe->dtpr_ecb) != NULL) { +			dtrace_state_t *state = ecb->dte_state; +			dtrace_buffer_t *buf = state->dts_buffer; +			dtrace_buffer_t *aggbuf = state->dts_aggbuffer; + +			if (state->dts_speculates) +				break; + +			if (!dtrace_buffer_consumed(buf, when)) +				break; + +			if (!dtrace_buffer_consumed(aggbuf, when)) +				break; + +			dtrace_ecb_disable(ecb); +			ASSERT(probe->dtpr_ecb != ecb); +			dtrace_ecb_destroy(ecb); +		} +	} + +	mutex_exit(&dtrace_lock); +	mutex_exit(&cpu_lock); +} + +/*   * DTrace DOF Functions   */  /*ARGSUSED*/ @@ -10970,7 +11484,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp)  	dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); -	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { +	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || +	    dof->dofh_loadsz != hdr.dofh_loadsz) {  		kmem_free(dof, hdr.dofh_loadsz);  		*errp = EFAULT;  		return (NULL); @@ -11362,15 +11877,20 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,  		    (uintptr_t)sec->dofs_offset + offs);  		kind = (dtrace_actkind_t)desc->dofa_kind; -		if (DTRACEACT_ISPRINTFLIKE(kind) && +		if ((DTRACEACT_ISPRINTFLIKE(kind) &&  		    (kind != DTRACEACT_PRINTA || +		    desc->dofa_strtab != DOF_SECIDX_NONE)) || +		    (kind == DTRACEACT_DIFEXPR &&  		    desc->dofa_strtab != DOF_SECIDX_NONE)) {  			dof_sec_t *strtab;  			char *str, *fmt;  			uint64_t i;  			/* -			 * printf()-like actions must have a format string. +			 * The argument to these actions is an index into the +			 * DOF string table.  For printf()-like actions, this +			 * is the format string.  For print(), this is the +			 * CTF type of the expression result.  			 */  			if ((strtab = dtrace_dof_sect(dof,  			    DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -11698,6 +12218,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,  			}  		} +		if (DOF_SEC_ISLOADABLE(sec->dofs_type) && +		    !(sec->dofs_flags & DOF_SECF_LOAD)) { +			dtrace_dof_error(dof, "loadable section with load " +			    "flag unset"); +			return (-1); +		} +  		if (!(sec->dofs_flags & DOF_SECF_LOAD))  			continue; /* just ignore non-loadable sections */ @@ -11849,7 +12376,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)  	if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))  		size = min; -	if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL) +	if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)  		return (ENOMEM);  	dstate->dtds_size = size; @@ -12211,7 +12738,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)  {  	dtrace_optval_t *opt = state->dts_options, size;  	processorid_t cpu; -	int flags = 0, rval; +	int flags = 0, rval, factor, divisor = 1;  	ASSERT(MUTEX_HELD(&dtrace_lock));  	ASSERT(MUTEX_HELD(&cpu_lock)); @@ -12241,7 +12768,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)  			flags |= DTRACEBUF_INACTIVE;  	} -	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { +	for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {  		/*  		 * The size must be 8-byte aligned.  If the size is not 8-byte  		 * aligned, drop it down by the difference. @@ -12259,7 +12786,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)  			return (E2BIG);  		} -		rval = dtrace_buffer_alloc(buf, size, flags, cpu); +		rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);  		if (rval != ENOMEM) {  			opt[which] = size; @@ -12268,6 +12795,9 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)  		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)  			return (rval); + +		for (divisor = 2; divisor < factor; divisor <<= 1) +			continue;  	}  	return (ENOMEM); @@ -12367,7 +12897,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)  		goto out;  	} -	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP); +	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), +	    KM_NOSLEEP | KM_NORMALPRI);  	if (spec == NULL) {  		rval = ENOMEM; @@ -12378,7 +12909,8 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)  	state->dts_nspeculations = (int)nspec;  	for (i = 0; i < nspec; i++) { -		if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) { +		if ((buf = kmem_zalloc(bufsize, +		    KM_NOSLEEP | KM_NORMALPRI)) == NULL) {  			rval = ENOMEM;  			goto err;  		} @@ -14390,7 +14922,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)  	 * If this wasn't an open with the "helper" minor, then it must be  	 * the "dtrace" minor.  	 */ -	ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); +	if (getminor(*devp) != DTRACEMNRN_DTRACE) +		return (ENXIO);  	/*  	 * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -14427,7 +14960,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)  	mutex_exit(&cpu_lock);  	if (state == NULL) { -		if (--dtrace_opens == 0) +		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)  			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);  		mutex_exit(&dtrace_lock);  		return (EAGAIN); @@ -14463,7 +14996,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)  	dtrace_state_destroy(state);  	ASSERT(dtrace_opens > 0); -	if (--dtrace_opens == 0) + +	/* +	 * Only relinquish control of the kernel debugger interface when there +	 * are no consumers and no anonymous enablings. +	 */ +	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)  		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);  	mutex_exit(&dtrace_lock); @@ -15458,7 +15996,8 @@ static struct dev_ops dtrace_ops = {  	nodev,			/* reset */  	&dtrace_cb_ops,		/* driver operations */  	NULL,			/* bus operations */ -	nodev			/* dev power */ +	nodev,			/* dev power */ +	ddi_quiesce_not_needed,		/* quiesce */  };  static struct modldrv modldrv = {  | 
