 uts/intel/dtrace/fasttrap_isa.c | 1745 +++++++++++++++
 uts/sparc/dtrace/fasttrap_isa.c | 1597 ++++++++++++++
 2 files changed, 3342 insertions(+), 0 deletions(-)
diff --git a/uts/intel/dtrace/fasttrap_isa.c b/uts/intel/dtrace/fasttrap_isa.c
new file mode 100644
index 000000000000..1b93869a7358
--- /dev/null
+++ b/uts/intel/dtrace/fasttrap_isa.c
@@ -0,0 +1,1745 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/fasttrap_isa.h>
+#include <sys/fasttrap_impl.h>
+#include <sys/dtrace.h>
+#include <sys/dtrace_impl.h>
+#include <sys/cmn_err.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/segments.h>
+#include <sys/x86_archext.h>
+#include <sys/sysmacros.h>
+#include <sys/trap.h>
+#include <sys/archsystm.h>
+
+/*
+ * Lossless User-Land Tracing on x86
+ * ---------------------------------
+ *
+ * The execution of most instructions is not dependent on the address; for
+ * these instructions it is sufficient to copy them into the user process's
+ * address space and execute them. To effectively single-step an instruction
+ * in user-land, we copy out the following sequence of instructions to scratch
+ * space in the user thread's ulwp_t structure.
+ *
+ * We then set the program counter (%eip or %rip) to point to this scratch
+ * space. Once execution resumes, the original instruction is executed and
+ * then control flow is redirected to what was originally the subsequent
+ * instruction. If the kernel attempts to deliver a signal while single-
+ * stepping, the signal is deferred and the program counter is moved into the
+ * second sequence of instructions. The second sequence ends in a trap into
+ * the kernel where the deferred signal is then properly handled and delivered.
+ *
+ * For instructions whose execution is position dependent, we perform simple
+ * emulation. These instructions are limited to control transfer
+ * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
+ * of %rip-relative addressing that means that almost any instruction can be
+ * position dependent. For all the details on how we emulate generic
+ * instructions including %rip-relative instructions, see the code in
+ * fasttrap_pid_probe() below where we handle instructions of type
+ * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
+ */
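As a quick illustration of what the bit-field macros defined just below extract, here is a standalone user-land sketch (not part of the original commit; the MODRM_*/REX_* names are local copies of the macros that follow) that cracks the byte sequence 48 ff d0, i.e. callq *%rax:

#include <stdio.h>
#include <stdint.h>

/* Local copies of the ModRM/REX field macros defined in this file. */
#define	MODRM_MOD(m)	(((m) >> 6) & 0x3)
#define	MODRM_REG(m)	(((m) >> 3) & 0x7)
#define	MODRM_RM(m)	((m) & 0x7)
#define	REX_W(r)	(((r) >> 3) & 1)
#define	REX_B(r)	((r) & 1)

int
main(void)
{
	/* 48 ff d0 = callq *%rax: REX.W prefix, group-5 opcode, ModRM. */
	uint8_t rex = 0x48, op = 0xff, modrm = 0xd0;

	printf("opcode %#x, REX.W=%d REX.B=%d\n", op, REX_W(rex), REX_B(rex));
	/* mod=3 is register-direct; reg=2 selects the group-5 "call" op. */
	printf("mod=%d reg=%d rm=%d\n",
	    MODRM_MOD(modrm), MODRM_REG(modrm), MODRM_RM(modrm));
	return (0);
}

This is exactly the case the FASTTRAP_GROUP5_OP handling below takes (reg == 2, mod == 3), so the tracepoint is typed FASTTRAP_T_CALL with a register-indirect target.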
+
+#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
+#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
+#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
+#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))
+
+#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
+#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
+#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)
+
+#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
+#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
+#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
+#define	FASTTRAP_REX_B(rex)		((rex) & 1)
+#define	FASTTRAP_REX(w, r, x, b)	\
+	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
+
+/*
+ * Single-byte op-codes.
+ */
+#define	FASTTRAP_PUSHL_EBP	0x55
+
+#define	FASTTRAP_JO		0x70
+#define	FASTTRAP_JNO		0x71
+#define	FASTTRAP_JB		0x72
+#define	FASTTRAP_JAE		0x73
+#define	FASTTRAP_JE		0x74
+#define	FASTTRAP_JNE		0x75
+#define	FASTTRAP_JBE		0x76
+#define	FASTTRAP_JA		0x77
+#define	FASTTRAP_JS		0x78
+#define	FASTTRAP_JNS		0x79
+#define	FASTTRAP_JP		0x7a
+#define	FASTTRAP_JNP		0x7b
+#define	FASTTRAP_JL		0x7c
+#define	FASTTRAP_JGE		0x7d
+#define	FASTTRAP_JLE		0x7e
+#define	FASTTRAP_JG		0x7f
+
+#define	FASTTRAP_NOP		0x90
+
+#define	FASTTRAP_MOV_EAX	0xb8
+#define	FASTTRAP_MOV_ECX	0xb9
+
+#define	FASTTRAP_RET16		0xc2
+#define	FASTTRAP_RET		0xc3
+
+#define	FASTTRAP_LOOPNZ		0xe0
+#define	FASTTRAP_LOOPZ		0xe1
+#define	FASTTRAP_LOOP		0xe2
+#define	FASTTRAP_JCXZ		0xe3
+
+#define	FASTTRAP_CALL		0xe8
+#define	FASTTRAP_JMP32		0xe9
+#define	FASTTRAP_JMP8		0xeb
+
+#define	FASTTRAP_INT3		0xcc
+#define	FASTTRAP_INT		0xcd
+
+#define	FASTTRAP_2_BYTE_OP	0x0f
+#define	FASTTRAP_GROUP5_OP	0xff
+
+/*
+ * Two-byte op-codes (second byte only).
+ */
+#define	FASTTRAP_0F_JO		0x80
+#define	FASTTRAP_0F_JNO		0x81
+#define	FASTTRAP_0F_JB		0x82
+#define	FASTTRAP_0F_JAE		0x83
+#define	FASTTRAP_0F_JE		0x84
+#define	FASTTRAP_0F_JNE		0x85
+#define	FASTTRAP_0F_JBE		0x86
+#define	FASTTRAP_0F_JA		0x87
+#define	FASTTRAP_0F_JS		0x88
+#define	FASTTRAP_0F_JNS		0x89
+#define	FASTTRAP_0F_JP		0x8a
+#define	FASTTRAP_0F_JNP		0x8b
+#define	FASTTRAP_0F_JL		0x8c
+#define	FASTTRAP_0F_JGE		0x8d
+#define	FASTTRAP_0F_JLE		0x8e
+#define	FASTTRAP_0F_JG		0x8f
+
+#define	FASTTRAP_EFLAGS_OF	0x800
+#define	FASTTRAP_EFLAGS_DF	0x400
+#define	FASTTRAP_EFLAGS_SF	0x080
+#define	FASTTRAP_EFLAGS_ZF	0x040
+#define	FASTTRAP_EFLAGS_AF	0x010
+#define	FASTTRAP_EFLAGS_PF	0x004
+#define	FASTTRAP_EFLAGS_CF	0x001
+
+/*
+ * Instruction prefixes.
+ */
+#define	FASTTRAP_PREFIX_OPERAND	0x66
+#define	FASTTRAP_PREFIX_ADDRESS	0x67
+#define	FASTTRAP_PREFIX_CS	0x2E
+#define	FASTTRAP_PREFIX_DS	0x3E
+#define	FASTTRAP_PREFIX_ES	0x26
+#define	FASTTRAP_PREFIX_FS	0x64
+#define	FASTTRAP_PREFIX_GS	0x65
+#define	FASTTRAP_PREFIX_SS	0x36
+#define	FASTTRAP_PREFIX_LOCK	0xF0
+#define	FASTTRAP_PREFIX_REP	0xF3
+#define	FASTTRAP_PREFIX_REPNE	0xF2
+
+#define	FASTTRAP_NOREG	0xff
+
+/*
+ * Map between instruction register encodings and the kernel constants which
+ * correspond to indices into struct regs.
+ */
+#ifdef __amd64
+static const uint8_t regmap[16] = {
+	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
+	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
+};
+#else
+static const uint8_t regmap[8] = {
+	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
+};
+#endif
+
+static ulong_t fasttrap_getreg(struct regs *, uint_t);
+
+static uint64_t
+fasttrap_anarg(struct regs *rp, int function_entry, int argno)
+{
+	uint64_t value;
+	int shift = function_entry ?
1 : 0; + +#ifdef __amd64 +	if (curproc->p_model == DATAMODEL_LP64) { +		uintptr_t *stack; + +		/* +		 * In 64-bit mode, the first six arguments are stored in +		 * registers. +		 */ +		if (argno < 6) +			return ((&rp->r_rdi)[argno]); + +		stack = (uintptr_t *)rp->r_sp; +		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); +		value = dtrace_fulword(&stack[argno - 6 + shift]); +		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); +	} else { +#endif +		uint32_t *stack = (uint32_t *)rp->r_sp; +		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); +		value = dtrace_fuword32(&stack[argno + shift]); +		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); +#ifdef __amd64 +	} +#endif + +	return (value); +} + +/*ARGSUSED*/ +int +fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, +    fasttrap_probe_type_t type) +{ +	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; +	size_t len = FASTTRAP_MAX_INSTR_SIZE; +	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); +	uint_t start = 0; +	int rmindex, size; +	uint8_t seg, rex = 0; + +	/* +	 * Read the instruction at the given address out of the process's +	 * address space. We don't have to worry about a debugger +	 * changing this instruction before we overwrite it with our trap +	 * instruction since P_PR_LOCK is set. Since instructions can span +	 * pages, we potentially read the instruction in two parts. If the +	 * second part fails, we just zero out that part of the instruction. +	 */ +	if (uread(p, &instr[0], first, pc) != 0) +		return (-1); +	if (len > first && +	    uread(p, &instr[first], len - first, pc + first) != 0) { +		bzero(&instr[first], len - first); +		len = first; +	} + +	/* +	 * If the disassembly fails, then we have a malformed instruction. +	 */ +	if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) +		return (-1); + +	/* +	 * Make sure the disassembler isn't completely broken. +	 */ +	ASSERT(-1 <= rmindex && rmindex < size); + +	/* +	 * If the computed size is greater than the number of bytes read, +	 * then it was a malformed instruction possibly because it fell on a +	 * page boundary and the subsequent page was missing or because of +	 * some malicious user. +	 */ +	if (size > len) +		return (-1); + +	tp->ftt_size = (uint8_t)size; +	tp->ftt_segment = FASTTRAP_SEG_NONE; + +	/* +	 * Find the start of the instruction's opcode by processing any +	 * legacy prefixes. +	 */ +	for (;;) { +		seg = 0; +		switch (instr[start]) { +		case FASTTRAP_PREFIX_SS: +			seg++; +			/*FALLTHRU*/ +		case FASTTRAP_PREFIX_GS: +			seg++; +			/*FALLTHRU*/ +		case FASTTRAP_PREFIX_FS: +			seg++; +			/*FALLTHRU*/ +		case FASTTRAP_PREFIX_ES: +			seg++; +			/*FALLTHRU*/ +		case FASTTRAP_PREFIX_DS: +			seg++; +			/*FALLTHRU*/ +		case FASTTRAP_PREFIX_CS: +			seg++; +			/*FALLTHRU*/ +		case FASTTRAP_PREFIX_OPERAND: +		case FASTTRAP_PREFIX_ADDRESS: +		case FASTTRAP_PREFIX_LOCK: +		case FASTTRAP_PREFIX_REP: +		case FASTTRAP_PREFIX_REPNE: +			if (seg != 0) { +				/* +				 * It's illegal for an instruction to specify +				 * two segment prefixes -- give up on this +				 * illegal instruction. +				 */ +				if (tp->ftt_segment != FASTTRAP_SEG_NONE) +					return (-1); + +				tp->ftt_segment = seg; +			} +			start++; +			continue; +		} +		break; +	} + +#ifdef __amd64 +	/* +	 * Identify the REX prefix on 64-bit processes. 
+	 */ +	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) +		rex = instr[start++]; +#endif + +	/* +	 * Now that we're pretty sure that the instruction is okay, copy the +	 * valid part to the tracepoint. +	 */ +	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); + +	tp->ftt_type = FASTTRAP_T_COMMON; +	if (instr[start] == FASTTRAP_2_BYTE_OP) { +		switch (instr[start + 1]) { +		case FASTTRAP_0F_JO: +		case FASTTRAP_0F_JNO: +		case FASTTRAP_0F_JB: +		case FASTTRAP_0F_JAE: +		case FASTTRAP_0F_JE: +		case FASTTRAP_0F_JNE: +		case FASTTRAP_0F_JBE: +		case FASTTRAP_0F_JA: +		case FASTTRAP_0F_JS: +		case FASTTRAP_0F_JNS: +		case FASTTRAP_0F_JP: +		case FASTTRAP_0F_JNP: +		case FASTTRAP_0F_JL: +		case FASTTRAP_0F_JGE: +		case FASTTRAP_0F_JLE: +		case FASTTRAP_0F_JG: +			tp->ftt_type = FASTTRAP_T_JCC; +			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; +			tp->ftt_dest = pc + tp->ftt_size + +			    /* LINTED - alignment */ +			    *(int32_t *)&instr[start + 2]; +			break; +		} +	} else if (instr[start] == FASTTRAP_GROUP5_OP) { +		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); +		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); +		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); + +		if (reg == 2 || reg == 4) { +			uint_t i, sz; + +			if (reg == 2) +				tp->ftt_type = FASTTRAP_T_CALL; +			else +				tp->ftt_type = FASTTRAP_T_JMP; + +			if (mod == 3) +				tp->ftt_code = 2; +			else +				tp->ftt_code = 1; + +			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + +			/* +			 * See AMD x86-64 Architecture Programmer's Manual +			 * Volume 3, Section 1.2.7, Table 1-12, and +			 * Appendix A.3.1, Table A-15. +			 */ +			if (mod != 3 && rm == 4) { +				uint8_t sib = instr[start + 2]; +				uint_t index = FASTTRAP_SIB_INDEX(sib); +				uint_t base = FASTTRAP_SIB_BASE(sib); + +				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); + +				tp->ftt_index = (index == 4) ? +				    FASTTRAP_NOREG : +				    regmap[index | (FASTTRAP_REX_X(rex) << 3)]; +				tp->ftt_base = (mod == 0 && base == 5) ? +				    FASTTRAP_NOREG : +				    regmap[base | (FASTTRAP_REX_B(rex) << 3)]; + +				i = 3; +				sz = mod == 1 ? 1 : 4; +			} else { +				/* +				 * In 64-bit mode, mod == 0 and r/m == 5 +				 * denotes %rip-relative addressing; in 32-bit +				 * mode, the base register isn't used. In both +				 * modes, there is a 32-bit operand. +				 */ +				if (mod == 0 && rm == 5) { +#ifdef __amd64 +					if (p->p_model == DATAMODEL_LP64) +						tp->ftt_base = REG_RIP; +					else +#endif +						tp->ftt_base = FASTTRAP_NOREG; +					sz = 4; +				} else  { +					uint8_t base = rm | +					    (FASTTRAP_REX_B(rex) << 3); + +					tp->ftt_base = regmap[base]; +					sz = mod == 1 ? 1 : mod == 2 ? 
4 : 0;
+				}
+				tp->ftt_index = FASTTRAP_NOREG;
+				i = 2;
+			}
+
+			if (sz == 1) {
+				tp->ftt_dest = *(int8_t *)&instr[start + i];
+			} else if (sz == 4) {
+				/* LINTED - alignment */
+				tp->ftt_dest = *(int32_t *)&instr[start + i];
+			} else {
+				tp->ftt_dest = 0;
+			}
+		}
+	} else {
+		switch (instr[start]) {
+		case FASTTRAP_RET:
+			tp->ftt_type = FASTTRAP_T_RET;
+			break;
+
+		case FASTTRAP_RET16:
+			tp->ftt_type = FASTTRAP_T_RET16;
+			/* LINTED - alignment */
+			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
+			break;
+
+		case FASTTRAP_JO:
+		case FASTTRAP_JNO:
+		case FASTTRAP_JB:
+		case FASTTRAP_JAE:
+		case FASTTRAP_JE:
+		case FASTTRAP_JNE:
+		case FASTTRAP_JBE:
+		case FASTTRAP_JA:
+		case FASTTRAP_JS:
+		case FASTTRAP_JNS:
+		case FASTTRAP_JP:
+		case FASTTRAP_JNP:
+		case FASTTRAP_JL:
+		case FASTTRAP_JGE:
+		case FASTTRAP_JLE:
+		case FASTTRAP_JG:
+			tp->ftt_type = FASTTRAP_T_JCC;
+			tp->ftt_code = instr[start];
+			tp->ftt_dest = pc + tp->ftt_size +
+			    (int8_t)instr[start + 1];
+			break;
+
+		case FASTTRAP_LOOPNZ:
+		case FASTTRAP_LOOPZ:
+		case FASTTRAP_LOOP:
+			tp->ftt_type = FASTTRAP_T_LOOP;
+			tp->ftt_code = instr[start];
+			tp->ftt_dest = pc + tp->ftt_size +
+			    (int8_t)instr[start + 1];
+			break;
+
+		case FASTTRAP_JCXZ:
+			tp->ftt_type = FASTTRAP_T_JCXZ;
+			tp->ftt_dest = pc + tp->ftt_size +
+			    (int8_t)instr[start + 1];
+			break;
+
+		case FASTTRAP_CALL:
+			tp->ftt_type = FASTTRAP_T_CALL;
+			tp->ftt_dest = pc + tp->ftt_size +
+			    /* LINTED - alignment */
+			    *(int32_t *)&instr[start + 1];
+			tp->ftt_code = 0;
+			break;
+
+		case FASTTRAP_JMP32:
+			tp->ftt_type = FASTTRAP_T_JMP;
+			tp->ftt_dest = pc + tp->ftt_size +
+			    /* LINTED - alignment */
+			    *(int32_t *)&instr[start + 1];
+			break;
+		case FASTTRAP_JMP8:
+			tp->ftt_type = FASTTRAP_T_JMP;
+			tp->ftt_dest = pc + tp->ftt_size +
+			    (int8_t)instr[start + 1];
+			break;
+
+		case FASTTRAP_PUSHL_EBP:
+			if (start == 0)
+				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
+			break;
+
+		case FASTTRAP_NOP:
+#ifdef __amd64
+			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
+
+			/*
+			 * On amd64 we have to be careful not to confuse a nop
+			 * (actually xchgl %eax, %eax) with an instruction using
+			 * the same opcode, but that does something different
+			 * (e.g. xchgl %r8d, %eax or xchgq %r8, %rax).
+			 */
+			if (FASTTRAP_REX_B(rex) == 0)
+#endif
+				tp->ftt_type = FASTTRAP_T_NOP;
+			break;
+
+		case FASTTRAP_INT3:
+			/*
+			 * The pid provider shares the int3 trap with debugger
+			 * breakpoints so we can't instrument them.
+			 */
+			ASSERT(instr[start] == FASTTRAP_INSTR);
+			return (-1);
+
+		case FASTTRAP_INT:
+			/*
+			 * Interrupts seem like they could be traced with
+			 * no negative implications, but it's possible that
+			 * a thread could be redirected by the trap handling
+			 * code which would eventually return to the
+			 * instruction after the interrupt. If the interrupt
+			 * were in our scratch space, the subsequent
+			 * instruction might be overwritten before we return.
+			 * Accordingly we refuse to instrument any interrupt.
+			 */
+			return (-1);
+		}
+	}
+
+#ifdef __amd64
+	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
+		/*
+		 * If the process is 64-bit and the instruction type is still
+		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out and
+		 * execute it -- we need to watch for %rip-relative
+		 * addressing mode.
See the portion of fasttrap_pid_probe() +		 * below where we handle tracepoints with type +		 * FASTTRAP_T_COMMON for how we emulate instructions that +		 * employ %rip-relative addressing. +		 */ +		if (rmindex != -1) { +			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); +			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); +			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); + +			ASSERT(rmindex > start); + +			if (mod == 0 && rm == 5) { +				/* +				 * We need to be sure to avoid other +				 * registers used by this instruction. While +				 * the reg field may determine the op code +				 * rather than denoting a register, assuming +				 * that it denotes a register is always safe. +				 * We leave the REX field intact and use +				 * whatever value's there for simplicity. +				 */ +				if (reg != 0) { +					tp->ftt_ripmode = FASTTRAP_RIP_1 | +					    (FASTTRAP_RIP_X * +					    FASTTRAP_REX_B(rex)); +					rm = 0; +				} else { +					tp->ftt_ripmode = FASTTRAP_RIP_2 | +					    (FASTTRAP_RIP_X * +					    FASTTRAP_REX_B(rex)); +					rm = 1; +				} + +				tp->ftt_modrm = tp->ftt_instr[rmindex]; +				tp->ftt_instr[rmindex] = +				    FASTTRAP_MODRM(2, reg, rm); +			} +		} +	} +#endif + +	return (0); +} + +int +fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) +{ +	fasttrap_instr_t instr = FASTTRAP_INSTR; + +	if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) +		return (-1); + +	return (0); +} + +int +fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) +{ +	uint8_t instr; + +	/* +	 * Distinguish between read or write failures and a changed +	 * instruction. +	 */ +	if (uread(p, &instr, 1, tp->ftt_pc) != 0) +		return (0); +	if (instr != FASTTRAP_INSTR) +		return (0); +	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) +		return (-1); + +	return (0); +} + +#ifdef __amd64 +static uintptr_t +fasttrap_fulword_noerr(const void *uaddr) +{ +	uintptr_t ret; + +	if (fasttrap_fulword(uaddr, &ret) == 0) +		return (ret); + +	return (0); +} +#endif + +static uint32_t +fasttrap_fuword32_noerr(const void *uaddr) +{ +	uint32_t ret; + +	if (fasttrap_fuword32(uaddr, &ret) == 0) +		return (ret); + +	return (0); +} + +static void +fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid, +    uintptr_t new_pc) +{ +	fasttrap_tracepoint_t *tp; +	fasttrap_bucket_t *bucket; +	fasttrap_id_t *id; +	kmutex_t *pid_mtx; + +	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; +	mutex_enter(pid_mtx); +	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + +	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { +		if (pid == tp->ftt_pid && pc == tp->ftt_pc && +		    tp->ftt_proc->ftpc_acount != 0) +			break; +	} + +	/* +	 * Don't sweat it if we can't find the tracepoint again; unlike +	 * when we're in fasttrap_pid_probe(), finding the tracepoint here +	 * is not essential to the correct execution of the process. +	 */ +	if (tp == NULL) { +		mutex_exit(pid_mtx); +		return; +	} + +	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { +		/* +		 * If there's a branch that could act as a return site, we +		 * need to trace it, and check here if the program counter is +		 * external to the function. 
+		 */
+		if (tp->ftt_type != FASTTRAP_T_RET &&
+		    tp->ftt_type != FASTTRAP_T_RET16 &&
+		    new_pc - id->fti_probe->ftp_faddr <
+		    id->fti_probe->ftp_fsize)
+			continue;
+
+		dtrace_probe(id->fti_probe->ftp_id,
+		    pc - id->fti_probe->ftp_faddr,
+		    rp->r_r0, rp->r_r1, 0, 0);
+	}
+
+	mutex_exit(pid_mtx);
+}
+
+static void
+fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
+{
+	sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+
+	sqp->sq_info.si_signo = SIGSEGV;
+	sqp->sq_info.si_code = SEGV_MAPERR;
+	sqp->sq_info.si_addr = (caddr_t)addr;
+
+	mutex_enter(&p->p_lock);
+	sigaddqa(p, t, sqp);
+	mutex_exit(&p->p_lock);
+
+	if (t != NULL)
+		aston(t);
+}
+
+#ifdef __amd64
+static void
+fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
+    uintptr_t *argv)
+{
+	int i, x, cap = MIN(argc, probe->ftp_nargs);
+	uintptr_t *stack = (uintptr_t *)rp->r_sp;
+
+	for (i = 0; i < cap; i++) {
+		x = probe->ftp_argmap[i];
+
+		if (x < 6)
+			argv[i] = (&rp->r_rdi)[x];
+		else
+			argv[i] = fasttrap_fulword_noerr(&stack[x]);
+	}
+
+	for (; i < argc; i++) {
+		argv[i] = 0;
+	}
+}
+#endif
+
+static void
+fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
+    uint32_t *argv)
+{
+	int i, x, cap = MIN(argc, probe->ftp_nargs);
+	uint32_t *stack = (uint32_t *)rp->r_sp;
+
+	for (i = 0; i < cap; i++) {
+		x = probe->ftp_argmap[i];
+
+		argv[i] = fasttrap_fuword32_noerr(&stack[x]);
+	}
+
+	for (; i < argc; i++) {
+		argv[i] = 0;
+	}
+}
+
+static int
+fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
+{
+	proc_t *p = curproc;
+	user_desc_t *desc;
+	uint16_t sel, ndx, type;
+	uintptr_t limit;
+
+	switch (tp->ftt_segment) {
+	case FASTTRAP_SEG_CS:
+		sel = rp->r_cs;
+		break;
+	case FASTTRAP_SEG_DS:
+		sel = rp->r_ds;
+		break;
+	case FASTTRAP_SEG_ES:
+		sel = rp->r_es;
+		break;
+	case FASTTRAP_SEG_FS:
+		sel = rp->r_fs;
+		break;
+	case FASTTRAP_SEG_GS:
+		sel = rp->r_gs;
+		break;
+	case FASTTRAP_SEG_SS:
+		sel = rp->r_ss;
+		break;
+	}
+
+	/*
+	 * Make sure the given segment register specifies a user privilege
+	 * selector rather than a kernel selector.
+	 */
+	if (!SELISUPL(sel))
+		return (-1);
+
+	ndx = SELTOIDX(sel);
+
+	/*
+	 * Check the bounds and grab the descriptor out of the specified
+	 * descriptor table.
+	 */
+	if (SELISLDT(sel)) {
+		if (ndx > p->p_ldtlimit)
+			return (-1);
+
+		desc = p->p_ldt + ndx;
+
+	} else {
+		if (ndx >= NGDT)
+			return (-1);
+
+		desc = cpu_get_gdt() + ndx;
+	}
+
+	/*
+	 * The descriptor must have user privilege level and it must be
+	 * present in memory.
+	 */
+	if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
+		return (-1);
+
+	type = desc->usd_type;
+
+	/*
+	 * If the S bit in the type field is not set, this descriptor can
+	 * only be used in system context.
+	 */
+	if ((type & 0x10) != 0x10)
+		return (-1);
+
+	limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);
+
+	if (tp->ftt_segment == FASTTRAP_SEG_CS) {
+		/*
+		 * The code/data bit and readable bit must both be set.
+		 */
+		if ((type & 0xa) != 0xa)
+			return (-1);
+
+		if (*addr > limit)
+			return (-1);
+	} else {
+		/*
+		 * The code/data bit must be clear.
+		 */
+		if ((type & 0x8) != 0)
+			return (-1);
+
+		/*
+		 * If the expand-down bit is clear, we just check the limit as
+		 * it would naturally be applied. Otherwise, we need to check
+		 * that the address is in the range [limit + 1 .. 0xffff] or
+		 * [limit + 1 .. 0xffffffff] depending on whether the default
+		 * operand size bit is set.
+		 */
+		if ((type & 0x4) == 0) {
+			if (*addr > limit)
+				return (-1);
+		} else if (desc->usd_def32) {
+			if (*addr < limit + 1 || 0xffff < *addr)
+				return (-1);
+		} else {
+			if (*addr < limit + 1 || 0xffffffff < *addr)
+				return (-1);
+		}
+	}
+
+	*addr += USEGD_GETBASE(desc);
+
+	return (0);
+}
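Restated as a standalone sketch (not part of the commit; the function name and parameter order are invented here for illustration), the data-segment limit check above amounts to the following, where the upper bound for expand-down segments mirrors the def32 test in the code above:

#include <stdio.h>
#include <stdint.h>

/*
 * Normal data segments admit offsets [0 .. limit]; expand-down segments
 * admit [limit + 1 .. top], with top chosen as in fasttrap_do_seg().
 */
static int
seg_addr_ok(uintptr_t addr, uintptr_t limit, int expand_down, int def32)
{
	if (!expand_down)
		return (addr <= limit);
	if (def32)
		return (addr >= limit + 1 && addr <= 0xffff);
	return (addr >= limit + 1 && addr <= 0xffffffff);
}

int
main(void)
{
	/* 0x1000 is inside a normal 64K segment... */
	printf("%d\n", seg_addr_ok(0x1000, 0xffff, 0, 0));
	/* ...but outside the same segment if it is expand-down. */
	printf("%d\n", seg_addr_ok(0x1000, 0xffff, 1, 0));
	return (0);
}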
+
+int
+fasttrap_pid_probe(struct regs *rp)
+{
+	proc_t *p = curproc;
+	uintptr_t pc = rp->r_pc - 1, new_pc = 0;
+	fasttrap_bucket_t *bucket;
+	kmutex_t *pid_mtx;
+	fasttrap_tracepoint_t *tp, tp_local;
+	pid_t pid;
+	dtrace_icookie_t cookie;
+	uint_t is_enabled = 0;
+
+	/*
+	 * It's possible that a user (in a veritable orgy of bad planning)
+	 * could redirect this thread's flow of control before it reached the
+	 * return probe fasttrap. In this case we need to kill the process
+	 * since it's in an unrecoverable state.
+	 */
+	if (curthread->t_dtrace_step) {
+		ASSERT(curthread->t_dtrace_on);
+		fasttrap_sigtrap(p, curthread, pc);
+		return (0);
+	}
+
+	/*
+	 * Clear all user tracing flags.
+	 */
+	curthread->t_dtrace_ft = 0;
+	curthread->t_dtrace_pc = 0;
+	curthread->t_dtrace_npc = 0;
+	curthread->t_dtrace_scrpc = 0;
+	curthread->t_dtrace_astpc = 0;
+#ifdef __amd64
+	curthread->t_dtrace_regv = 0;
+#endif
+
+	/*
+	 * Treat a child created by a call to vfork(2) as if it were its
+	 * parent. We know that there's only one thread of control in such a
+	 * process: this one.
+	 */
+	while (p->p_flag & SVFORK) {
+		p = p->p_parent;
+	}
+
+	pid = p->p_pid;
+	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
+	mutex_enter(pid_mtx);
+	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
+
+	/*
+	 * Look up the tracepoint that the process just hit.
+	 */
+	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
+		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
+		    tp->ftt_proc->ftpc_acount != 0)
+			break;
+	}
+
+	/*
+	 * If we couldn't find a matching tracepoint, either a tracepoint has
+	 * been inserted without using the pid<pid> ioctl interface (see
+	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
+	 */
+	if (tp == NULL) {
+		mutex_exit(pid_mtx);
+		return (-1);
+	}
+
+	/*
+	 * Set the program counter to the address of the traced instruction
+	 * so that it looks right in ustack() output.
+	 */
+	rp->r_pc = pc;
+
+	if (tp->ftt_ids != NULL) {
+		fasttrap_id_t *id;
+
+#ifdef __amd64
+		if (p->p_model == DATAMODEL_LP64) {
+			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
+				fasttrap_probe_t *probe = id->fti_probe;
+
+				if (id->fti_ptype == DTFTP_ENTRY) {
+					/*
+					 * We note that this was an entry
+					 * probe to help ustack() find the
+					 * first caller.
+					 */
+					cookie = dtrace_interrupt_disable();
+					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
+					dtrace_probe(probe->ftp_id, rp->r_rdi,
+					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
+					    rp->r_r8);
+					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
+					dtrace_interrupt_enable(cookie);
+				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
+					/*
+					 * Note that in this case, we don't
+					 * call dtrace_probe() since it's only
+					 * an artificial probe meant to change
+					 * the flow of control so that it
+					 * encounters the true probe.
+					 */ +					is_enabled = 1; +				} else if (probe->ftp_argmap == NULL) { +					dtrace_probe(probe->ftp_id, rp->r_rdi, +					    rp->r_rsi, rp->r_rdx, rp->r_rcx, +					    rp->r_r8); +				} else { +					uintptr_t t[5]; + +					fasttrap_usdt_args64(probe, rp, +					    sizeof (t) / sizeof (t[0]), t); + +					dtrace_probe(probe->ftp_id, t[0], t[1], +					    t[2], t[3], t[4]); +				} +			} +		} else { +#endif +			uintptr_t s0, s1, s2, s3, s4, s5; +			uint32_t *stack = (uint32_t *)rp->r_sp; + +			/* +			 * In 32-bit mode, all arguments are passed on the +			 * stack. If this is a function entry probe, we need +			 * to skip the first entry on the stack as it +			 * represents the return address rather than a +			 * parameter to the function. +			 */ +			s0 = fasttrap_fuword32_noerr(&stack[0]); +			s1 = fasttrap_fuword32_noerr(&stack[1]); +			s2 = fasttrap_fuword32_noerr(&stack[2]); +			s3 = fasttrap_fuword32_noerr(&stack[3]); +			s4 = fasttrap_fuword32_noerr(&stack[4]); +			s5 = fasttrap_fuword32_noerr(&stack[5]); + +			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { +				fasttrap_probe_t *probe = id->fti_probe; + +				if (id->fti_ptype == DTFTP_ENTRY) { +					/* +					 * We note that this was an entry +					 * probe to help ustack() find the +					 * first caller. +					 */ +					cookie = dtrace_interrupt_disable(); +					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); +					dtrace_probe(probe->ftp_id, s1, s2, +					    s3, s4, s5); +					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); +					dtrace_interrupt_enable(cookie); +				} else if (id->fti_ptype == DTFTP_IS_ENABLED) { +					/* +					 * Note that in this case, we don't +					 * call dtrace_probe() since it's only +					 * an artificial probe meant to change +					 * the flow of control so that it +					 * encounters the true probe. +					 */ +					is_enabled = 1; +				} else if (probe->ftp_argmap == NULL) { +					dtrace_probe(probe->ftp_id, s0, s1, +					    s2, s3, s4); +				} else { +					uint32_t t[5]; + +					fasttrap_usdt_args32(probe, rp, +					    sizeof (t) / sizeof (t[0]), t); + +					dtrace_probe(probe->ftp_id, t[0], t[1], +					    t[2], t[3], t[4]); +				} +			} +#ifdef __amd64 +		} +#endif +	} + +	/* +	 * We're about to do a bunch of work so we cache a local copy of +	 * the tracepoint to emulate the instruction, and then find the +	 * tracepoint again later if we need to light up any return probes. +	 */ +	tp_local = *tp; +	mutex_exit(pid_mtx); +	tp = &tp_local; + +	/* +	 * Set the program counter to appear as though the traced instruction +	 * had completely executed. This ensures that fasttrap_getreg() will +	 * report the expected value for REG_RIP. +	 */ +	rp->r_pc = pc + tp->ftt_size; + +	/* +	 * If there's an is-enabled probe connected to this tracepoint it +	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' +	 * instruction that was placed there by DTrace when the binary was +	 * linked. As this probe is, in fact, enabled, we need to stuff 1 +	 * into %eax or %rax. Accordingly, we can bypass all the instruction +	 * emulation logic since we know the inevitable result. It's possible +	 * that a user could construct a scenario where the 'is-enabled' +	 * probe was on some other instruction, but that would be a rather +	 * exotic way to shoot oneself in the foot. +	 */ +	if (is_enabled) { +		rp->r_r0 = 1; +		new_pc = rp->r_pc; +		goto done; +	} + +	/* +	 * We emulate certain types of instructions to ensure correctness +	 * (in the case of position dependent instructions) or optimize +	 * common cases. 
The rest we have the thread execute back in user- +	 * land. +	 */ +	switch (tp->ftt_type) { +	case FASTTRAP_T_RET: +	case FASTTRAP_T_RET16: +	{ +		uintptr_t dst; +		uintptr_t addr; +		int ret; + +		/* +		 * We have to emulate _every_ facet of the behavior of a ret +		 * instruction including what happens if the load from %esp +		 * fails; in that case, we send a SIGSEGV. +		 */ +#ifdef __amd64 +		if (p->p_model == DATAMODEL_NATIVE) { +#endif +			ret = fasttrap_fulword((void *)rp->r_sp, &dst); +			addr = rp->r_sp + sizeof (uintptr_t); +#ifdef __amd64 +		} else { +			uint32_t dst32; +			ret = fasttrap_fuword32((void *)rp->r_sp, &dst32); +			dst = dst32; +			addr = rp->r_sp + sizeof (uint32_t); +		} +#endif + +		if (ret == -1) { +			fasttrap_sigsegv(p, curthread, rp->r_sp); +			new_pc = pc; +			break; +		} + +		if (tp->ftt_type == FASTTRAP_T_RET16) +			addr += tp->ftt_dest; + +		rp->r_sp = addr; +		new_pc = dst; +		break; +	} + +	case FASTTRAP_T_JCC: +	{ +		uint_t taken; + +		switch (tp->ftt_code) { +		case FASTTRAP_JO: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0; +			break; +		case FASTTRAP_JNO: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0; +			break; +		case FASTTRAP_JB: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0; +			break; +		case FASTTRAP_JAE: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0; +			break; +		case FASTTRAP_JE: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0; +			break; +		case FASTTRAP_JNE: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0; +			break; +		case FASTTRAP_JBE: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 || +			    (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0; +			break; +		case FASTTRAP_JA: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 && +			    (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0; +			break; +		case FASTTRAP_JS: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0; +			break; +		case FASTTRAP_JNS: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0; +			break; +		case FASTTRAP_JP: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0; +			break; +		case FASTTRAP_JNP: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0; +			break; +		case FASTTRAP_JL: +			taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) != +			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); +			break; +		case FASTTRAP_JGE: +			taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) == +			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); +			break; +		case FASTTRAP_JLE: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 || +			    ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) != +			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); +			break; +		case FASTTRAP_JG: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 && +			    ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) == +			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); +			break; + +		} + +		if (taken) +			new_pc = tp->ftt_dest; +		else +			new_pc = pc + tp->ftt_size; +		break; +	} + +	case FASTTRAP_T_LOOP: +	{ +		uint_t taken; +#ifdef __amd64 +		greg_t cx = rp->r_rcx--; +#else +		greg_t cx = rp->r_ecx--; +#endif + +		switch (tp->ftt_code) { +		case FASTTRAP_LOOPNZ: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 && +			    cx != 0; +			break; +		case FASTTRAP_LOOPZ: +			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 && +			    cx != 0; +			break; +		case FASTTRAP_LOOP: +			taken = (cx != 0); +			break; +		} + +		if (taken) +			new_pc = tp->ftt_dest; +		else +			new_pc = pc + tp->ftt_size; +		break; +	} + +	case FASTTRAP_T_JCXZ: +	{ +#ifdef __amd64 +		greg_t cx = rp->r_rcx; +#else +		greg_t cx = rp->r_ecx; +#endif + +		if (cx == 0) +			new_pc = tp->ftt_dest; +		else +			new_pc = pc + 
tp->ftt_size; +		break; +	} + +	case FASTTRAP_T_PUSHL_EBP: +	{ +		int ret; +		uintptr_t addr; +#ifdef __amd64 +		if (p->p_model == DATAMODEL_NATIVE) { +#endif +			addr = rp->r_sp - sizeof (uintptr_t); +			ret = fasttrap_sulword((void *)addr, rp->r_fp); +#ifdef __amd64 +		} else { +			addr = rp->r_sp - sizeof (uint32_t); +			ret = fasttrap_suword32((void *)addr, +			    (uint32_t)rp->r_fp); +		} +#endif + +		if (ret == -1) { +			fasttrap_sigsegv(p, curthread, addr); +			new_pc = pc; +			break; +		} + +		rp->r_sp = addr; +		new_pc = pc + tp->ftt_size; +		break; +	} + +	case FASTTRAP_T_NOP: +		new_pc = pc + tp->ftt_size; +		break; + +	case FASTTRAP_T_JMP: +	case FASTTRAP_T_CALL: +		if (tp->ftt_code == 0) { +			new_pc = tp->ftt_dest; +		} else { +			uintptr_t value, addr = tp->ftt_dest; + +			if (tp->ftt_base != FASTTRAP_NOREG) +				addr += fasttrap_getreg(rp, tp->ftt_base); +			if (tp->ftt_index != FASTTRAP_NOREG) +				addr += fasttrap_getreg(rp, tp->ftt_index) << +				    tp->ftt_scale; + +			if (tp->ftt_code == 1) { +				/* +				 * If there's a segment prefix for this +				 * instruction, we'll need to check permissions +				 * and bounds on the given selector, and adjust +				 * the address accordingly. +				 */ +				if (tp->ftt_segment != FASTTRAP_SEG_NONE && +				    fasttrap_do_seg(tp, rp, &addr) != 0) { +					fasttrap_sigsegv(p, curthread, addr); +					new_pc = pc; +					break; +				} + +#ifdef __amd64 +				if (p->p_model == DATAMODEL_NATIVE) { +#endif +					if (fasttrap_fulword((void *)addr, +					    &value) == -1) { +						fasttrap_sigsegv(p, curthread, +						    addr); +						new_pc = pc; +						break; +					} +					new_pc = value; +#ifdef __amd64 +				} else { +					uint32_t value32; +					addr = (uintptr_t)(uint32_t)addr; +					if (fasttrap_fuword32((void *)addr, +					    &value32) == -1) { +						fasttrap_sigsegv(p, curthread, +						    addr); +						new_pc = pc; +						break; +					} +					new_pc = value32; +				} +#endif +			} else { +				new_pc = addr; +			} +		} + +		/* +		 * If this is a call instruction, we need to push the return +		 * address onto the stack. If this fails, we send the process +		 * a SIGSEGV and reset the pc to emulate what would happen if +		 * this instruction weren't traced. +		 */ +		if (tp->ftt_type == FASTTRAP_T_CALL) { +			int ret; +			uintptr_t addr; +#ifdef __amd64 +			if (p->p_model == DATAMODEL_NATIVE) { +				addr = rp->r_sp - sizeof (uintptr_t); +				ret = fasttrap_sulword((void *)addr, +				    pc + tp->ftt_size); +			} else { +#endif +				addr = rp->r_sp - sizeof (uint32_t); +				ret = fasttrap_suword32((void *)addr, +				    (uint32_t)(pc + tp->ftt_size)); +#ifdef __amd64 +			} +#endif + +			if (ret == -1) { +				fasttrap_sigsegv(p, curthread, addr); +				new_pc = pc; +				break; +			} + +			rp->r_sp = addr; +		} + +		break; + +	case FASTTRAP_T_COMMON: +	{ +		uintptr_t addr; +#if defined(__amd64) +		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; +#else +		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; +#endif +		uint_t i = 0; +		klwp_t *lwp = ttolwp(curthread); + +		/* +		 * Compute the address of the ulwp_t and step over the +		 * ul_self pointer. The method used to store the user-land +		 * thread pointer is very different on 32- and 64-bit +		 * kernels. 
+		 */
+#if defined(__amd64)
+		if (p->p_model == DATAMODEL_LP64) {
+			addr = lwp->lwp_pcb.pcb_fsbase;
+			addr += sizeof (void *);
+		} else {
+			addr = lwp->lwp_pcb.pcb_gsbase;
+			addr += sizeof (caddr32_t);
+		}
+#else
+		addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
+		addr += sizeof (void *);
+#endif
+
+		/*
+		 * Generic Instruction Tracing
+		 * ---------------------------
+		 *
+		 * This is the layout of the scratch space in the user-land
+		 * thread structure for our generated instructions.
+		 *
+		 *	32-bit mode			bytes
+		 *	------------------------	-----
+		 * a:	<original instruction>		<= 15
+		 *	jmp	<pc + tp->ftt_size>	    5
+		 * b:	<original instruction>		<= 15
+		 *	int	T_DTRACE_RET		    2
+		 *					-----
+		 *					<= 37
+		 *
+		 *	64-bit mode			bytes
+		 *	------------------------	-----
+		 * a:	<original instruction>		<= 15
+		 *	jmp	0(%rip)			    6
+		 *	<pc + tp->ftt_size>		    8
+		 * b:	<original instruction>		<= 15
+		 *	int	T_DTRACE_RET		    2
+		 *					-----
+		 *					<= 46
+		 *
+		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
+		 * to b. If we encounter a signal on the way out of the
+		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
+		 * so that we execute the original instruction and re-enter
+		 * the kernel rather than redirecting to the next instruction.
+		 *
+		 * If there are return probes (so we know that we're going to
+		 * need to reenter the kernel after executing the original
+		 * instruction), the scratch space will just contain the
+		 * original instruction followed by an interrupt -- the same
+		 * data as at b.
+		 *
+		 * %rip-relative Addressing
+		 * ------------------------
+		 *
+		 * There's a further complication in 64-bit mode due to %rip-
+		 * relative addressing. While this is clearly a beneficial
+		 * architectural decision for position independent code, it's
+		 * hard not to see it as a personal attack against the pid
+		 * provider since before there was a relatively small set of
+		 * instructions to emulate; with %rip-relative addressing,
+		 * almost every instruction can potentially depend on the
+		 * address at which it's executed. Rather than emulating
+		 * the broad spectrum of instructions that can now be
+		 * position dependent, we emulate jumps and others as in
+		 * 32-bit mode, and take a different tack for instructions
+		 * using %rip-relative addressing.
+		 *
+		 * For every instruction that uses the ModRM byte, the
+		 * in-kernel disassembler reports its location. We use the
+		 * ModRM byte to identify that an instruction uses
+		 * %rip-relative addressing and to see what other registers
+		 * the instruction uses. To emulate those instructions,
+		 * we modify the instruction to be %rax-relative rather than
+		 * %rip-relative (or %rcx-relative if the instruction uses
+		 * %rax; or %r8- or %r9-relative if the REX.B is present so
+		 * we don't have to rewrite the REX prefix). We then load
+		 * the value that %rip would have been into the scratch
+		 * register and generate an instruction to reset the scratch
+		 * register back to its original value.
The instruction +		 * sequence looks like this: +		 * +		 *	64-mode %rip-relative		bytes +		 *	------------------------	----- +		 * a:	<modified instruction>		<= 15 +		 *	movq	$<value>, %<scratch>	    6 +		 *	jmp	0(%rip)			    6 +		 *	<pc + tp->ftt_size>		    8 +		 * b:	<modified instruction>  	<= 15 +		 * 	int	T_DTRACE_RET		    2 +		 * 					----- +		 *					   52 +		 * +		 * We set curthread->t_dtrace_regv so that upon receiving +		 * a signal we can reset the value of the scratch register. +		 */ + +		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); + +		curthread->t_dtrace_scrpc = addr; +		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); +		i += tp->ftt_size; + +#ifdef __amd64 +		if (tp->ftt_ripmode != 0) { +			greg_t *reg; + +			ASSERT(p->p_model == DATAMODEL_LP64); +			ASSERT(tp->ftt_ripmode & +			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); + +			/* +			 * If this was a %rip-relative instruction, we change +			 * it to be either a %rax- or %rcx-relative +			 * instruction (depending on whether those registers +			 * are used as another operand; or %r8- or %r9- +			 * relative depending on the value of REX.B). We then +			 * set that register and generate a movq instruction +			 * to reset the value. +			 */ +			if (tp->ftt_ripmode & FASTTRAP_RIP_X) +				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); +			else +				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); + +			if (tp->ftt_ripmode & FASTTRAP_RIP_1) +				scratch[i++] = FASTTRAP_MOV_EAX; +			else +				scratch[i++] = FASTTRAP_MOV_ECX; + +			switch (tp->ftt_ripmode) { +			case FASTTRAP_RIP_1: +				reg = &rp->r_rax; +				curthread->t_dtrace_reg = REG_RAX; +				break; +			case FASTTRAP_RIP_2: +				reg = &rp->r_rcx; +				curthread->t_dtrace_reg = REG_RCX; +				break; +			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: +				reg = &rp->r_r8; +				curthread->t_dtrace_reg = REG_R8; +				break; +			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: +				reg = &rp->r_r9; +				curthread->t_dtrace_reg = REG_R9; +				break; +			} + +			/* LINTED - alignment */ +			*(uint64_t *)&scratch[i] = *reg; +			curthread->t_dtrace_regv = *reg; +			*reg = pc + tp->ftt_size; +			i += sizeof (uint64_t); +		} +#endif + +		/* +		 * Generate the branch instruction to what would have +		 * normally been the subsequent instruction. In 32-bit mode, +		 * this is just a relative branch; in 64-bit mode this is a +		 * %rip-relative branch that loads the 64-bit pc value +		 * immediately after the jmp instruction. +		 */ +#ifdef __amd64 +		if (p->p_model == DATAMODEL_LP64) { +			scratch[i++] = FASTTRAP_GROUP5_OP; +			scratch[i++] = FASTTRAP_MODRM(0, 4, 5); +			/* LINTED - alignment */ +			*(uint32_t *)&scratch[i] = 0; +			i += sizeof (uint32_t); +			/* LINTED - alignment */ +			*(uint64_t *)&scratch[i] = pc + tp->ftt_size; +			i += sizeof (uint64_t); +		} else { +#endif +			/* +			 * Set up the jmp to the next instruction; note that +			 * the size of the traced instruction cancels out. 
+			 */ +			scratch[i++] = FASTTRAP_JMP32; +			/* LINTED - alignment */ +			*(uint32_t *)&scratch[i] = pc - addr - 5; +			i += sizeof (uint32_t); +#ifdef __amd64 +		} +#endif + +		curthread->t_dtrace_astpc = addr + i; +		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); +		i += tp->ftt_size; +		scratch[i++] = FASTTRAP_INT; +		scratch[i++] = T_DTRACE_RET; + +		ASSERT(i <= sizeof (scratch)); + +		if (fasttrap_copyout(scratch, (char *)addr, i)) { +			fasttrap_sigtrap(p, curthread, pc); +			new_pc = pc; +			break; +		} + +		if (tp->ftt_retids != NULL) { +			curthread->t_dtrace_step = 1; +			curthread->t_dtrace_ret = 1; +			new_pc = curthread->t_dtrace_astpc; +		} else { +			new_pc = curthread->t_dtrace_scrpc; +		} + +		curthread->t_dtrace_pc = pc; +		curthread->t_dtrace_npc = pc + tp->ftt_size; +		curthread->t_dtrace_on = 1; +		break; +	} + +	default: +		panic("fasttrap: mishandled an instruction"); +	} + +done: +	/* +	 * If there were no return probes when we first found the tracepoint, +	 * we should feel no obligation to honor any return probes that were +	 * subsequently enabled -- they'll just have to wait until the next +	 * time around. +	 */ +	if (tp->ftt_retids != NULL) { +		/* +		 * We need to wait until the results of the instruction are +		 * apparent before invoking any return probes. If this +		 * instruction was emulated we can just call +		 * fasttrap_return_common(); if it needs to be executed, we +		 * need to wait until the user thread returns to the kernel. +		 */ +		if (tp->ftt_type != FASTTRAP_T_COMMON) { +			/* +			 * Set the program counter to the address of the traced +			 * instruction so that it looks right in ustack() +			 * output. We had previously set it to the end of the +			 * instruction to simplify %rip-relative addressing. +			 */ +			rp->r_pc = pc; + +			fasttrap_return_common(rp, pc, pid, new_pc); +		} else { +			ASSERT(curthread->t_dtrace_ret != 0); +			ASSERT(curthread->t_dtrace_pc == pc); +			ASSERT(curthread->t_dtrace_scrpc != 0); +			ASSERT(new_pc == curthread->t_dtrace_astpc); +		} +	} + +	rp->r_pc = new_pc; + +	return (0); +} + +int +fasttrap_return_probe(struct regs *rp) +{ +	proc_t *p = curproc; +	uintptr_t pc = curthread->t_dtrace_pc; +	uintptr_t npc = curthread->t_dtrace_npc; + +	curthread->t_dtrace_pc = 0; +	curthread->t_dtrace_npc = 0; +	curthread->t_dtrace_scrpc = 0; +	curthread->t_dtrace_astpc = 0; + +	/* +	 * Treat a child created by a call to vfork(2) as if it were its +	 * parent. We know that there's only one thread of control in such a +	 * process: this one. +	 */ +	while (p->p_flag & SVFORK) { +		p = p->p_parent; +	} + +	/* +	 * We set rp->r_pc to the address of the traced instruction so +	 * that it appears to dtrace_probe() that we're on the original +	 * instruction, and so that the user can't easily detect our +	 * complex web of lies. dtrace_return_probe() (our caller) +	 * will correctly set %pc after we return. 
+	 */ +	rp->r_pc = pc; + +	fasttrap_return_common(rp, pc, p->p_pid, npc); + +	return (0); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, +    int aframes) +{ +	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno)); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, +    int aframes) +{ +	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno)); +} + +static ulong_t +fasttrap_getreg(struct regs *rp, uint_t reg) +{ +#ifdef __amd64 +	switch (reg) { +	case REG_R15:		return (rp->r_r15); +	case REG_R14:		return (rp->r_r14); +	case REG_R13:		return (rp->r_r13); +	case REG_R12:		return (rp->r_r12); +	case REG_R11:		return (rp->r_r11); +	case REG_R10:		return (rp->r_r10); +	case REG_R9:		return (rp->r_r9); +	case REG_R8:		return (rp->r_r8); +	case REG_RDI:		return (rp->r_rdi); +	case REG_RSI:		return (rp->r_rsi); +	case REG_RBP:		return (rp->r_rbp); +	case REG_RBX:		return (rp->r_rbx); +	case REG_RDX:		return (rp->r_rdx); +	case REG_RCX:		return (rp->r_rcx); +	case REG_RAX:		return (rp->r_rax); +	case REG_TRAPNO:	return (rp->r_trapno); +	case REG_ERR:		return (rp->r_err); +	case REG_RIP:		return (rp->r_rip); +	case REG_CS:		return (rp->r_cs); +	case REG_RFL:		return (rp->r_rfl); +	case REG_RSP:		return (rp->r_rsp); +	case REG_SS:		return (rp->r_ss); +	case REG_FS:		return (rp->r_fs); +	case REG_GS:		return (rp->r_gs); +	case REG_DS:		return (rp->r_ds); +	case REG_ES:		return (rp->r_es); +	case REG_FSBASE:	return (rdmsr(MSR_AMD_FSBASE)); +	case REG_GSBASE:	return (rdmsr(MSR_AMD_GSBASE)); +	} + +	panic("dtrace: illegal register constant"); +	/*NOTREACHED*/ +#else +	if (reg >= _NGREG) +		panic("dtrace: illegal register constant"); + +	return (((greg_t *)&rp->r_gs)[reg]); +#endif +} diff --git a/uts/sparc/dtrace/fasttrap_isa.c b/uts/sparc/dtrace/fasttrap_isa.c new file mode 100644 index 000000000000..45d87478d6a2 --- /dev/null +++ b/uts/sparc/dtrace/fasttrap_isa.c @@ -0,0 +1,1597 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc.  All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident	"%Z%%M%	%I%	%E% SMI" + +#include <sys/fasttrap_isa.h> +#include <sys/fasttrap_impl.h> +#include <sys/dtrace.h> +#include <sys/dtrace_impl.h> +#include <sys/cmn_err.h> +#include <sys/frame.h> +#include <sys/stack.h> +#include <sys/sysmacros.h> +#include <sys/trap.h> + +#include <v9/sys/machpcb.h> +#include <v9/sys/privregs.h> + +/* + * Lossless User-Land Tracing on SPARC + * ----------------------------------- + * + * The Basic Idea + * + * The most important design constraint is, of course, correct execution of + * the user thread above all else. 
The next most important goal is rapid
+ * execution. We combine execution of instructions in user-land with
+ * emulation of certain instructions in the kernel to aim for complete
+ * correctness and maximal performance.
+ *
+ * We take advantage of the split PC/NPC architecture to speed up logical
+ * single-stepping; when we copy an instruction out to the scratch space in
+ * the ulwp_t structure (held in the %g7 register on SPARC), we can
+ * effectively single step by setting the PC to our scratch space and leaving
+ * the NPC alone. This executes the replaced instruction and then continues
+ * on without having to reenter the kernel as with single-stepping. The
+ * obvious caveat is for instructions whose execution is PC dependent --
+ * branches, call and link instructions (call and jmpl), and the rdpc
+ * instruction. These instructions cannot be executed in the manner described
+ * so they must be emulated in the kernel.
+ *
+ * Emulation for this small set of instructions is fairly simple; the most
+ * difficult part being emulating branch conditions.
+ *
+ *
+ * A Cache Heavy Portfolio
+ *
+ * It's important to note at this time that copying an instruction out to the
+ * ulwp_t scratch space in user-land is rather complicated. SPARC has
+ * separate data and instruction caches so any writes to the D$ (using a
+ * store instruction for example) aren't necessarily reflected in the I$.
+ * The flush instruction can be used to synchronize the two and must be used
+ * for any self-modifying code, but the flush instruction only applies to the
+ * primary address space (the absence of a flusha analogue to the flush
+ * instruction that accepts an ASI argument is an obvious omission from SPARC
+ * v9 where the notion of the alternate address space was introduced on
+ * SPARC). To correctly copy out the instruction we must use a block store
+ * that doesn't allocate in the D$ and ensures synchronization with the I$;
+ * see dtrace_blksuword32() for the implementation (this function uses
+ * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
+ * described). Refer to the UltraSPARC I/II manual for details on the
+ * ASI_BLK_COMMIT_S ASI.
+ *
+ *
+ * Return Subtleties
+ *
+ * When we're firing a return probe we need to expose the value returned by
+ * the function being traced. Since the function can set the return value
+ * in its last instruction, we need to fire the return probe only _after_
+ * the effects of the instruction are apparent. For instructions that we
+ * emulate, we can call dtrace_probe() after we've performed the emulation;
+ * for instructions that we execute after we return to user-land, we set
+ * %pc to the instruction we copied out (as described above) and set %npc
+ * to a trap instruction stashed in the ulwp_t structure. After the traced
+ * instruction is executed, the trap instruction returns control to the
+ * kernel where we can fire the return probe.
+ *
+ * This need for a second trap in cases where we execute the traced
+ * instruction makes it all the more important to emulate the most common
+ * instructions to avoid the second trip in and out of the kernel.
+ *
+ *
+ * Making it Fast
+ *
+ * Since copying out an instruction is neither simple nor inexpensive for the
+ * CPU, we should attempt to avoid doing it in as many cases as possible.
+ * Since function entry and return are usually the most interesting probe
+ * sites, we attempt to tune the performance of the fasttrap provider around
+ * instructions typically in those places.
+ *
+ * Looking at a bunch of functions in libraries and executables reveals that
+ * most functions begin with either a save or a sethi (to set up a larger
+ * argument to the save) and end with a restore or an or (in the case of leaf
+ * functions). To try to improve performance, we emulate all of these
+ * instructions in the kernel.
+ *
+ * The save and restore instructions are a little tricky since they perform
+ * register window manipulation. Rather than trying to tinker with the
+ * register windows from the kernel, we emulate the implicit add that takes
+ * place as part of those instructions and set the %pc to point to a simple
+ * save or restore we've hidden in the ulwp_t structure. If we're in a return
+ * probe and want to make it seem as though the tracepoint has been completely
+ * executed, we need to remember that we've pulled this trick with restore and
+ * pull registers from the previous window (the one that we'll switch to once
+ * the simple store instruction is executed) rather than the current one. This
+ * is why in the case of emulating a restore we set the DTrace CPU flag
+ * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
+ * (see fasttrap_return_common()).
+ */
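The branch-condition emulation mentioned above lives later in this file; as a rough standalone sketch (not the kernel's actual code; the ICC_* constants and function name are introduced here), evaluating a Bicc condition from the four SPARC V9 %icc bits might look like this:

#include <stdio.h>
#include <stdint.h>

#define	ICC_N	0x8	/* negative */
#define	ICC_Z	0x4	/* zero */
#define	ICC_V	0x2	/* overflow */
#define	ICC_C	0x1	/* carry */

static int
bicc_taken(uint32_t cond, uint32_t icc)
{
	int n = (icc & ICC_N) != 0, z = (icc & ICC_Z) != 0;
	int v = (icc & ICC_V) != 0, c = (icc & ICC_C) != 0;

	switch (cond & 0xf) {
	case 0x0: return (0);			/* bn */
	case 0x1: return (z);			/* be */
	case 0x2: return (z | (n ^ v));		/* ble */
	case 0x3: return (n ^ v);		/* bl */
	case 0x4: return (c | z);		/* bleu */
	case 0x5: return (c);			/* bcs */
	case 0x6: return (n);			/* bneg */
	case 0x7: return (v);			/* bvs */
	case 0x8: return (1);			/* ba */
	case 0x9: return (!z);			/* bne */
	case 0xa: return (!(z | (n ^ v)));	/* bg */
	case 0xb: return (!(n ^ v));		/* bge */
	case 0xc: return (!(c | z));		/* bgu */
	case 0xd: return (!c);			/* bcc */
	case 0xe: return (!n);			/* bpos */
	case 0xf: return (!v);			/* bvc */
	}
	return (0);
}

int
main(void)
{
	/* bne (cond 0x9) is taken when Z is clear. */
	printf("%d\n", bicc_taken(0x9, 0));
	return (0);
}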
+
+#define	OP(x)		((x) >> 30)
+#define	OP2(x)		(((x) >> 22) & 0x07)
+#define	OP3(x)		(((x) >> 19) & 0x3f)
+#define	RCOND(x)	(((x) >> 25) & 0x07)
+#define	COND(x)		(((x) >> 25) & 0x0f)
+#define	A(x)		(((x) >> 29) & 0x01)
+#define	I(x)		(((x) >> 13) & 0x01)
+#define	RD(x)		(((x) >> 25) & 0x1f)
+#define	RS1(x)		(((x) >> 14) & 0x1f)
+#define	RS2(x)		(((x) >> 0) & 0x1f)
+#define	CC(x)		(((x) >> 20) & 0x03)
+#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
+#define	DISP22(x)	((x) & 0x3fffff)
+#define	DISP19(x)	((x) & 0x7ffff)
+#define	DISP30(x)	((x) & 0x3fffffff)
+#define	SW_TRAP(x)	((x) & 0x7f)
+
+#define	OP3_OR		0x02
+#define	OP3_RD		0x28
+#define	OP3_JMPL	0x38
+#define	OP3_RETURN	0x39
+#define	OP3_TCC		0x3a
+#define	OP3_SAVE	0x3c
+#define	OP3_RESTORE	0x3d
+
+#define	OP3_PREFETCH	0x2d
+#define	OP3_CASA	0x3c
+#define	OP3_PREFETCHA	0x3d
+#define	OP3_CASXA	0x3e
+
+#define	OP2_ILLTRAP	0x0
+#define	OP2_BPcc	0x1
+#define	OP2_Bicc	0x2
+#define	OP2_BPr		0x3
+#define	OP2_SETHI	0x4
+#define	OP2_FBPfcc	0x5
+#define	OP2_FBfcc	0x6
+
+#define	R_G0		0
+#define	R_O0		8
+#define	R_SP		14
+#define	R_I0		24
+#define	R_I1		25
+#define	R_I2		26
+#define	R_I3		27
+#define	R_I4		28
+
+/*
+ * Check the comment in fasttrap.h when changing these offsets or adding
+ * new instructions.
+ */
+#define	FASTTRAP_OFF_SAVE	64
+#define	FASTTRAP_OFF_RESTORE	68
+#define	FASTTRAP_OFF_FTRET	72
+#define	FASTTRAP_OFF_RETURN	76
+
+#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
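To make the implicit-add emulation of save concrete, here is a standalone sketch (not part of the commit) that cracks a save instruction using copies of the field macros above; SEX13() is a helper invented here to sign-extend the 13-bit immediate, and the encoding constant corresponds to "save %sp, -176, %sp":

#include <stdio.h>
#include <stdint.h>

#define	OP(x)		((x) >> 30)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	I(x)		(((x) >> 13) & 0x01)
/* Sign-extend the low 13 bits (arithmetic right shift assumed). */
#define	SEX13(x)	((int32_t)(((uint32_t)(x) & 0x1fff) << 19) >> 19)

int
main(void)
{
	uint32_t instr = 0x9de3bf50;	/* save %sp, -176, %sp */

	/* op 2 / op3 0x3c (OP3_SAVE) / i=1: rd = rs1 + simm13. */
	if (OP(instr) == 2 && OP3(instr) == 0x3c && I(instr)) {
		printf("save: r%u = r%u + %d\n",
		    RD(instr), RS1(instr), SEX13(instr));
	}
	return (0);
}

The emulated add (here %sp = %sp - 176) is all the kernel performs itself; the actual window spill is left to the simple save stashed at FASTTRAP_OFF_SAVE in the ulwp_t, per the comment above.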
+
+/*
+ * Tunable to let users turn off the fancy save instruction optimization.
+ * If a program is non-ABI compliant, there's a possibility that the save
+ * instruction optimization could cause an error.
+ */
+int fasttrap_optimize_save = 1;
+
+static uint64_t
+fasttrap_anarg(struct regs *rp, int argno)
+{
+	uint64_t value;
+
+	if (argno < 6)
+		return ((&rp->r_o0)[argno]);
+
+	if (curproc->p_model == DATAMODEL_NATIVE) {
+		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
+
+		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+		value = dtrace_fulword(&fr->fr_argd[argno]);
+		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
+		    CPU_DTRACE_BADALIGN);
+	} else {
+		struct frame32 *fr = (struct frame32 *)rp->r_sp;
+
+		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+		value = dtrace_fuword32(&fr->fr_argd[argno]);
+		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
+		    CPU_DTRACE_BADALIGN);
+	}
+
+	return (value);
+}
+
+static ulong_t fasttrap_getreg(struct regs *, uint_t);
+static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
+
+static void
+fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
+    uint_t fake_restore, int argc, uintptr_t *argv)
+{
+	int i, x, cap = MIN(argc, probe->ftp_nargs);
+	int inc = (fake_restore ? 16 : 0);
+
+	/*
+	 * The only way we'll hit the fake_restore case is if a USDT probe is
+	 * invoked as a tail-call. While it wouldn't be incorrect, we can
+	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
+	 * directly since a tail-call can't be made if the invoked function
+	 * would use the argument dump space (i.e. if there were more than
+	 * 6 arguments). We take this shortcut because unconditionally rooting
+	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
+	 */
+
+	if (curproc->p_model == DATAMODEL_NATIVE) {
+		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
+		uintptr_t v;
+
+		for (i = 0; i < cap; i++) {
+			x = probe->ftp_argmap[i];
+
+			if (x < 6)
+				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
+			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
+				argv[i] = 0;
+			else
+				argv[i] = v;
+		}
+
+	} else {
+		struct frame32 *fr = (struct frame32 *)rp->r_sp;
+		uint32_t v;
+
+		for (i = 0; i < cap; i++) {
+			x = probe->ftp_argmap[i];
+
+			if (x < 6)
+				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
+			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
+				argv[i] = 0;
+			else
+				argv[i] = v;
+		}
+	}
+
+	for (; i < argc; i++) {
+		argv[i] = 0;
+	}
+}
+
+static void
+fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
+    uint_t fake_restore)
+{
+	fasttrap_tracepoint_t *tp;
+	fasttrap_bucket_t *bucket;
+	fasttrap_id_t *id;
+	kmutex_t *pid_mtx;
+	dtrace_icookie_t cookie;
+
+	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
+	mutex_enter(pid_mtx);
+	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
+
+	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
+		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
+		    tp->ftt_proc->ftpc_acount != 0)
+			break;
+	}
+
+	/*
+	 * Don't sweat it if we can't find the tracepoint again; unlike
+	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
+	 * is not essential to the correct execution of the process.
+	 */
+	if (tp == NULL || tp->ftt_retids == NULL) {
+		mutex_exit(pid_mtx);
+		return;
+	}
+
+	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
+		fasttrap_probe_t *probe = id->fti_probe;
+
+		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
+			if (probe->ftp_argmap != NULL && fake_restore) {
+				uintptr_t t[5];
+
+				fasttrap_usdt_args(probe, rp, fake_restore,
+				    sizeof (t) / sizeof (t[0]), t);
+
+				cookie = dtrace_interrupt_disable();
+				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
+				dtrace_probe(probe->ftp_id, t[0], t[1],
+				    t[2], t[3], t[4]);
+				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
+				dtrace_interrupt_enable(cookie);
+
+			} else if (probe->ftp_argmap != NULL) {
+				uintptr_t t[5];
+
+				fasttrap_usdt_args(probe, rp, fake_restore,
+				    sizeof (t) / sizeof (t[0]), t);
+
+				dtrace_probe(probe->ftp_id, t[0], t[1],
+				    t[2], t[3], t[4]);
+
+			} else if (fake_restore) {
+				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
+				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
+				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
+				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
+				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
+
+				cookie = dtrace_interrupt_disable();
+				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
+				dtrace_probe(probe->ftp_id, arg0, arg1,
+				    arg2, arg3, arg4);
+				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
+				dtrace_interrupt_enable(cookie);
+
+			} else {
+				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
+				    rp->r_o2, rp->r_o3, rp->r_o4);
+			}
+
+			continue;
+		}
+
+		/*
+		 * If this is only a possible return point, we must
+		 * be looking at a potential tail call in leaf context.
+		 * If the %npc is still within this function, then we
+		 * must have misidentified a jmpl as a tail-call when it
+		 * is, in fact, part of a jump table. It would be nice to
+		 * remove this tracepoint, but this is neither the time
+		 * nor the place.
+		 */
+		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
+		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
+			continue;
+
+		/*
+		 * It's possible for a function to branch to the delay slot
+		 * of an instruction that we've identified as a return site.
+		 * We can detect this spurious return probe activation by
+		 * observing that in this case %npc will be %pc + 4 and %npc
+		 * will be inside the current function (unless the user is
+		 * doing _crazy_ instruction picking in which case there's
+		 * very little we can do). The second check is important
+		 * in case the last instructions of a function make a tail-
+		 * call to the function located immediately subsequent.
+		 */
+		if (rp->r_npc == rp->r_pc + 4 &&
+		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
+			continue;
+
+		/*
+		 * The first argument is the offset of the return tracepoint
+		 * in the function; the remaining arguments are the return
+		 * values.
+		 *
+		 * If fake_restore is set, we need to pull the return values
+		 * out of the %i's rather than the %o's -- a little trickier.
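+		 *
+		 * (Editorial aside, not in the original: the two %npc range
+		 * checks above exploit unsigned wraparound so that a single
+		 * compare performs a two-sided bounds test. A sketch,
+		 * assuming <stdint.h> types:
+		 *
+		 *	int
+		 *	in_func(uintptr_t npc, uintptr_t faddr, uintptr_t sz)
+		 *	{
+		 *		return (npc - faddr < sz);
+		 *	}
+		 *
+		 * If npc is below faddr the subtraction wraps to a huge
+		 * value and the test fails, so this checks
+		 * faddr <= npc < faddr + sz with one comparison.)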
+		 */
+		if (!fake_restore) {
+			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
+			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
+		} else {
+			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
+			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
+			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
+			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
+
+			cookie = dtrace_interrupt_disable();
+			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
+			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
+			    arg0, arg1, arg2, arg3);
+			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
+			dtrace_interrupt_enable(cookie);
+		}
+	}
+
+	mutex_exit(pid_mtx);
+}
+
+int
+fasttrap_pid_probe(struct regs *rp)
+{
+	proc_t *p = curproc;
+	fasttrap_tracepoint_t *tp, tp_local;
+	fasttrap_id_t *id;
+	pid_t pid;
+	uintptr_t pc = rp->r_pc;
+	uintptr_t npc = rp->r_npc;
+	uintptr_t orig_pc = pc;
+	fasttrap_bucket_t *bucket;
+	kmutex_t *pid_mtx;
+	uint_t fake_restore = 0, is_enabled = 0;
+	dtrace_icookie_t cookie;
+
+	/*
+	 * It's possible that a user (in a veritable orgy of bad planning)
+	 * could redirect this thread's flow of control before it reached the
+	 * return probe fasttrap. In this case we need to kill the process
+	 * since it's in an unrecoverable state.
+	 */
+	if (curthread->t_dtrace_step) {
+		ASSERT(curthread->t_dtrace_on);
+		fasttrap_sigtrap(p, curthread, pc);
+		return (0);
+	}
+
+	/*
+	 * Clear all user tracing flags.
+	 */
+	curthread->t_dtrace_ft = 0;
+	curthread->t_dtrace_pc = 0;
+	curthread->t_dtrace_npc = 0;
+	curthread->t_dtrace_scrpc = 0;
+	curthread->t_dtrace_astpc = 0;
+
+	/*
+	 * Treat a child created by a call to vfork(2) as if it were its
+	 * parent. We know that there's only one thread of control in such a
+	 * process: this one.
+	 */
+	while (p->p_flag & SVFORK) {
+		p = p->p_parent;
+	}
+
+	pid = p->p_pid;
+	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
+	mutex_enter(pid_mtx);
+	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
+
+	/*
+	 * Look up the tracepoint that the process just hit.
+	 */
+	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
+		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
+		    tp->ftt_proc->ftpc_acount != 0)
+			break;
+	}
+
+	/*
+	 * If we couldn't find a matching tracepoint, either a tracepoint has
+	 * been inserted without using the pid<pid> ioctl interface (see
+	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
+	 */
+	if (tp == NULL) {
+		mutex_exit(pid_mtx);
+		return (-1);
+	}
+
+	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
+		fasttrap_probe_t *probe = id->fti_probe;
+		int isentry = (id->fti_ptype == DTFTP_ENTRY);
+
+		if (id->fti_ptype == DTFTP_IS_ENABLED) {
+			is_enabled = 1;
+			continue;
+		}
+
+		/*
+		 * We note that this was an entry probe to help ustack() find
+		 * the first caller.
+		 */
+		if (isentry) {
+			cookie = dtrace_interrupt_disable();
+			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
+		}
+		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
+		    rp->r_o3, rp->r_o4);
+		if (isentry) {
+			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
+			dtrace_interrupt_enable(cookie);
+		}
+	}
+
+	/*
+	 * We're about to do a bunch of work so we cache a local copy of
+	 * the tracepoint to emulate the instruction, and then find the
+	 * tracepoint again later if we need to light up any return probes.
+	 */
+	tp_local = *tp;
+	mutex_exit(pid_mtx);
+	tp = &tp_local;
+
+	/*
+	 * If there's an is-enabled probe connected to this tracepoint it
+	 * means that there was a 'mov %g0, %o0' instruction that was placed
+	 * there by DTrace when the binary was linked. As this probe is, in
+	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
+	 * bypass all the instruction emulation logic since we know the
+	 * inevitable result. It's possible that a user could construct a
+	 * scenario where the 'is-enabled' probe was on some other
+	 * instruction, but that would be a rather exotic way to shoot oneself
+	 * in the foot.
+	 */
+	if (is_enabled) {
+		rp->r_o0 = 1;
+		pc = rp->r_npc;
+		npc = pc + 4;
+		goto done;
+	}
+
+	/*
+	 * We emulate certain types of instructions to ensure correctness
+	 * (in the case of position dependent instructions) or optimize
+	 * common cases. The rest we have the thread execute back in user-
+	 * land.
+	 */
+	switch (tp->ftt_type) {
+	case FASTTRAP_T_SAVE:
+	{
+		int32_t imm;
+
+		/*
+		 * This is an optimization to let us handle function entry
+		 * probes more efficiently. Many functions begin with a save
+		 * instruction that follows the pattern:
+		 *	save	%sp, <imm>, %sp
+		 *
+		 * Meanwhile, we've stashed the instruction:
+		 *	save	%g1, %g0, %sp
+		 *
+		 * off of %g7, so all we have to do is stick the right value
+		 * into %g1 and reset %pc to point to the instruction we've
+		 * cleverly hidden (%npc should not be touched).
+		 */
+
+		imm = tp->ftt_instr << 19;
+		imm >>= 19;
+		rp->r_g1 = rp->r_sp + imm;
+		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
+		break;
+	}
+
+	case FASTTRAP_T_RESTORE:
+	{
+		ulong_t value;
+		uint_t rd;
+
+		/*
+		 * This is an optimization to let us handle function
+		 * return probes more efficiently. Most non-leaf functions
+		 * end with the sequence:
+		 *	ret
+		 *	restore	<reg>, <reg_or_imm>, %oX
+		 *
+		 * We've stashed the instruction:
+		 *	restore	%g0, %g0, %g0
+		 *
+		 * off of %g7 so we just need to place the correct value
+		 * in the right %i register (since after our fake-o
+		 * restore, the %i's will become the %o's) and set the %pc
+		 * to point to our hidden restore. We also set fake_restore to
+		 * let fasttrap_return_common() know that it will find the
+		 * return values in the %i's rather than the %o's.
+		 */
+
+		if (I(tp->ftt_instr)) {
+			int32_t imm;
+
+			imm = tp->ftt_instr << 19;
+			imm >>= 19;
+			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
+		} else {
+			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
+			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
+		}
+
+		/*
+		 * Convert %o's to %i's; leave %g's as they are.
+		 */
+		rd = RD(tp->ftt_instr);
+		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
+
+		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
+		fake_restore = 1;
+		break;
+	}
+
+	case FASTTRAP_T_RETURN:
+	{
+		uintptr_t target;
+
+		/*
+		 * A return instruction is like a jmpl (without the link
+		 * part) that executes an implicit restore. We've stashed
+		 * the instruction:
+		 *	return %o0
+		 *
+		 * off of %g7 so we just need to place the target in %o0
+		 * and set the %pc to point to the stashed return instruction.
+		 * We use %o0 since that register disappears after the return
+		 * executes, erasing any evidence of this tampering.
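+		 *
+		 * (Editorial aside, not in the original: the rd conversion
+		 * in the FASTTRAP_T_RESTORE case above relies on the SPARC
+		 * register numbering %g0-%g7 = 0-7, %o0-%o7 = 8-15,
+		 * %l0-%l7 = 16-23, and %i0-%i7 = 24-31:
+		 *
+		 *	uint_t
+		 *	out_to_in(uint_t rd)
+		 *	{
+		 *		return ((rd & 0x18) == 0x8 ? rd + 16 : rd);
+		 *	}
+		 *
+		 * mapping %oN to the %iN that becomes %oN again once the
+		 * hidden restore executes, and passing %g's through.)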
+		 */ +		if (I(tp->ftt_instr)) { +			int32_t imm; + +			imm = tp->ftt_instr << 19; +			imm >>= 19; +			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm; +		} else { +			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + +			    fasttrap_getreg(rp, RS2(tp->ftt_instr)); +		} + +		fasttrap_putreg(rp, R_O0, target); + +		pc = rp->r_g7 + FASTTRAP_OFF_RETURN; +		fake_restore = 1; +		break; +	} + +	case FASTTRAP_T_OR: +	{ +		ulong_t value; + +		if (I(tp->ftt_instr)) { +			int32_t imm; + +			imm = tp->ftt_instr << 19; +			imm >>= 19; +			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm; +		} else { +			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | +			    fasttrap_getreg(rp, RS2(tp->ftt_instr)); +		} + +		fasttrap_putreg(rp, RD(tp->ftt_instr), value); +		pc = rp->r_npc; +		npc = pc + 4; +		break; +	} + +	case FASTTRAP_T_SETHI: +		if (RD(tp->ftt_instr) != R_G0) { +			uint32_t imm32 = tp->ftt_instr << 10; +			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32); +		} +		pc = rp->r_npc; +		npc = pc + 4; +		break; + +	case FASTTRAP_T_CCR: +	{ +		uint_t c, v, z, n, taken; +		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT; + +		if (tp->ftt_cc != 0) +			ccr >>= 4; + +		c = (ccr >> 0) & 1; +		v = (ccr >> 1) & 1; +		z = (ccr >> 2) & 1; +		n = (ccr >> 3) & 1; + +		switch (tp->ftt_code) { +		case 0x0:	/* BN */ +			taken = 0;		break; +		case 0x1:	/* BE */ +			taken = z;		break; +		case 0x2:	/* BLE */ +			taken = z | (n ^ v);	break; +		case 0x3:	/* BL */ +			taken = n ^ v;		break; +		case 0x4:	/* BLEU */ +			taken = c | z;		break; +		case 0x5:	/* BCS (BLU) */ +			taken = c;		break; +		case 0x6:	/* BNEG */ +			taken = n;		break; +		case 0x7:	/* BVS */ +			taken = v;		break; +		case 0x8:	/* BA */ +			/* +			 * We handle the BA case differently since the annul +			 * bit means something slightly different. +			 */ +			panic("fasttrap: mishandled a branch"); +			taken = 1;		break; +		case 0x9:	/* BNE */ +			taken = ~z;		break; +		case 0xa:	/* BG */ +			taken = ~(z | (n ^ v));	break; +		case 0xb:	/* BGE */ +			taken = ~(n ^ v);	break; +		case 0xc:	/* BGU */ +			taken = ~(c | z);	break; +		case 0xd:	/* BCC (BGEU) */ +			taken = ~c;		break; +		case 0xe:	/* BPOS */ +			taken = ~n;		break; +		case 0xf:	/* BVC */ +			taken = ~v;		break; +		} + +		if (taken & 1) { +			pc = rp->r_npc; +			npc = tp->ftt_dest; +		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { +			/* +			 * Untaken annulled branches don't execute the +			 * instruction in the delay slot. 
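+			 *
+			 * (Editorial aside, not in the original: the taken
+			 * computations above follow the SPARC signed-compare
+			 * identities; for BLE, for example:
+			 *
+			 *	uint_t v = (ccr >> 1) & 1;
+			 *	uint_t z = (ccr >> 2) & 1;
+			 *	uint_t n = (ccr >> 3) & 1;
+			 *	uint_t ble = z | (n ^ v);
+			 *
+			 * since n ^ v is "signed less than" after a subtract
+			 * and z catches equality. The bitwise ~ cases leave
+			 * garbage in the upper bits, hence (taken & 1).)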
+			 */ +			pc = rp->r_npc + 4; +			npc = pc + 4; +		} else { +			pc = rp->r_npc; +			npc = pc + 4; +		} +		break; +	} + +	case FASTTRAP_T_FCC: +	{ +		uint_t fcc; +		uint_t taken; +		uint64_t fsr; + +		dtrace_getfsr(&fsr); + +		if (tp->ftt_cc == 0) { +			fcc = (fsr >> 10) & 0x3; +		} else { +			uint_t shift; +			ASSERT(tp->ftt_cc <= 3); +			shift = 30 + tp->ftt_cc * 2; +			fcc = (fsr >> shift) & 0x3; +		} + +		switch (tp->ftt_code) { +		case 0x0:	/* FBN */ +			taken = (1 << fcc) & (0|0|0|0);	break; +		case 0x1:	/* FBNE */ +			taken = (1 << fcc) & (8|4|2|0);	break; +		case 0x2:	/* FBLG */ +			taken = (1 << fcc) & (0|4|2|0);	break; +		case 0x3:	/* FBUL */ +			taken = (1 << fcc) & (8|0|2|0);	break; +		case 0x4:	/* FBL */ +			taken = (1 << fcc) & (0|0|2|0);	break; +		case 0x5:	/* FBUG */ +			taken = (1 << fcc) & (8|4|0|0);	break; +		case 0x6:	/* FBG */ +			taken = (1 << fcc) & (0|4|0|0);	break; +		case 0x7:	/* FBU */ +			taken = (1 << fcc) & (8|0|0|0);	break; +		case 0x8:	/* FBA */ +			/* +			 * We handle the FBA case differently since the annul +			 * bit means something slightly different. +			 */ +			panic("fasttrap: mishandled a branch"); +			taken = (1 << fcc) & (8|4|2|1);	break; +		case 0x9:	/* FBE */ +			taken = (1 << fcc) & (0|0|0|1);	break; +		case 0xa:	/* FBUE */ +			taken = (1 << fcc) & (8|0|0|1);	break; +		case 0xb:	/* FBGE */ +			taken = (1 << fcc) & (0|4|0|1);	break; +		case 0xc:	/* FBUGE */ +			taken = (1 << fcc) & (8|4|0|1);	break; +		case 0xd:	/* FBLE */ +			taken = (1 << fcc) & (0|0|2|1);	break; +		case 0xe:	/* FBULE */ +			taken = (1 << fcc) & (8|0|2|1);	break; +		case 0xf:	/* FBO */ +			taken = (1 << fcc) & (0|4|2|1);	break; +		} + +		if (taken) { +			pc = rp->r_npc; +			npc = tp->ftt_dest; +		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { +			/* +			 * Untaken annulled branches don't execute the +			 * instruction in the delay slot. +			 */ +			pc = rp->r_npc + 4; +			npc = pc + 4; +		} else { +			pc = rp->r_npc; +			npc = pc + 4; +		} +		break; +	} + +	case FASTTRAP_T_REG: +	{ +		int64_t value; +		uint_t taken; +		uint_t reg = RS1(tp->ftt_instr); + +		/* +		 * An ILP32 process shouldn't be using a branch predicated on +		 * an %i or an %l since it would violate the ABI. It's a +		 * violation of the ABI because we can't ensure deterministic +		 * behavior. We should have identified this case when we +		 * enabled the probe. +		 */ +		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16); + +		value = (int64_t)fasttrap_getreg(rp, reg); + +		switch (tp->ftt_code) { +		case 0x1:	/* BRZ */ +			taken = (value == 0);	break; +		case 0x2:	/* BRLEZ */ +			taken = (value <= 0);	break; +		case 0x3:	/* BRLZ */ +			taken = (value < 0);	break; +		case 0x5:	/* BRNZ */ +			taken = (value != 0);	break; +		case 0x6:	/* BRGZ */ +			taken = (value > 0);	break; +		case 0x7:	/* BRGEZ */ +			taken = (value >= 0);	break; +		default: +		case 0x0: +		case 0x4: +			panic("fasttrap: mishandled a branch"); +		} + +		if (taken) { +			pc = rp->r_npc; +			npc = tp->ftt_dest; +		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { +			/* +			 * Untaken annulled branches don't execute the +			 * instruction in the delay slot. +			 */ +			pc = rp->r_npc + 4; +			npc = pc + 4; +		} else { +			pc = rp->r_npc; +			npc = pc + 4; +		} +		break; +	} + +	case FASTTRAP_T_ALWAYS: +		/* +		 * BAs, BA,As... +		 */ + +		if (tp->ftt_flags & FASTTRAP_F_ANNUL) { +			/* +			 * Annulled branch always instructions never execute +			 * the instruction in the delay slot. 
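+			 *
+			 * (Editorial aside, not in the original: the FCC
+			 * cases above encode each predicate as a four-bit
+			 * truth table indexed by the two-bit fcc value,
+			 * where 0 is =, 1 is <, 2 is >, and 3 is unordered:
+			 *
+			 *	taken = (1 << fcc) & mask;
+			 *
+			 * FBNE's mask of 8|4|2|0, for instance, is true for
+			 * <, >, and unordered but not for =.)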
+			 */
+			pc = tp->ftt_dest;
+			npc = tp->ftt_dest + 4;
+		} else {
+			pc = rp->r_npc;
+			npc = tp->ftt_dest;
+		}
+		break;
+
+	case FASTTRAP_T_RDPC:
+		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
+		pc = rp->r_npc;
+		npc = pc + 4;
+		break;
+
+	case FASTTRAP_T_CALL:
+		/*
+		 * It's a call _and_ link, remember...
+		 */
+		rp->r_o7 = rp->r_pc;
+		pc = rp->r_npc;
+		npc = tp->ftt_dest;
+		break;
+
+	case FASTTRAP_T_JMPL:
+		pc = rp->r_npc;
+
+		if (I(tp->ftt_instr)) {
+			uint_t rs1 = RS1(tp->ftt_instr);
+			int32_t imm;
+
+			imm = tp->ftt_instr << 19;
+			imm >>= 19;
+			npc = fasttrap_getreg(rp, rs1) + imm;
+		} else {
+			uint_t rs1 = RS1(tp->ftt_instr);
+			uint_t rs2 = RS2(tp->ftt_instr);
+
+			npc = fasttrap_getreg(rp, rs1) +
+			    fasttrap_getreg(rp, rs2);
+		}
+
+		/*
+		 * Do the link part of the jump-and-link instruction.
+		 */
+		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
+
+		break;
+
+	case FASTTRAP_T_COMMON:
+	{
+		curthread->t_dtrace_scrpc = rp->r_g7;
+		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
+
+		/*
+		 * Copy the instruction to a reserved location in the
+		 * user-land thread structure, then set the PC to that
+		 * location and leave the NPC alone. We take pains to ensure
+		 * consistency in the instruction stream (See SPARC
+		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
+		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
+		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
+		 * instruction into the user's address space without
+		 * bypassing the I$. There's no AS_USER version of this ASI
+		 * (as exists for other ASIs), so we use the lofault
+		 * mechanism to catch faults.
+		 */
+		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
+			/*
+			 * If the copyout fails, then the process's state
+			 * is not consistent (the effects of the traced
+			 * instruction will never be seen). This process
+			 * cannot be allowed to continue execution.
+			 */
+			fasttrap_sigtrap(curproc, curthread, pc);
+			return (0);
+		}
+
+		curthread->t_dtrace_pc = pc;
+		curthread->t_dtrace_npc = npc;
+		curthread->t_dtrace_on = 1;
+
+		pc = curthread->t_dtrace_scrpc;
+
+		if (tp->ftt_retids != NULL) {
+			curthread->t_dtrace_step = 1;
+			curthread->t_dtrace_ret = 1;
+			npc = curthread->t_dtrace_astpc;
+		}
+		break;
+	}
+
+	default:
+		panic("fasttrap: mishandled an instruction");
+	}
+
+	/*
+	 * This bit me in the ass a couple of times, so let's toss this
+	 * in as a cursory sanity check.
+	 */
+	ASSERT(pc != rp->r_g7 + 4);
+	ASSERT(pc != rp->r_g7 + 8);
+
+done:
+	/*
+	 * If there were no return probes when we first found the tracepoint,
+	 * we should feel no obligation to honor any return probes that were
+	 * subsequently enabled -- they'll just have to wait until the next
+	 * time around.
+	 */
+	if (tp->ftt_retids != NULL) {
+		/*
+		 * We need to wait until the results of the instruction are
+		 * apparent before invoking any return probes. If this
+		 * instruction was emulated we can just call
+		 * fasttrap_return_common(); if it needs to be executed, we
+		 * need to wait until we return to the kernel.
+	 */
+		if (tp->ftt_type != FASTTRAP_T_COMMON) {
+			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
+		} else {
+			ASSERT(curthread->t_dtrace_ret != 0);
+			ASSERT(curthread->t_dtrace_pc == orig_pc);
+			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
+			ASSERT(npc == curthread->t_dtrace_astpc);
+		}
+	}
+
+	ASSERT(pc != 0);
+	rp->r_pc = pc;
+	rp->r_npc = npc;
+
+	return (0);
+}
+
+int
+fasttrap_return_probe(struct regs *rp)
+{
+	proc_t *p = ttoproc(curthread);
+	pid_t pid;
+	uintptr_t pc = curthread->t_dtrace_pc;
+	uintptr_t npc = curthread->t_dtrace_npc;
+
+	curthread->t_dtrace_pc = 0;
+	curthread->t_dtrace_npc = 0;
+	curthread->t_dtrace_scrpc = 0;
+	curthread->t_dtrace_astpc = 0;
+
+	/*
+	 * Treat a child created by a call to vfork(2) as if it were its
+	 * parent. We know there's only one thread of control in such a
+	 * process: this one.
+	 */
+	while (p->p_flag & SVFORK) {
+		p = p->p_parent;
+	}
+
+	/*
+	 * We set the %pc and %npc to their values when the traced
+	 * instruction was initially executed so that it appears to
+	 * dtrace_probe() that we're on the original instruction, and so that
+	 * the user can't easily detect our complex web of lies.
+	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
+	 * after we return.
+	 */
+	rp->r_pc = pc;
+	rp->r_npc = npc;
+
+	pid = p->p_pid;
+	fasttrap_return_common(rp, pc, pid, 0);
+
+	return (0);
+}
+
+int
+fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
+{
+	fasttrap_instr_t instr = FASTTRAP_INSTR;
+
+	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
+		return (-1);
+
+	return (0);
+}
+
+int
+fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
+{
+	fasttrap_instr_t instr;
+
+	/*
+	 * Distinguish between read or write failures and a changed
+	 * instruction.
+	 */
+	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
+		return (0);
+	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
+		return (0);
+	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
+		return (-1);
+
+	return (0);
+}
+
+int
+fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
+    fasttrap_probe_type_t type)
+{
+	uint32_t instr;
+	int32_t disp;
+
+	/*
+	 * Read the instruction at the given address out of the process's
+	 * address space. We don't have to worry about a debugger
+	 * changing this instruction before we overwrite it with our trap
+	 * instruction since P_PR_LOCK is set.
+	 */
+	if (uread(p, &instr, 4, pc) != 0)
+		return (-1);
+
+	/*
+	 * Decode the instruction to fill in the probe flags. We can have
+	 * the process execute most instructions on its own using a pc/npc
+	 * trick, but pc-relative control transfers present a problem since
+	 * we're relocating the instruction. We emulate these instructions
+	 * in the kernel. We assume a default type and overwrite that as
+	 * needed.
+	 *
+	 * pc-relative instructions must be emulated for correctness;
+	 * other instructions (which represent a large set of commonly traced
+	 * instructions) are emulated or otherwise optimized for performance.
+	 */
+	tp->ftt_type = FASTTRAP_T_COMMON;
+	if (OP(instr) == 1) {
+		/*
+		 * Call instructions.
+		 */
+		tp->ftt_type = FASTTRAP_T_CALL;
+		disp = DISP30(instr) << 2;
+		tp->ftt_dest = pc + (intptr_t)disp;
+
+	} else if (OP(instr) == 0) {
+		/*
+		 * Branch instructions.
+		 *
+		 * Unconditional branches need careful attention when they're
+		 * annulled: annulled unconditional branches never execute
+		 * the instruction in the delay slot.
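+		 *
+		 * (Editorial aside, not in the original: in the call case
+		 * above, shifting the 30-bit word displacement left by two
+		 * within a 32-bit int lands its sign bit in bit 31, so the
+		 * result is already a signed byte displacement:
+		 *
+		 *	disp = DISP30(instr) << 2;
+		 *	dest = pc + (intptr_t)disp;
+		 *
+		 * A call one word backwards has DISP30(instr) equal to
+		 * 0x3fffffff, and 0x3fffffff << 2 is -4 as an int32_t.)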
+		 */ +		switch (OP2(instr)) { +		case OP2_ILLTRAP: +		case 0x7: +			/* +			 * The compiler may place an illtrap after a call to +			 * a function that returns a structure. In the case of +			 * a returned structure, the compiler places an illtrap +			 * whose const22 field is the size of the returned +			 * structure immediately following the delay slot of +			 * the call. To stay out of the way, we refuse to +			 * place tracepoints on top of illtrap instructions. +			 * +			 * This is one of the dumbest architectural decisions +			 * I've ever had to work around. +			 * +			 * We also identify the only illegal op2 value (See +			 * SPARC Architecture Manual Version 9, E.2 table 31). +			 */ +			return (-1); + +		case OP2_BPcc: +			if (COND(instr) == 8) { +				tp->ftt_type = FASTTRAP_T_ALWAYS; +			} else { +				/* +				 * Check for an illegal instruction. +				 */ +				if (CC(instr) & 1) +					return (-1); +				tp->ftt_type = FASTTRAP_T_CCR; +				tp->ftt_cc = CC(instr); +				tp->ftt_code = COND(instr); +			} + +			if (A(instr) != 0) +				tp->ftt_flags |= FASTTRAP_F_ANNUL; + +			disp = DISP19(instr); +			disp <<= 13; +			disp >>= 11; +			tp->ftt_dest = pc + (intptr_t)disp; +			break; + +		case OP2_Bicc: +			if (COND(instr) == 8) { +				tp->ftt_type = FASTTRAP_T_ALWAYS; +			} else { +				tp->ftt_type = FASTTRAP_T_CCR; +				tp->ftt_cc = 0; +				tp->ftt_code = COND(instr); +			} + +			if (A(instr) != 0) +				tp->ftt_flags |= FASTTRAP_F_ANNUL; + +			disp = DISP22(instr); +			disp <<= 10; +			disp >>= 8; +			tp->ftt_dest = pc + (intptr_t)disp; +			break; + +		case OP2_BPr: +			/* +			 * Check for an illegal instruction. +			 */ +			if ((RCOND(instr) & 3) == 0) +				return (-1); + +			/* +			 * It's a violation of the v8plus ABI to use a +			 * register-predicated branch in a 32-bit app if +			 * the register used is an %l or an %i (%gs and %os +			 * are legit because they're not saved to the stack +			 * in 32-bit words when we take a trap). 
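+			 *
+			 * (Editorial aside, not in the original: BPr splits
+			 * its 16-bit word displacement into d16hi, bits
+			 * 21:20, and d16lo, bits 13:0, which the DISP16()
+			 * macro stitches back together; the shift pair below
+			 * then sign-extends and scales it:
+			 *
+			 *	disp = DISP16(instr);
+			 *	disp <<= 16;	bit 15 moves to bit 31
+			 *	disp >>= 14;	sign-extend, multiply by 4
+			 *
+			 * leaving the byte displacement added to %pc.)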
+			 */
+			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
+				return (-1);
+
+			tp->ftt_type = FASTTRAP_T_REG;
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+			disp = DISP16(instr);
+			disp <<= 16;
+			disp >>= 14;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			tp->ftt_code = RCOND(instr);
+			break;
+
+		case OP2_SETHI:
+			tp->ftt_type = FASTTRAP_T_SETHI;
+			break;
+
+		case OP2_FBPfcc:
+			if (COND(instr) == 8) {
+				tp->ftt_type = FASTTRAP_T_ALWAYS;
+			} else {
+				tp->ftt_type = FASTTRAP_T_FCC;
+				tp->ftt_cc = CC(instr);
+				tp->ftt_code = COND(instr);
+			}
+
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+
+			disp = DISP19(instr);
+			disp <<= 13;
+			disp >>= 11;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			break;
+
+		case OP2_FBfcc:
+			if (COND(instr) == 8) {
+				tp->ftt_type = FASTTRAP_T_ALWAYS;
+			} else {
+				tp->ftt_type = FASTTRAP_T_FCC;
+				tp->ftt_cc = 0;
+				tp->ftt_code = COND(instr);
+			}
+
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+
+			disp = DISP22(instr);
+			disp <<= 10;
+			disp >>= 8;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			break;
+		}
+
+	} else if (OP(instr) == 2) {
+		switch (OP3(instr)) {
+		case OP3_RETURN:
+			tp->ftt_type = FASTTRAP_T_RETURN;
+			break;
+
+		case OP3_JMPL:
+			tp->ftt_type = FASTTRAP_T_JMPL;
+			break;
+
+		case OP3_RD:
+			if (RS1(instr) == 5)
+				tp->ftt_type = FASTTRAP_T_RDPC;
+			break;
+
+		case OP3_SAVE:
+			/*
+			 * We optimize for save instructions at function
+			 * entry; see the comment in fasttrap_pid_probe()
+			 * (near FASTTRAP_T_SAVE) for details.
+			 */
+			if (fasttrap_optimize_save != 0 &&
+			    type == DTFTP_ENTRY &&
+			    I(instr) == 1 && RD(instr) == R_SP)
+				tp->ftt_type = FASTTRAP_T_SAVE;
+			break;
+
+		case OP3_RESTORE:
+			/*
+			 * We optimize restore instructions at function
+			 * return; see the comment in fasttrap_pid_probe()
+			 * (near FASTTRAP_T_RESTORE) for details.
+			 *
+			 * rd must be an %o or %g register.
+			 */
+			if ((RD(instr) & 0x10) == 0)
+				tp->ftt_type = FASTTRAP_T_RESTORE;
+			break;
+
+		case OP3_OR:
+			/*
+			 * A large proportion of instructions in the delay
+			 * slot of retl instructions are or's so we emulate
+			 * these downstairs as an optimization.
+			 */
+			tp->ftt_type = FASTTRAP_T_OR;
+			break;
+
+		case OP3_TCC:
+			/*
+			 * Breakpoint instructions are effectively position-
+			 * dependent since the debugger uses the %pc value
+			 * to look up which breakpoint was executed. As a
+			 * result, we can't actually instrument breakpoints.
+			 */
+			if (SW_TRAP(instr) == ST_BREAKPOINT)
+				return (-1);
+			break;
+
+		case 0x19:
+		case 0x1d:
+		case 0x29:
+		case 0x33:
+		case 0x3f:
+			/*
+			 * Identify illegal instructions (See SPARC
+			 * Architecture Manual Version 9, E.2 table 32).
+			 */
+			return (-1);
+		}
+	} else if (OP(instr) == 3) {
+		uint32_t op3 = OP3(instr);
+
+		/*
+		 * Identify illegal instructions (See SPARC Architecture
+		 * Manual Version 9, E.2 table 33).
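+		 *
+		 * (Editorial aside, not in the original: the OP3_RD case in
+		 * the OP(instr) == 2 block above singles out "rd %pc, <rd>"
+		 * -- op 2, op3 0x28, rs1 5 -- as the one position-dependent
+		 * read; the other readable ASRs (%y, %ccr, %asi, ...) are
+		 * safe to single-step as FASTTRAP_T_COMMON. A sketch of the
+		 * test using the field macros from the top of this file:
+		 *
+		 *	int
+		 *	is_rdpc(uint32_t i)
+		 *	{
+		 *		return (OP(i) == 2 && OP3(i) == OP3_RD &&
+		 *		    RS1(i) == 5);
+		 *	}
+		 *
+		 * which mirrors the checks performed above.)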
+		 */
+		if ((op3 & 0x28) == 0x28) {
+			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
+			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
+				return (-1);
+		} else {
+			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
+				return (-1);
+		}
+	}
+
+	tp->ftt_instr = instr;
+
+	/*
+	 * We don't know how this tracepoint is going to be used, but in case
+	 * it's used as part of a function return probe, we need to indicate
+	 * whether it's always a return site or only potentially a return
+	 * site. If it's part of a return probe, it's always going to be a
+	 * return from that function if it's a restore instruction or if
+	 * the previous instruction was a return. If we could reliably
+	 * distinguish jump tables from return sites, this wouldn't be
+	 * necessary.
+	 */
+	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
+	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
+	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
+		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
+
+	return (0);
+}
+
+/*ARGSUSED*/
+uint64_t
+fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+    int aframes)
+{
+	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
+}
+
+/*ARGSUSED*/
+uint64_t
+fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+    int aframes)
+{
+	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
+}
+
+static uint64_t fasttrap_getreg_fast_cnt;
+static uint64_t fasttrap_getreg_mpcb_cnt;
+static uint64_t fasttrap_getreg_slow_cnt;
+
+static ulong_t
+fasttrap_getreg(struct regs *rp, uint_t reg)
+{
+	ulong_t value;
+	dtrace_icookie_t cookie;
+	struct machpcb *mpcb;
+	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
+
+	/*
+	 * We have the %os and %gs in our struct regs, but if we need to
+	 * snag a %l or %i we need to go scrounging around in the process's
+	 * address space.
+	 */
+	if (reg == 0)
+		return (0);
+
+	if (reg < 16)
+		return ((&rp->r_g1)[reg - 1]);
+
+	/*
+	 * Before we look at the user's stack, we'll check the register
+	 * windows to see if the information we want is in there.
+	 */
+	cookie = dtrace_interrupt_disable();
+	if (dtrace_getotherwin() > 0) {
+		value = dtrace_getreg_win(reg, 1);
+		dtrace_interrupt_enable(cookie);
+
+		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);
+
+		return (value);
+	}
+	dtrace_interrupt_enable(cookie);
+
+	/*
+	 * First check the machpcb structure to see if we've already read
+	 * in the register window we're looking for; if we haven't (and
+	 * we probably haven't), try to copy in the value of the register.
+	 */
+	/* LINTED - alignment */
+	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
+
+	if (get_udatamodel() == DATAMODEL_NATIVE) {
+		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
+
+		if (mpcb->mpcb_wbcnt > 0) {
+			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
+			int i = mpcb->mpcb_wbcnt;
+			do {
+				i--;
+				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
+					continue;
+
+				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
+				return (rwin[i].rw_local[reg - 16]);
+			} while (i > 0);
+		}
+
+		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
+			goto err;
+	} else {
+		struct frame32 *fr =
+		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
+		uint32_t *v32 = (uint32_t *)&value;
+
+		if (mpcb->mpcb_wbcnt > 0) {
+			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
+			int i = mpcb->mpcb_wbcnt;
+			do {
+				i--;
+				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
+					continue;
+
+				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
+				return (rwin[i].rw_local[reg - 16]);
+			} while (i > 0);
+		}
+
+		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
+			goto err;
+
+		v32[0] = 0;
+	}
+
+	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
+	return (value);
+
+err:
+	/*
+	 * If the copy in failed, the process will be in an irrecoverable
+	 * state, and we have no choice but to kill it.
+	 */
+	psignal(ttoproc(curthread), SIGILL);
+	return (0);
+}
+
+static uint64_t fasttrap_putreg_fast_cnt;
+static uint64_t fasttrap_putreg_mpcb_cnt;
+static uint64_t fasttrap_putreg_slow_cnt;
+
+static void
+fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
+{
+	dtrace_icookie_t cookie;
+	struct machpcb *mpcb;
+	extern void dtrace_putreg_win(uint_t, ulong_t);
+
+	if (reg == 0)
+		return;
+
+	if (reg < 16) {
+		(&rp->r_g1)[reg - 1] = value;
+		return;
+	}
+
+	/*
+	 * If the user process is still using some register windows, we
+	 * can just place the value in the correct window.
+	 */
+	cookie = dtrace_interrupt_disable();
+	if (dtrace_getotherwin() > 0) {
+		dtrace_putreg_win(reg, value);
+		dtrace_interrupt_enable(cookie);
+		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
+		return;
+	}
+	dtrace_interrupt_enable(cookie);
+
+	/*
+	 * First see if there's a copy of the register window in the
+	 * machpcb structure that we can modify; if there isn't, try to
+	 * copy out the value. If that fails, we try to create a new
+	 * register window in the machpcb structure. While this isn't
+	 * _precisely_ the intended use of the machpcb structure, it
+	 * can't cause any problems since we know at this point in the
+	 * code that all of the user's data have been flushed out of the
+	 * register file (since %otherwin is 0).
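+	 *
+	 * (Editorial aside, not in the original: the fallback order below,
+	 * shared in spirit with fasttrap_getreg(), can be summarized as:
+	 *
+	 *	if a buffered window in mpcb_wbuf matches %sp,
+	 *		patch that copy in place;
+	 *	else if the value can be stored to the user's stack frame,
+	 *		done;
+	 *	else
+	 *		copyin the frame, patch it, and append it to
+	 *		mpcb_wbuf (space permitting).
+	 *
+	 * Only the last resort can fail -- MAXWIN exhausted or the copyin
+	 * faulting -- at which point the process gets a SIGILL because its
+	 * state can no longer be kept consistent.)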
+	 */
+	/* LINTED - alignment */
+	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
+
+	if (get_udatamodel() == DATAMODEL_NATIVE) {
+		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
+		/* LINTED - alignment */
+		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
+
+		if (mpcb->mpcb_wbcnt > 0) {
+			int i = mpcb->mpcb_wbcnt;
+			do {
+				i--;
+				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
+					continue;
+
+				rwin[i].rw_local[reg - 16] = value;
+				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
+				return;
+			} while (i > 0);
+		}
+
+		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
+			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
+			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
+				goto err;
+
+			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
+			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
+			mpcb->mpcb_wbcnt++;
+			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
+			return;
+		}
+	} else {
+		struct frame32 *fr =
+		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
+		/* LINTED - alignment */
+		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
+		uint32_t v32 = (uint32_t)value;
+
+		if (mpcb->mpcb_wbcnt > 0) {
+			int i = mpcb->mpcb_wbcnt;
+			do {
+				i--;
+				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
+					continue;
+
+				rwin[i].rw_local[reg - 16] = v32;
+				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
+				return;
+			} while (i > 0);
+		}
+
+		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
+			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
+			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
+				goto err;
+
+			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
+			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
+			mpcb->mpcb_wbcnt++;
+			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
+			return;
+		}
+	}
+
+	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
+	return;
+
+err:
+	/*
+	 * If we couldn't record this register's value, the process is in an
+	 * irrecoverable state and we have no choice but to euthanize it.
+	 */
+	psignal(ttoproc(curthread), SIGILL);
+}
