diff options
Diffstat (limited to 'sys/powerpc/fpu')
| -rw-r--r-- | sys/powerpc/fpu/fpu_add.c | 222 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_arith.h | 150 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_compare.c | 158 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_div.c | 288 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_emu.c | 786 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_emu.h | 193 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_explode.c | 259 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_extern.h | 55 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_implode.c | 453 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_instr.h | 383 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_mul.c | 235 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_sqrt.c | 411 | ||||
| -rw-r--r-- | sys/powerpc/fpu/fpu_subr.c | 216 | 
13 files changed, 3809 insertions, 0 deletions
| diff --git a/sys/powerpc/fpu/fpu_add.c b/sys/powerpc/fpu/fpu_add.c new file mode 100644 index 000000000000..d0fa995f8067 --- /dev/null +++ b/sys/powerpc/fpu/fpu_add.c @@ -0,0 +1,222 @@ +/*	$NetBSD: fpu_add.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Perform an FPU add (return x + y). + * + * To subtract, negate y and call add. + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> +#include <machine/ieeefp.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> + +struct fpn * +fpu_add(struct fpemu *fe) +{ +	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r; +	u_int r0, r1, r2, r3; +	int rd; + +	/* +	 * Put the `heavier' operand on the right (see fpu_emu.h). +	 * Then we will have one of the following cases, taken in the +	 * following order: +	 * +	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is. +	 *	The result is y. +	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN +	 *    case was taken care of earlier). +	 *	If x = -y, the result is NaN.  Otherwise the result +	 *	is y (an Inf of whichever sign). +	 *  - y is 0.  Implied: x = 0. +	 *	If x and y differ in sign (one positive, one negative), +	 *	the result is +0 except when rounding to -Inf.  If same: +	 *	+0 + +0 = +0; -0 + -0 = -0. +	 *  - x is 0.  Implied: y != 0. +	 *	Result is y. +	 *  - other.  Implied: both x and y are numbers. +	 *	Do addition a la Hennessey & Patterson. +	 */ +	DPRINTF(FPE_REG, ("fpu_add:\n")); +	DUMPFPN(FPE_REG, x); +	DUMPFPN(FPE_REG, y); +	DPRINTF(FPE_REG, ("=>\n")); +	ORDER(x, y); +	if (ISNAN(y)) { +		fe->fe_cx |= FPSCR_VXSNAN; +		DUMPFPN(FPE_REG, y); +		return (y); +	} +	if (ISINF(y)) { +		if (ISINF(x) && x->fp_sign != y->fp_sign) { +			fe->fe_cx |= FPSCR_VXISI; +			return (fpu_newnan(fe)); +		} +		DUMPFPN(FPE_REG, y); +		return (y); +	} +	rd = ((fe->fe_fpscr) & FPSCR_RN); +	if (ISZERO(y)) { +		if (rd != FP_RM)	/* only -0 + -0 gives -0 */ +			y->fp_sign &= x->fp_sign; +		else			/* any -0 operand gives -0 */ +			y->fp_sign |= x->fp_sign; +		DUMPFPN(FPE_REG, y); +		return (y); +	} +	if (ISZERO(x)) { +		DUMPFPN(FPE_REG, y); +		return (y); +	} +	/* +	 * We really have two numbers to add, although their signs may +	 * differ.  Make the exponents match, by shifting the smaller +	 * number right (e.g., 1.011 => 0.1011) and increasing its +	 * exponent (2^3 => 2^4).  Note that we do not alter the exponents +	 * of x and y here. +	 */ +	r = &fe->fe_f3; +	r->fp_class = FPC_NUM; +	if (x->fp_exp == y->fp_exp) { +		r->fp_exp = x->fp_exp; +		r->fp_sticky = 0; +	} else { +		if (x->fp_exp < y->fp_exp) { +			/* +			 * Try to avoid subtract case iii (see below). +			 * This also guarantees that x->fp_sticky = 0. +			 */ +			SWAP(x, y); +		} +		/* now x->fp_exp > y->fp_exp */ +		r->fp_exp = x->fp_exp; +		r->fp_sticky = fpu_shr(y, x->fp_exp - y->fp_exp); +	} +	r->fp_sign = x->fp_sign; +	if (x->fp_sign == y->fp_sign) { +		FPU_DECL_CARRY + +		/* +		 * The signs match, so we simply add the numbers.  The result +		 * may be `supernormal' (as big as 1.111...1 + 1.111...1, or +		 * 11.111...0).  If so, a single bit shift-right will fix it +		 * (but remember to adjust the exponent). +		 */ +		/* r->fp_mant = x->fp_mant + y->fp_mant */ +		FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]); +		FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]); +		FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]); +		FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]); +		if ((r->fp_mant[0] = r0) >= FP_2) { +			(void) fpu_shr(r, 1); +			r->fp_exp++; +		} +	} else { +		FPU_DECL_CARRY + +		/* +		 * The signs differ, so things are rather more difficult. +		 * H&P would have us negate the negative operand and add; +		 * this is the same as subtracting the negative operand. +		 * This is quite a headache.  Instead, we will subtract +		 * y from x, regardless of whether y itself is the negative +		 * operand.  When this is done one of three conditions will +		 * hold, depending on the magnitudes of x and y: +		 *   case i)   |x| > |y|.  The result is just x - y, +		 *	with x's sign, but it may need to be normalized. +		 *   case ii)  |x| = |y|.  The result is 0 (maybe -0) +		 *	so must be fixed up. +		 *   case iii) |x| < |y|.  We goofed; the result should +		 *	be (y - x), with the same sign as y. +		 * We could compare |x| and |y| here and avoid case iii, +		 * but that would take just as much work as the subtract. +		 * We can tell case iii has occurred by an overflow. +		 * +		 * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0. +		 */ +		/* r->fp_mant = x->fp_mant - y->fp_mant */ +		FPU_SET_CARRY(y->fp_sticky); +		FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]); +		FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]); +		FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]); +		FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]); +		if (r0 < FP_2) { +			/* cases i and ii */ +			if ((r0 | r1 | r2 | r3) == 0) { +				/* case ii */ +				r->fp_class = FPC_ZERO; +				r->fp_sign = rd == FP_RM; +				return (r); +			} +		} else { +			/* +			 * Oops, case iii.  This can only occur when the +			 * exponents were equal, in which case neither +			 * x nor y have sticky bits set.  Flip the sign +			 * (to y's sign) and negate the result to get y - x. +			 */ +#ifdef DIAGNOSTIC +			if (x->fp_exp != y->fp_exp || r->fp_sticky) +				panic("fpu_add"); +#endif +			r->fp_sign = y->fp_sign; +			FPU_SUBS(r3, 0, r3); +			FPU_SUBCS(r2, 0, r2); +			FPU_SUBCS(r1, 0, r1); +			FPU_SUBC(r0, 0, r0); +		} +		r->fp_mant[3] = r3; +		r->fp_mant[2] = r2; +		r->fp_mant[1] = r1; +		r->fp_mant[0] = r0; +		if (r0 < FP_1) +			fpu_norm(r); +	} +	DUMPFPN(FPE_REG, r); +	return (r); +} diff --git a/sys/powerpc/fpu/fpu_arith.h b/sys/powerpc/fpu/fpu_arith.h new file mode 100644 index 000000000000..d7734f7cb209 --- /dev/null +++ b/sys/powerpc/fpu/fpu_arith.h @@ -0,0 +1,150 @@ +/*	$NetBSD: fpu_arith.h,v 1.4 2005/12/24 20:07:28 perry Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Extended-precision arithmetic. + * + * We hold the notion of a `carry register', which may or may not be a + * machine carry bit or register.  On the SPARC, it is just the machine's + * carry bit. + * + * In the worst case, you can compute the carry from x+y as + *	(unsigned)(x + y) < (unsigned)x + * and from x+y+c as + *	((unsigned)(x + y + c) <= (unsigned)x && (y|c) != 0) + * for example. + */ + +#ifndef FPE_USE_ASM + +/* set up for extended-precision arithemtic */ +#define	FPU_DECL_CARRY quad_t fpu_carry, fpu_tmp; + +/* + * We have three kinds of add: + *	add with carry:					  r = x + y + c + *	add (ignoring current carry) and set carry:	c'r = x + y + 0 + *	add with carry and set carry:			c'r = x + y + c + * The macros use `C' for `use carry' and `S' for `set carry'. + * Note that the state of the carry is undefined after ADDC and SUBC, + * so if all you have for these is `add with carry and set carry', + * that is OK. + * + * The same goes for subtract, except that we compute x - y - c. + * + * Finally, we have a way to get the carry into a `regular' variable, + * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero + * into carry; GET_CARRY sets its argument to 0 or 1. + */ +#define	FPU_ADDC(r, x, y) \ +	(r) = (x) + (y) + (!!fpu_carry) +#define	FPU_ADDS(r, x, y) \ +	{ \ +		fpu_tmp = (quad_t)(x) + (quad_t)(y); \ +		(r) = (u_int)fpu_tmp; \ +		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \ +	} +#define	FPU_ADDCS(r, x, y) \ +	{ \ +		fpu_tmp = (quad_t)(x) + (quad_t)(y) + (!!fpu_carry); \ +		(r) = (u_int)fpu_tmp; \ +		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \ +	} +#define	FPU_SUBC(r, x, y) \ +	(r) = (x) - (y) - (!!fpu_carry) +#define	FPU_SUBS(r, x, y) \ +	{ \ +		fpu_tmp = (quad_t)(x) - (quad_t)(y); \ +		(r) = (u_int)fpu_tmp; \ +		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \ +	} +#define	FPU_SUBCS(r, x, y) \ +	{ \ +		fpu_tmp = (quad_t)(x) - (quad_t)(y) - (!!fpu_carry); \ +		(r) = (u_int)fpu_tmp; \ +		fpu_carry = ((fpu_tmp & 0xffffffff00000000LL) != 0); \ +	} + +#define	FPU_GET_CARRY(r) (r) = (!!fpu_carry) +#define	FPU_SET_CARRY(v) fpu_carry = ((v) != 0) + +#else +/* set up for extended-precision arithemtic */ +#define	FPU_DECL_CARRY + +/* + * We have three kinds of add: + *	add with carry:					  r = x + y + c + *	add (ignoring current carry) and set carry:	c'r = x + y + 0 + *	add with carry and set carry:			c'r = x + y + c + * The macros use `C' for `use carry' and `S' for `set carry'. + * Note that the state of the carry is undefined after ADDC and SUBC, + * so if all you have for these is `add with carry and set carry', + * that is OK. + * + * The same goes for subtract, except that we compute x - y - c. + * + * Finally, we have a way to get the carry into a `regular' variable, + * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero + * into carry; GET_CARRY sets its argument to 0 or 1. + */ +#define	FPU_ADDC(r, x, y) \ +	__asm volatile("adde %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) +#define	FPU_ADDS(r, x, y) \ +	__asm volatile("addc %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) +#define	FPU_ADDCS(r, x, y) \ +	__asm volatile("adde %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) +#define	FPU_SUBC(r, x, y) \ +	__asm volatile("subfe %0,%2,%1" : "=r"(r) : "r"(x), "r"(y)) +#define	FPU_SUBS(r, x, y) \ +	__asm volatile("subfc %0,%2,%1" : "=r"(r) : "r"(x), "r"(y)) +#define	FPU_SUBCS(r, x, y) \ +	__asm volatile("subfe %0,%2,%1" : "=r"(r) : "r"(x), "r"(y)) + +#define	FPU_GET_CARRY(r) __asm volatile("li %0,0; addie %0,%0,0" : "=r"(r)) +/* This one needs to destroy a temp register. */ +#define	FPU_SET_CARRY(v) do { int __tmp;				\ +		__asm volatile("addic %0,%0,-1" : "r"(__tmp) : "r"(v)); \ +	} while (0) + +#define	FPU_SHL1_BY_ADD	/* shift left 1 faster by ADDC than (a<<1)|(b>>31) */ +#endif diff --git a/sys/powerpc/fpu/fpu_compare.c b/sys/powerpc/fpu/fpu_compare.c new file mode 100644 index 000000000000..6629796d9c1f --- /dev/null +++ b/sys/powerpc/fpu/fpu_compare.c @@ -0,0 +1,158 @@ +/*	$NetBSD: fpu_compare.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FCMPU and FCMPO instructions. + * + * These rely on the fact that our internal wide format is achieved by + * adding zero bits to the end of narrower mantissas. + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> + +/* + * Perform a compare instruction (with or without unordered exception). + * This updates the fcc field in the fsr. + * + * If either operand is NaN, the result is unordered.  For ordered, this + * causes an NV exception.  Everything else is ordered: + *	|Inf| > |numbers| > |0|. + * We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0), + * so we get this directly.  Note, however, that two zeros compare equal + * regardless of sign, while everything else depends on sign. + * + * Incidentally, two Infs of the same sign compare equal (per the 80387 + * manual---it would be nice if the SPARC documentation were more + * complete). + */ +void +fpu_compare(struct fpemu *fe, int ordered) +{ +	struct fpn *a, *b, *r; +	int cc; + +	a = &fe->fe_f1; +	b = &fe->fe_f2; +	r = &fe->fe_f3; + +	if (ISNAN(a) || ISNAN(b)) { +		/* +		 * In any case, we already got an exception for signalling +		 * NaNs; here we may replace that one with an identical +		 * exception, but so what?. +		 */ +		cc = FPSCR_FU; +		if (ISSNAN(a) || ISSNAN(b)) +			cc |= FPSCR_VXSNAN; +		if (ordered) { +			if (fe->fe_fpscr & FPSCR_VE || ISQNAN(a) || ISQNAN(b)) +				cc |= FPSCR_VXVC; +		} +		goto done; +	} + +	/* +	 * Must handle both-zero early to avoid sign goofs.  Otherwise, +	 * at most one is 0, and if the signs differ we are done. +	 */ +	if (ISZERO(a) && ISZERO(b)) { +		cc = FPSCR_FE; +		goto done; +	} +	if (a->fp_sign) {		/* a < 0 (or -0) */ +		if (!b->fp_sign) {	/* b >= 0 (or if a = -0, b > 0) */ +			cc = FPSCR_FL; +			goto done; +		} +	} else {			/* a > 0 (or +0) */ +		if (b->fp_sign) {	/* b <= -0 (or if a = +0, b < 0) */ +			cc = FPSCR_FG; +			goto done; +		} +	} + +	/* +	 * Now the signs are the same (but may both be negative).  All +	 * we have left are these cases: +	 * +	 *	|a| < |b|		[classes or values differ] +	 *	|a| > |b|		[classes or values differ] +	 *	|a| == |b|		[classes and values identical] +	 * +	 * We define `diff' here to expand these as: +	 * +	 *	|a| < |b|, a,b >= 0: a < b => FSR_CC_LT +	 *	|a| < |b|, a,b < 0:  a > b => FSR_CC_GT +	 *	|a| > |b|, a,b >= 0: a > b => FSR_CC_GT +	 *	|a| > |b|, a,b < 0:  a < b => FSR_CC_LT +	 */ +#define opposite_cc(cc) ((cc) == FPSCR_FL ? FPSCR_FG : FPSCR_FL) +#define	diff(magnitude) (a->fp_sign ? opposite_cc(magnitude) :  (magnitude)) +	if (a->fp_class < b->fp_class) {	/* |a| < |b| */ +		cc = diff(FPSCR_FL); +		goto done; +	} +	if (a->fp_class > b->fp_class) {	/* |a| > |b| */ +		cc = diff(FPSCR_FG); +		goto done; +	} +	/* now none can be 0: only Inf and numbers remain */ +	if (ISINF(a)) {				/* |Inf| = |Inf| */ +		cc = FPSCR_FE; +		goto done; +	} +	fpu_sub(fe); +	if (ISZERO(r)) +		cc = FPSCR_FE; +	else if (r->fp_sign) +		cc = FPSCR_FL; +	else +		cc = FPSCR_FG; +done: +	fe->fe_cx = cc; +} diff --git a/sys/powerpc/fpu/fpu_div.c b/sys/powerpc/fpu/fpu_div.c new file mode 100644 index 000000000000..a464be050c25 --- /dev/null +++ b/sys/powerpc/fpu/fpu_div.c @@ -0,0 +1,288 @@ +/*	$NetBSD: fpu_div.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Perform an FPU divide (return x / y). + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> + +/* + * Division of normal numbers is done as follows: + * + * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e. + * If X and Y are the mantissas (1.bbbb's), the quotient is then: + * + *	q = (X / Y) * 2^((x exponent) - (y exponent)) + * + * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y) + * will be in [0.5,2.0).  Moreover, it will be less than 1.0 if and only + * if X < Y.  In that case, it will have to be shifted left one bit to + * become a normal number, and the exponent decremented.  Thus, the + * desired exponent is: + * + *	left_shift = x->fp_mant < y->fp_mant; + *	result_exp = x->fp_exp - y->fp_exp - left_shift; + * + * The quotient mantissa X/Y can then be computed one bit at a time + * using the following algorithm: + * + *	Q = 0;			-- Initial quotient. + *	R = X;			-- Initial remainder, + *	if (left_shift)		--   but fixed up in advance. + *		R *= 2; + *	for (bit = FP_NMANT; --bit >= 0; R *= 2) { + *		if (R >= Y) { + *			Q |= 1 << bit; + *			R -= Y; + *		} + *	} + * + * The subtraction R -= Y always removes the uppermost bit from R (and + * can sometimes remove additional lower-order 1 bits); this proof is + * left to the reader. + * + * This loop correctly calculates the guard and round bits since they are + * included in the expanded internal representation.  The sticky bit + * is to be set if and only if any other bits beyond guard and round + * would be set.  From the above it is obvious that this is true if and + * only if the remainder R is nonzero when the loop terminates. + * + * Examining the loop above, we can see that the quotient Q is built + * one bit at a time ``from the top down''.  This means that we can + * dispense with the multi-word arithmetic and just build it one word + * at a time, writing each result word when it is done. + * + * Furthermore, since X and Y are both in [1.0,2.0), we know that, + * initially, R >= Y.  (Recall that, if X < Y, R is set to X * 2 and + * is therefore at in [2.0,4.0).)  Thus Q is sure to have bit FP_NMANT-1 + * set, and R can be set initially to either X - Y (when X >= Y) or + * 2X - Y (when X < Y).  In addition, comparing R and Y is difficult, + * so we will simply calculate R - Y and see if that underflows. + * This leads to the following revised version of the algorithm: + * + *	R = X; + *	bit = FP_1; + *	D = R - Y; + *	if (D >= 0) { + *		result_exp = x->fp_exp - y->fp_exp; + *		R = D; + *		q = bit; + *		bit >>= 1; + *	} else { + *		result_exp = x->fp_exp - y->fp_exp - 1; + *		q = 0; + *	} + *	R <<= 1; + *	do  { + *		D = R - Y; + *		if (D >= 0) { + *			q |= bit; + *			R = D; + *		} + *		R <<= 1; + *	} while ((bit >>= 1) != 0); + *	Q[0] = q; + *	for (i = 1; i < 4; i++) { + *		q = 0, bit = 1 << 31; + *		do { + *			D = R - Y; + *			if (D >= 0) { + *				q |= bit; + *				R = D; + *			} + *			R <<= 1; + *		} while ((bit >>= 1) != 0); + *		Q[i] = q; + *	} + * + * This can be refined just a bit further by moving the `R <<= 1' + * calculations to the front of the do-loops and eliding the first one. + * The process can be terminated immediately whenever R becomes 0, but + * this is relatively rare, and we do not bother. + */ + +struct fpn * +fpu_div(struct fpemu *fe) +{ +	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; +	u_int q, bit; +	u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; +	FPU_DECL_CARRY + +	/* +	 * Since divide is not commutative, we cannot just use ORDER. +	 * Check either operand for NaN first; if there is at least one, +	 * order the signalling one (if only one) onto the right, then +	 * return it.  Otherwise we have the following cases: +	 * +	 *	Inf / Inf = NaN, plus NV exception +	 *	Inf / num = Inf [i.e., return x] +	 *	Inf / 0   = Inf [i.e., return x] +	 *	0 / Inf = 0 [i.e., return x] +	 *	0 / num = 0 [i.e., return x] +	 *	0 / 0   = NaN, plus NV exception +	 *	num / Inf = 0 +	 *	num / num = num (do the divide) +	 *	num / 0   = Inf, plus DZ exception +	 */ +	DPRINTF(FPE_REG, ("fpu_div:\n")); +	DUMPFPN(FPE_REG, x); +	DUMPFPN(FPE_REG, y); +	DPRINTF(FPE_REG, ("=>\n")); +	if (ISNAN(x) || ISNAN(y)) { +		ORDER(x, y); +		fe->fe_cx |= FPSCR_VXSNAN; +		DUMPFPN(FPE_REG, y); +		return (y); +	} +	/* +	 * Need to split the following out cause they generate different +	 * exceptions.  +	 */ +	if (ISINF(x)) { +		if (x->fp_class == y->fp_class) { +			fe->fe_cx |= FPSCR_VXIDI; +			return (fpu_newnan(fe)); +		} +		DUMPFPN(FPE_REG, x); +		return (x); +	} +	if (ISZERO(x)) { +		fe->fe_cx |= FPSCR_ZX; +		if (x->fp_class == y->fp_class) { +			fe->fe_cx |= FPSCR_VXZDZ; +			return (fpu_newnan(fe)); +		} +		DUMPFPN(FPE_REG, x); +		return (x); +	} + +	/* all results at this point use XOR of operand signs */ +	x->fp_sign ^= y->fp_sign; +	if (ISINF(y)) { +		x->fp_class = FPC_ZERO; +		DUMPFPN(FPE_REG, x); +		return (x); +	} +	if (ISZERO(y)) { +		fe->fe_cx = FPSCR_ZX; +		x->fp_class = FPC_INF; +		DUMPFPN(FPE_REG, x); +		return (x); +	} + +	/* +	 * Macros for the divide.  See comments at top for algorithm. +	 * Note that we expand R, D, and Y here. +	 */ + +#define	SUBTRACT		/* D = R - Y */ \ +	FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ +	FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) + +#define	NONNEGATIVE		/* D >= 0 */ \ +	((int)d0 >= 0) + +#ifdef FPU_SHL1_BY_ADD +#define	SHL1			/* R <<= 1 */ \ +	FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ +	FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) +#else +#define	SHL1 \ +	r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ +	r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 +#endif + +#define	LOOP			/* do ... while (bit >>= 1) */ \ +	do { \ +		SHL1; \ +		SUBTRACT; \ +		if (NONNEGATIVE) { \ +			q |= bit; \ +			r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ +		} \ +	} while ((bit >>= 1) != 0) + +#define	WORD(r, i)			/* calculate r->fp_mant[i] */ \ +	q = 0; \ +	bit = 1 << 31; \ +	LOOP; \ +	(x)->fp_mant[i] = q + +	/* Setup.  Note that we put our result in x. */ +	r0 = x->fp_mant[0]; +	r1 = x->fp_mant[1]; +	r2 = x->fp_mant[2]; +	r3 = x->fp_mant[3]; +	y0 = y->fp_mant[0]; +	y1 = y->fp_mant[1]; +	y2 = y->fp_mant[2]; +	y3 = y->fp_mant[3]; + +	bit = FP_1; +	SUBTRACT; +	if (NONNEGATIVE) { +		x->fp_exp -= y->fp_exp; +		r0 = d0, r1 = d1, r2 = d2, r3 = d3; +		q = bit; +		bit >>= 1; +	} else { +		x->fp_exp -= y->fp_exp + 1; +		q = 0; +	} +	LOOP; +	x->fp_mant[0] = q; +	WORD(x, 1); +	WORD(x, 2); +	WORD(x, 3); +	x->fp_sticky = r0 | r1 | r2 | r3; + +	DUMPFPN(FPE_REG, x); +	return (x); +} diff --git a/sys/powerpc/fpu/fpu_emu.c b/sys/powerpc/fpu/fpu_emu.c new file mode 100644 index 000000000000..9072fa78466f --- /dev/null +++ b/sys/powerpc/fpu/fpu_emu.c @@ -0,0 +1,786 @@ +/*	$NetBSD: fpu_emu.c,v 1.14 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright 2001 Wasabi Systems, Inc. + * All rights reserved. + * + * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + *    must display the following acknowledgement: + *      This product includes software developed for the NetBSD Project by + *      Wasabi Systems, Inc. + * 4. The name of Wasabi Systems, Inc. may not be used to endorse + *    or promote products derived from this software without specific prior + *    written permission. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include "opt_ddb.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/signal.h> +#include <sys/syslog.h> +#include <sys/signalvar.h> + +#include <machine/fpu.h> + +#include <powerpc/fpu/fpu_emu.h> +#include <powerpc/fpu/fpu_extern.h> +#include <powerpc/fpu/fpu_instr.h> + +static SYSCTL_NODE(_hw, OID_AUTO, fpu_emu, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, +    "FPU emulator"); + +#define	FPU_EMU_EVCNT_DECL(name)					\ +static u_int fpu_emu_evcnt_##name;					\ +SYSCTL_INT(_hw_fpu_emu, OID_AUTO, evcnt_##name, CTLFLAG_RD,		\ +    &fpu_emu_evcnt_##name, 0, "") + +#define	FPU_EMU_EVCNT_INCR(name)	fpu_emu_evcnt_##name++ + +FPU_EMU_EVCNT_DECL(stfiwx); +FPU_EMU_EVCNT_DECL(fpstore); +FPU_EMU_EVCNT_DECL(fpload); +FPU_EMU_EVCNT_DECL(fcmpu); +FPU_EMU_EVCNT_DECL(frsp); +FPU_EMU_EVCNT_DECL(fctiw); +FPU_EMU_EVCNT_DECL(fcmpo); +FPU_EMU_EVCNT_DECL(mtfsb1); +FPU_EMU_EVCNT_DECL(fnegabs); +FPU_EMU_EVCNT_DECL(mcrfs); +FPU_EMU_EVCNT_DECL(mtfsb0); +FPU_EMU_EVCNT_DECL(fmr); +FPU_EMU_EVCNT_DECL(mtfsfi); +FPU_EMU_EVCNT_DECL(fnabs); +FPU_EMU_EVCNT_DECL(fabs); +FPU_EMU_EVCNT_DECL(mffs); +FPU_EMU_EVCNT_DECL(mtfsf); +FPU_EMU_EVCNT_DECL(fctid); +FPU_EMU_EVCNT_DECL(fcfid); +FPU_EMU_EVCNT_DECL(fdiv); +FPU_EMU_EVCNT_DECL(fsub); +FPU_EMU_EVCNT_DECL(fadd); +FPU_EMU_EVCNT_DECL(fsqrt); +FPU_EMU_EVCNT_DECL(fsel); +FPU_EMU_EVCNT_DECL(fpres); +FPU_EMU_EVCNT_DECL(fmul); +FPU_EMU_EVCNT_DECL(frsqrte); +FPU_EMU_EVCNT_DECL(fmulsub); +FPU_EMU_EVCNT_DECL(fmuladd); +FPU_EMU_EVCNT_DECL(fnmsub); +FPU_EMU_EVCNT_DECL(fnmadd); + +/* FPSR exception masks */ +#define FPSR_EX_MSK	(FPSCR_VX|FPSCR_OX|FPSCR_UX|FPSCR_ZX|		\ +			FPSCR_XX|FPSCR_VXSNAN|FPSCR_VXISI|FPSCR_VXIDI|	\ +			FPSCR_VXZDZ|FPSCR_VXIMZ|FPSCR_VXVC|FPSCR_VXSOFT|\ +			FPSCR_VXSQRT|FPSCR_VXCVI) +#define	FPSR_EX		(FPSCR_VE|FPSCR_OE|FPSCR_UE|FPSCR_ZE|FPSCR_XE) +#define	FPSR_EXOP	(FPSR_EX_MSK&(~FPSR_EX)) + +int fpe_debug = 0; + +#ifdef DEBUG +vm_offset_t opc_disasm(vm_offset_t, int); + +/* + * Dump a `fpn' structure. + */ +void +fpu_dumpfpn(struct fpn *fp) +{ +	static const char *class[] = { +		"SNAN", "QNAN", "ZERO", "NUM", "INF" +	}; + +	printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2], +		fp->fp_sign ? '-' : ' ', +		fp->fp_mant[0],	fp->fp_mant[1], +		fp->fp_mant[2], fp->fp_mant[3],  +		fp->fp_exp); +} +#endif + +/* + * fpu_execute returns the following error numbers (0 = no error): + */ +#define	FPE		1	/* take a floating point exception */ +#define	NOTFPU		2	/* not an FPU instruction */ +#define	FAULT		3 + +/* + * Emulate a floating-point instruction. + * Return zero for success, else signal number. + * (Typically: zero, SIGFPE, SIGILL, SIGSEGV) + */ +int +fpu_emulate(struct trapframe *frame, struct fpu *fpf) +{ +	union instr insn; +	struct fpemu fe; +	int sig; + +	/* initialize insn.is_datasize to tell it is *not* initialized */ +	fe.fe_fpstate = fpf; +	fe.fe_cx = 0; + +	/* always set this (to avoid a warning) */ + +	if (copyin((void *) (frame->srr0), &insn.i_int, sizeof (insn.i_int))) { +#ifdef DEBUG +		printf("fpu_emulate: fault reading opcode\n"); +#endif +		return SIGSEGV; +	} + +	DPRINTF(FPE_EX, ("fpu_emulate: emulating insn %x at %p\n", +	    insn.i_int, (void *)frame->srr0)); + +	if ((insn.i_any.i_opcd == OPC_TWI) || +	    ((insn.i_any.i_opcd == OPC_integer_31) && +	    (insn.i_x.i_xo == OPC31_TW))) { +		/* Check for the two trap insns. */ +		DPRINTF(FPE_EX, ("fpu_emulate: SIGTRAP\n")); +		return (SIGTRAP); +	} +	sig = 0; +	switch (fpu_execute(frame, &fe, &insn)) { +	case 0: +		DPRINTF(FPE_EX, ("fpu_emulate: success\n")); +		frame->srr0 += 4; +		break; + +	case FPE: +		DPRINTF(FPE_EX, ("fpu_emulate: SIGFPE\n")); +		sig = SIGFPE; +		break; + +	case FAULT: +		DPRINTF(FPE_EX, ("fpu_emulate: SIGSEGV\n")); +		sig = SIGSEGV; +		break; + +	case NOTFPU: +	default: +		DPRINTF(FPE_EX, ("fpu_emulate: SIGILL\n")); +#ifdef DEBUG +		if (fpe_debug & FPE_EX) { +			printf("fpu_emulate:  illegal insn %x at %p:", +			insn.i_int, (void *) (frame->srr0)); +			opc_disasm(frame->srr0, insn.i_int); +		} +#endif +		sig = SIGILL; +#ifdef DEBUG +		if (fpe_debug & FPE_EX) +			kdb_enter(KDB_WHY_UNSET, "illegal instruction"); +#endif +		break; +	} + +	return (sig); +} + +/* + * Execute an FPU instruction (one that runs entirely in the FPU; not + * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be + * modified to reflect the setting the hardware would have left. + * + * Note that we do not catch all illegal opcodes, so you can, for instance, + * multiply two integers this way. + */ +int +fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn) +{ +	struct fpn *fp; +	union instr instr = *insn; +	int *a; +	vm_offset_t addr; +	int ra, rb, rc, rt, type, mask, fsr, cx, bf, setcr; +	unsigned int cond; +	struct fpu *fs; + +	/* Setup work. */ +	fp = NULL; +	fs = fe->fe_fpstate; +	fe->fe_fpscr = ((int *)&fs->fpscr)[1]; + +	/* +	 * On PowerPC all floating point values are stored in registers +	 * as doubles, even when used for single precision operations. +	 */ +	type = FTYPE_DBL; +	cond = instr.i_any.i_rc; +	setcr = 0; +	bf = 0;	/* XXX gcc */ + +#if defined(DDB) && defined(DEBUG) +	if (fpe_debug & FPE_EX) { +		vm_offset_t loc = tf->srr0; + +		printf("Trying to emulate: %p ", (void *)loc); +		opc_disasm(loc, instr.i_int); +	} +#endif + +	/* +	 * `Decode' and execute instruction. +	 */ + +	if ((instr.i_any.i_opcd >= OPC_LFS && instr.i_any.i_opcd <= OPC_STFDU) || +	    instr.i_any.i_opcd == OPC_integer_31) { +		/* +		 * Handle load/store insns: +		 * +		 * Convert to/from single if needed, calculate addr, +		 * and update index reg if needed. +		 */ +		double buf; +		size_t size = sizeof(float); +		int store, update; + +		cond = 0; /* ld/st never set condition codes */ + +		if (instr.i_any.i_opcd == OPC_integer_31) { +			if (instr.i_x.i_xo == OPC31_STFIWX) { +				FPU_EMU_EVCNT_INCR(stfiwx); + +				/* Store as integer */ +				ra = instr.i_x.i_ra; +				rb = instr.i_x.i_rb; +				DPRINTF(FPE_INSN, +					("reg %d has %jx reg %d has %jx\n", +					ra, (uintmax_t)tf->fixreg[ra], rb, +					(uintmax_t)tf->fixreg[rb])); + +				addr = tf->fixreg[rb]; +				if (ra != 0) +					addr += tf->fixreg[ra]; +				rt = instr.i_x.i_rt; +				a = (int *)&fs->fpr[rt].fpr; +				DPRINTF(FPE_INSN, +					("fpu_execute: Store INT %x at %p\n", +						a[1], (void *)addr)); +				if (copyout(&a[1], (void *)addr, sizeof(int))) +					return (FAULT); +				return (0); +			} + +			if ((instr.i_x.i_xo & OPC31_FPMASK) != OPC31_FPOP) +				/* Not an indexed FP load/store op */ +				return (NOTFPU); + +			store = (instr.i_x.i_xo & 0x80); +			if (instr.i_x.i_xo & 0x40) +				size = sizeof(double); +			else +				type = FTYPE_SNG; +			update = (instr.i_x.i_xo & 0x20); +			 +			/* calculate EA of load/store */ +			ra = instr.i_x.i_ra; +			rb = instr.i_x.i_rb; +			DPRINTF(FPE_INSN, ("reg %d has %jx reg %d has %jx\n", +				ra, (uintmax_t)tf->fixreg[ra], rb, +				(uintmax_t)tf->fixreg[rb])); +			addr = tf->fixreg[rb]; +			if (ra != 0) +				addr += tf->fixreg[ra]; +			rt = instr.i_x.i_rt; +		} else { +			store = instr.i_d.i_opcd & 0x4; +			if (instr.i_d.i_opcd & 0x2) +				size = sizeof(double); +			else +				type = FTYPE_SNG; +			update = instr.i_d.i_opcd & 0x1; + +			/* calculate EA of load/store */ +			ra = instr.i_d.i_ra; +			addr = instr.i_d.i_d; +			DPRINTF(FPE_INSN, ("reg %d has %jx displ %jx\n", +				ra, (uintmax_t)tf->fixreg[ra], +				(uintmax_t)addr)); +			if (ra != 0) +				addr += tf->fixreg[ra]; +			rt = instr.i_d.i_rt; +		} + +		if (update && ra == 0) +			return (NOTFPU); + +		if (store) { +			/* Store */ +			FPU_EMU_EVCNT_INCR(fpstore); +			if (type != FTYPE_DBL) { +				DPRINTF(FPE_INSN, +					("fpu_execute: Store SNG at %p\n", +						(void *)addr)); +				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL, rt); +				fpu_implode(fe, fp, type, (void *)&buf); +				if (copyout(&buf, (void *)addr, size)) +					return (FAULT); +			} else { +				DPRINTF(FPE_INSN,  +					("fpu_execute: Store DBL at %p\n", +						(void *)addr)); +				if (copyout(&fs->fpr[rt].fpr, (void *)addr, +				    size)) +					return (FAULT); +			} +		} else { +			/* Load */ +			FPU_EMU_EVCNT_INCR(fpload); +			DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n", +				(void *)addr)); +			if (copyin((const void *)addr, &fs->fpr[rt].fpr, +			    size)) +				return (FAULT); +			if (type != FTYPE_DBL) { +				fpu_explode(fe, fp = &fe->fe_f1, type, rt); +				fpu_implode(fe, fp, FTYPE_DBL,  +					(u_int *)&fs->fpr[rt].fpr); +			} +		} +		if (update)  +			tf->fixreg[ra] = addr; +		/* Complete. */ +		return (0); +#ifdef notyet +	} else if (instr.i_any.i_opcd == OPC_load_st_62) { +		/* These are 64-bit extensions */ +		return (NOTFPU); +#endif +	} else if (instr.i_any.i_opcd == OPC_sp_fp_59 || +		instr.i_any.i_opcd == OPC_dp_fp_63) { +		if (instr.i_any.i_opcd == OPC_dp_fp_63 && +		    !(instr.i_a.i_xo & OPC63M_MASK)) { +			/* Format X */ +			rt = instr.i_x.i_rt; +			ra = instr.i_x.i_ra; +			rb = instr.i_x.i_rb; + +			/* One of the special opcodes.... */ +			switch (instr.i_x.i_xo) { +			case	OPC63_FCMPU: +				FPU_EMU_EVCNT_INCR(fcmpu); +				DPRINTF(FPE_INSN, ("fpu_execute: FCMPU\n")); +				rt >>= 2; +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fpu_compare(fe, 0); +				/* Make sure we do the condition regs. */ +				cond = 0; +				/* N.B.: i_rs is already left shifted by two. */ +				bf = instr.i_x.i_rs & 0xfc; +				setcr = 1; +				break; + +			case	OPC63_FRSP: +				/* +				 * Convert to single:  +				 * +				 * PowerPC uses this to round a double +				 * precision value to single precision, +				 * but values in registers are always  +				 * stored in double precision format. +				 */ +				FPU_EMU_EVCNT_INCR(frsp); +				DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n")); +				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL, rb); +				fpu_implode(fe, fp, FTYPE_SNG,  +					(u_int *)&fs->fpr[rt].fpr); +				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt); +				type = FTYPE_DBL; +				break; +			case	OPC63_FCTIW: +			case	OPC63_FCTIWZ: +				FPU_EMU_EVCNT_INCR(fctiw); +				DPRINTF(FPE_INSN, ("fpu_execute: FCTIW\n")); +				fpu_explode(fe, fp = &fe->fe_f1, type, rb); +				type = FTYPE_INT; +				break; +			case	OPC63_FCMPO: +				FPU_EMU_EVCNT_INCR(fcmpo); +				DPRINTF(FPE_INSN, ("fpu_execute: FCMPO\n")); +				rt >>= 2; +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fpu_compare(fe, 1); +				/* Make sure we do the condition regs. */ +				cond = 0; +				/* N.B.: i_rs is already left shifted by two. */ +				bf = instr.i_x.i_rs & 0xfc; +				setcr = 1; +				break; +			case	OPC63_MTFSB1: +				FPU_EMU_EVCNT_INCR(mtfsb1); +				DPRINTF(FPE_INSN, ("fpu_execute: MTFSB1\n")); +				fe->fe_fpscr |=  +					(~(FPSCR_VX|FPSR_EX) & (1<<(31-rt))); +				break; +			case	OPC63_FNEG: +				FPU_EMU_EVCNT_INCR(fnegabs); +				DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n")); +				memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, +					sizeof(double)); +				a = (int *)&fs->fpr[rt].fpr; +				*a ^= (1U << 31); +				break; +			case	OPC63_MCRFS: +				FPU_EMU_EVCNT_INCR(mcrfs); +				DPRINTF(FPE_INSN, ("fpu_execute: MCRFS\n")); +				cond = 0; +				rt &= 0x1c; +				ra &= 0x1c; +				/* Extract the bits we want */ +				mask = (fe->fe_fpscr >> (28 - ra)) & 0xf; +				/* Clear the bits we copied. */ +				fe->fe_cx = +					(FPSR_EX_MSK | (0xf << (28 - ra))); +				fe->fe_fpscr &= fe->fe_cx; +				/* Now shove them in the right part of cr */ +				tf->cr &= ~(0xf << (28 - rt)); +				tf->cr |= (mask << (28 - rt)); +				break; +			case	OPC63_MTFSB0: +				FPU_EMU_EVCNT_INCR(mtfsb0); +				DPRINTF(FPE_INSN, ("fpu_execute: MTFSB0\n")); +				fe->fe_fpscr &= +					((FPSCR_VX|FPSR_EX) & ~(1<<(31-rt))); +				break; +			case	OPC63_FMR: +				FPU_EMU_EVCNT_INCR(fmr); +				DPRINTF(FPE_INSN, ("fpu_execute: FMR\n")); +				memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, +					sizeof(double)); +				break; +			case	OPC63_MTFSFI: +				FPU_EMU_EVCNT_INCR(mtfsfi); +				DPRINTF(FPE_INSN, ("fpu_execute: MTFSFI\n")); +				rb >>= 1; +				rt &= 0x1c; /* Already left-shifted 4 */ +				fe->fe_cx = rb << (28 - rt); +				mask = 0xf<<(28 - rt); +				fe->fe_fpscr = (fe->fe_fpscr & ~mask) |  +					fe->fe_cx; +/* XXX weird stuff about OX, FX, FEX, and VX should be handled */ +				break; +			case	OPC63_FNABS: +				FPU_EMU_EVCNT_INCR(fnabs); +				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n")); +				memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, +					sizeof(double)); +				a = (int *)&fs->fpr[rt].fpr; +				*a |= (1U << 31); +				break; +			case	OPC63_FABS: +				FPU_EMU_EVCNT_INCR(fabs); +				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n")); +				memcpy(&fs->fpr[rt].fpr, &fs->fpr[rb].fpr, +					sizeof(double)); +				a = (int *)&fs->fpr[rt].fpr; +				*a &= ~(1U << 31); +				break; +			case	OPC63_MFFS: +				FPU_EMU_EVCNT_INCR(mffs); +				DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n")); +				memcpy(&fs->fpr[rt].fpr, &fs->fpscr, +					sizeof(fs->fpscr)); +				break; +			case	OPC63_MTFSF: +				FPU_EMU_EVCNT_INCR(mtfsf); +				DPRINTF(FPE_INSN, ("fpu_execute: MTFSF\n")); +				if ((rt = instr.i_xfl.i_flm) == -1) +					mask = -1; +				else { +					mask = 0; +					/* Convert 1 bit -> 4 bits */ +					for (ra = 0; ra < 8; ra ++) +						if (rt & (1<<ra)) +							mask |= (0xf<<(4*ra)); +				} +				a = (int *)&fs->fpr[rt].fpr; +				fe->fe_cx = mask & a[1]; +				fe->fe_fpscr = (fe->fe_fpscr&~mask) |  +					(fe->fe_cx); +/* XXX weird stuff about OX, FX, FEX, and VX should be handled */ +				break; +			case	OPC63_FCTID: +			case	OPC63_FCTIDZ: +				FPU_EMU_EVCNT_INCR(fctid); +				DPRINTF(FPE_INSN, ("fpu_execute: FCTID\n")); +				fpu_explode(fe, fp = &fe->fe_f1, type, rb); +				type = FTYPE_LNG; +				break; +			case	OPC63_FCFID: +				FPU_EMU_EVCNT_INCR(fcfid); +				DPRINTF(FPE_INSN, ("fpu_execute: FCFID\n")); +				type = FTYPE_LNG; +				fpu_explode(fe, fp = &fe->fe_f1, type, rb); +				type = FTYPE_DBL; +				break; +			default: +				return (NOTFPU); +				break; +			} +		} else { +			/* Format A */ +			rt = instr.i_a.i_frt; +			ra = instr.i_a.i_fra; +			rb = instr.i_a.i_frb; +			rc = instr.i_a.i_frc; + +			/* +			 * All arithmetic operations work on registers, which +			 * are stored as doubles. +			 */ +			type = FTYPE_DBL; +			switch ((unsigned int)instr.i_a.i_xo) { +			case	OPC59_FDIVS: +				FPU_EMU_EVCNT_INCR(fdiv); +				DPRINTF(FPE_INSN, ("fpu_execute: FDIV\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_div(fe); +				break; +			case	OPC59_FSUBS: +				FPU_EMU_EVCNT_INCR(fsub); +				DPRINTF(FPE_INSN, ("fpu_execute: FSUB\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_sub(fe); +				break; +			case	OPC59_FADDS: +				FPU_EMU_EVCNT_INCR(fadd); +				DPRINTF(FPE_INSN, ("fpu_execute: FADD\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_add(fe); +				break; +			case	OPC59_FSQRTS: +				FPU_EMU_EVCNT_INCR(fsqrt); +				DPRINTF(FPE_INSN, ("fpu_execute: FSQRT\n")); +				fpu_explode(fe, &fe->fe_f1, type, rb); +				fp = fpu_sqrt(fe); +				break; +			case	OPC63M_FSEL: +				FPU_EMU_EVCNT_INCR(fsel); +				DPRINTF(FPE_INSN, ("fpu_execute: FSEL\n")); +				a = (int *)&fe->fe_fpstate->fpr[ra].fpr; +				if ((*a & 0x80000000) && (*a & 0x7fffffff))  +					/* fra < 0 */ +					rc = rb; +				DPRINTF(FPE_INSN, ("f%d => f%d\n", rc, rt)); +				memcpy(&fs->fpr[rt].fpr, &fs->fpr[rc].fpr, +					sizeof(double)); +				break; +			case	OPC59_FRES: +				FPU_EMU_EVCNT_INCR(fpres); +				DPRINTF(FPE_INSN, ("fpu_execute: FPRES\n")); +				fpu_explode(fe, &fe->fe_f1, type, rb); +				fp = fpu_sqrt(fe); +				/* now we've gotta overwrite the dest reg */ +				*((int *)&fe->fe_fpstate->fpr[rt].fpr) = 1; +				fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt); +				fpu_div(fe); +				break; +			case	OPC59_FMULS: +				FPU_EMU_EVCNT_INCR(fmul); +				DPRINTF(FPE_INSN, ("fpu_execute: FMUL\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rc); +				fp = fpu_mul(fe); +				break; +			case	OPC63M_FRSQRTE: +				/* Reciprocal sqrt() estimate */ +				FPU_EMU_EVCNT_INCR(frsqrte); +				DPRINTF(FPE_INSN, ("fpu_execute: FRSQRTE\n")); +				fpu_explode(fe, &fe->fe_f1, type, rb); +				fp = fpu_sqrt(fe); +				fe->fe_f2 = *fp; +				/* now we've gotta overwrite the dest reg */ +				*((int *)&fe->fe_fpstate->fpr[rt].fpr) = 1; +				fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt); +				fpu_div(fe); +				break; +			case	OPC59_FMSUBS: +				FPU_EMU_EVCNT_INCR(fmulsub); +				DPRINTF(FPE_INSN, ("fpu_execute: FMULSUB\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rc); +				fp = fpu_mul(fe); +				fe->fe_f1 = *fp; +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_sub(fe); +				break; +			case	OPC59_FMADDS: +				FPU_EMU_EVCNT_INCR(fmuladd); +				DPRINTF(FPE_INSN, ("fpu_execute: FMULADD\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rc); +				fp = fpu_mul(fe); +				fe->fe_f1 = *fp; +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_add(fe); +				break; +			case	OPC59_FNMSUBS: +				FPU_EMU_EVCNT_INCR(fnmsub); +				DPRINTF(FPE_INSN, ("fpu_execute: FNMSUB\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rc); +				fp = fpu_mul(fe); +				fe->fe_f1 = *fp; +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_sub(fe); +				/* Negate */ +				fp->fp_sign ^= 1; +				break; +			case	OPC59_FNMADDS: +				FPU_EMU_EVCNT_INCR(fnmadd); +				DPRINTF(FPE_INSN, ("fpu_execute: FNMADD\n")); +				fpu_explode(fe, &fe->fe_f1, type, ra); +				fpu_explode(fe, &fe->fe_f2, type, rc); +				fp = fpu_mul(fe); +				fe->fe_f1 = *fp; +				fpu_explode(fe, &fe->fe_f2, type, rb); +				fp = fpu_add(fe); +				/* Negate */ +				fp->fp_sign ^= 1; +				break; +			default: +				return (NOTFPU); +				break; +			} + +			/* If the instruction was single precision, round */ +			if (!(instr.i_any.i_opcd & 0x4)) { +				fpu_implode(fe, fp, FTYPE_SNG,  +					(u_int *)&fs->fpr[rt].fpr); +				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt); +			} +		} +	} else { +		return (NOTFPU); +	} + +	/* +	 * ALU operation is complete.  Collapse the result and then check +	 * for exceptions.  If we got any, and they are enabled, do not +	 * alter the destination register, just stop with an exception. +	 * Otherwise set new current exceptions and accrue. +	 */ +	if (fp) +		fpu_implode(fe, fp, type, (u_int *)&fs->fpr[rt].fpr); +	cx = fe->fe_cx; +	fsr = fe->fe_fpscr; +	if (cx != 0) { +		fsr &= ~FPSCR_FX; +		if ((cx^fsr)&FPSR_EX_MSK) +			fsr |= FPSCR_FX; +		mask = fsr & FPSR_EX; +		mask <<= (25-3); +		if (cx & mask)  +			fsr |= FPSCR_FEX; +		if (cx & FPSCR_FPRF) { +			/* Need to replace CC */ +			fsr &= ~FPSCR_FPRF; +		} +		if (cx & (FPSR_EXOP)) +			fsr |= FPSCR_VX; +		fsr |= cx; +		DPRINTF(FPE_INSN, ("fpu_execute: cx %x, fsr %x\n", cx, fsr)); +	} + +	if (cond) { +		cond = fsr & 0xf0000000; +		/* Isolate condition codes */ +		cond >>= 28; +		/* Move fpu condition codes to cr[1] */ +		tf->cr &= (0x0f000000); +		tf->cr |= (cond<<24); +		DPRINTF(FPE_INSN, ("fpu_execute: cr[1] <= %x\n", cond)); +	} + +	if (setcr) { +		cond = fsr & FPSCR_FPCC; +		/* Isolate condition codes */ +		cond <<= 16; +		/* Move fpu condition codes to cr[1] */ +		tf->cr &= ~(0xf0000000>>bf); +		tf->cr |= (cond>>bf); +		DPRINTF(FPE_INSN, ("fpu_execute: cr[%d] (cr=%jx) <= %x\n", +			bf/4, (uintmax_t)tf->cr, cond)); +	} + +	((int *)&fs->fpscr)[1] = fsr; +	if (fsr & FPSCR_FEX) +		return(FPE); +	return (0);	/* success */ +} diff --git a/sys/powerpc/fpu/fpu_emu.h b/sys/powerpc/fpu/fpu_emu.h new file mode 100644 index 000000000000..850dc3f70b02 --- /dev/null +++ b/sys/powerpc/fpu/fpu_emu.h @@ -0,0 +1,193 @@ +/*	$NetBSD: fpu_emu.h,v 1.3 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Floating point emulator (tailored for SPARC, but structurally + * machine-independent). + * + * Floating point numbers are carried around internally in an `expanded' + * or `unpacked' form consisting of: + *	- sign + *	- unbiased exponent + *	- mantissa (`1.' + 112-bit fraction + guard + round) + *	- sticky bit + * Any implied `1' bit is inserted, giving a 113-bit mantissa that is + * always nonzero.  Additional low-order `guard' and `round' bits are + * scrunched in, making the entire mantissa 115 bits long.  This is divided + * into four 32-bit words, with `spare' bits left over in the upper part + * of the top word (the high bits of fp_mant[0]).  An internal `exploded' + * number is thus kept within the half-open interval [1.0,2.0) (but see + * the `number classes' below).  This holds even for denormalized numbers: + * when we explode an external denorm, we normalize it, introducing low-order + * zero bits, so that the rest of the code always sees normalized values. + * + * Note that a number of our algorithms use the `spare' bits at the top. + * The most demanding algorithm---the one for sqrt---depends on two such + * bits, so that it can represent values up to (but not including) 8.0, + * and then it needs a carry on top of that, so that we need three `spares'. + * + * The sticky-word is 32 bits so that we can use `OR' operators to goosh + * whole words from the mantissa into it. + * + * All operations are done in this internal extended precision.  According + * to Hennesey & Patterson, Appendix A, rounding can be repeated---that is, + * it is OK to do a+b in extended precision and then round the result to + * single precision---provided single, double, and extended precisions are + * `far enough apart' (they always are), but we will try to avoid any such + * extra work where possible. + */ +struct fpn { +	int	fp_class;		/* see below */ +	int	fp_sign;		/* 0 => positive, 1 => negative */ +	int	fp_exp;			/* exponent (unbiased) */ +	int	fp_sticky;		/* nonzero bits lost at right end */ +	u_int	fp_mant[4];		/* 115-bit mantissa */ +}; + +#define	FP_NMANT	115		/* total bits in mantissa (incl g,r) */ +#define	FP_NG		2		/* number of low-order guard bits */ +#define	FP_LG		((FP_NMANT - 1) & 31)	/* log2(1.0) for fp_mant[0] */ +#define	FP_LG2		((FP_NMANT - 1) & 63)	/* log2(1.0) for fp_mant[0] and fp_mant[1] */ +#define	FP_QUIETBIT	(1 << (FP_LG - 1))	/* Quiet bit in NaNs (0.5) */ +#define	FP_1		(1 << FP_LG)		/* 1.0 in fp_mant[0] */ +#define	FP_2		(1 << (FP_LG + 1))	/* 2.0 in fp_mant[0] */ + +/* + * Number classes.  Since zero, Inf, and NaN cannot be represented using + * the above layout, we distinguish these from other numbers via a class. + * In addition, to make computation easier and to follow Appendix N of + * the SPARC Version 8 standard, we give each kind of NaN a separate class. + */ +#define	FPC_SNAN	-2		/* signalling NaN (sign irrelevant) */ +#define	FPC_QNAN	-1		/* quiet NaN (sign irrelevant) */ +#define	FPC_ZERO	0		/* zero (sign matters) */ +#define	FPC_NUM		1		/* number (sign matters) */ +#define	FPC_INF		2		/* infinity (sign matters) */ + +#define	ISSNAN(fp)	((fp)->fp_class == FPC_SNAN) +#define	ISQNAN(fp)	((fp)->fp_class == FPC_QNAN) +#define	ISNAN(fp)	((fp)->fp_class < 0) +#define	ISZERO(fp)	((fp)->fp_class == 0) +#define	ISINF(fp)	((fp)->fp_class == FPC_INF) + +/* + * ORDER(x,y) `sorts' a pair of `fpn *'s so that the right operand (y) points + * to the `more significant' operand for our purposes.  Appendix N says that + * the result of a computation involving two numbers are: + * + *	If both are SNaN: operand 2, converted to Quiet + *	If only one is SNaN: the SNaN operand, converted to Quiet + *	If both are QNaN: operand 2 + *	If only one is QNaN: the QNaN operand + * + * In addition, in operations with an Inf operand, the result is usually + * Inf.  The class numbers are carefully arranged so that if + *	(unsigned)class(op1) > (unsigned)class(op2) + * then op1 is the one we want; otherwise op2 is the one we want. + */ +#define	ORDER(x, y) { \ +	if ((u_int)(x)->fp_class > (u_int)(y)->fp_class) \ +		SWAP(x, y); \ +} +#define	SWAP(x, y) { \ +	struct fpn *swap; \ +	swap = (x), (x) = (y), (y) = swap; \ +} + +/* + * Emulator state. + */ +struct fpemu { +	struct	fpu *fe_fpstate;	/* registers, etc */ +	int	fe_fpscr;		/* fpscr copy (modified during op) */ +	int	fe_cx;			/* keep track of exceptions */ +	struct	fpn fe_f1;		/* operand 1 */ +	struct	fpn fe_f2;		/* operand 2, if required */ +	struct	fpn fe_f3;		/* available storage for result */ +}; + +/* + * Arithmetic functions. + * Each of these may modify its inputs (f1,f2) and/or the temporary. + * Each returns a pointer to the result and/or sets exceptions. + */ +struct	fpn *fpu_add(struct fpemu *); +#define	fpu_sub(fe) ((fe)->fe_f2.fp_sign ^= 1, fpu_add(fe)) +struct	fpn *fpu_mul(struct fpemu *); +struct	fpn *fpu_div(struct fpemu *); +struct	fpn *fpu_sqrt(struct fpemu *); + +/* + * Other functions. + */ + +/* Perform a compare instruction (with or without unordered exception). */ +void	fpu_compare(struct fpemu *, int); + +/* Build a new Quiet NaN (sign=0, frac=all 1's). */ +struct	fpn *fpu_newnan(struct fpemu *); + +void	fpu_norm(struct fpn *); + +/* + * Shift a number right some number of bits, taking care of round/sticky. + * Note that the result is probably not a well-formed number (it will lack + * the normal 1-bit mant[0]&FP_1). + */ +int	fpu_shr(struct fpn *, int); + +void	fpu_explode(struct fpemu *, struct fpn *, int, int); +void	fpu_implode(struct fpemu *, struct fpn *, int, u_int *); + +#ifdef DEBUG +#define	FPE_EX		0x1 +#define	FPE_INSN	0x2 +#define	FPE_OP		0x4 +#define	FPE_REG		0x8 +extern int fpe_debug; +void	fpu_dumpfpn(struct fpn *); +#define	DPRINTF(x, y)	if (fpe_debug & (x)) printf y +#define DUMPFPN(x, f)	if (fpe_debug & (x)) fpu_dumpfpn((f)) +#else +#define	DPRINTF(x, y) +#define DUMPFPN(x, f) +#endif diff --git a/sys/powerpc/fpu/fpu_explode.c b/sys/powerpc/fpu/fpu_explode.c new file mode 100644 index 000000000000..9add521cabb6 --- /dev/null +++ b/sys/powerpc/fpu/fpu_explode.c @@ -0,0 +1,259 @@ +/*	$NetBSD: fpu_explode.c,v 1.6 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FPU subroutines: `explode' the machine's `packed binary' format numbers + * into our internal format. + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> +#include <machine/ieee.h> +#include <machine/pcb.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> +#include <powerpc/fpu/fpu_extern.h> +#include <powerpc/fpu/fpu_instr.h> + +/* + * N.B.: in all of the following, we assume the FP format is + * + *	--------------------------- + *	| s | exponent | fraction | + *	--------------------------- + * + * (which represents -1**s * 1.fraction * 2**exponent), so that the + * sign bit is way at the top (bit 31), the exponent is next, and + * then the remaining bits mark the fraction.  A zero exponent means + * zero or denormalized (0.fraction rather than 1.fraction), and the + * maximum possible exponent, 2bias+1, signals inf (fraction==0) or NaN. + * + * Since the sign bit is always the topmost bit---this holds even for + * integers---we set that outside all the *tof functions.  Each function + * returns the class code for the new number (but note that we use + * FPC_QNAN for all NaNs; fpu_explode will fix this if appropriate). + */ + +/* + * int -> fpn. + */ +int +fpu_itof(struct fpn *fp, u_int i) +{ + +	if (i == 0) +		return (FPC_ZERO); +	/* +	 * The value FP_1 represents 2^FP_LG, so set the exponent +	 * there and let normalization fix it up.  Convert negative +	 * numbers to sign-and-magnitude.  Note that this relies on +	 * fpu_norm()'s handling of `supernormals'; see fpu_subr.c. +	 */ +	fp->fp_exp = FP_LG; +	fp->fp_mant[0] = (int)i < 0 ? -i : i; +	fp->fp_mant[1] = 0; +	fp->fp_mant[2] = 0; +	fp->fp_mant[3] = 0; +	fpu_norm(fp); +	return (FPC_NUM); +} + +/* + * 64-bit int -> fpn. + */ +int +fpu_xtof(struct fpn *fp, u_int64_t i) +{ + +	if (i == 0) +		return (FPC_ZERO); +	/* +	 * The value FP_1 represents 2^FP_LG, so set the exponent +	 * there and let normalization fix it up.  Convert negative +	 * numbers to sign-and-magnitude.  Note that this relies on +	 * fpu_norm()'s handling of `supernormals'; see fpu_subr.c. +	 */ +	fp->fp_exp = FP_LG2; +	*((int64_t*)fp->fp_mant) = (int64_t)i < 0 ? -i : i; +	fp->fp_mant[2] = 0; +	fp->fp_mant[3] = 0; +	fpu_norm(fp); +	return (FPC_NUM); +} + +#define	mask(nbits) ((1L << (nbits)) - 1) + +/* + * All external floating formats convert to internal in the same manner, + * as defined here.  Note that only normals get an implied 1.0 inserted. + */ +#define	FP_TOF(exp, expbias, allfrac, f0, f1, f2, f3) \ +	if (exp == 0) { \ +		if (allfrac == 0) \ +			return (FPC_ZERO); \ +		fp->fp_exp = 1 - expbias; \ +		fp->fp_mant[0] = f0; \ +		fp->fp_mant[1] = f1; \ +		fp->fp_mant[2] = f2; \ +		fp->fp_mant[3] = f3; \ +		fpu_norm(fp); \ +		return (FPC_NUM); \ +	} \ +	if (exp == (2 * expbias + 1)) { \ +		if (allfrac == 0) \ +			return (FPC_INF); \ +		fp->fp_mant[0] = f0; \ +		fp->fp_mant[1] = f1; \ +		fp->fp_mant[2] = f2; \ +		fp->fp_mant[3] = f3; \ +		return (FPC_QNAN); \ +	} \ +	fp->fp_exp = exp - expbias; \ +	fp->fp_mant[0] = FP_1 | f0; \ +	fp->fp_mant[1] = f1; \ +	fp->fp_mant[2] = f2; \ +	fp->fp_mant[3] = f3; \ +	return (FPC_NUM) + +/* + * 32-bit single precision -> fpn. + * We assume a single occupies at most (64-FP_LG) bits in the internal + * format: i.e., needs at most fp_mant[0] and fp_mant[1]. + */ +int +fpu_stof(struct fpn *fp, u_int i) +{ +	int exp; +	u_int frac, f0, f1; +#define SNG_SHIFT (SNG_FRACBITS - FP_LG) + +	exp = (i >> (32 - 1 - SNG_EXPBITS)) & mask(SNG_EXPBITS); +	frac = i & mask(SNG_FRACBITS); +	f0 = frac >> SNG_SHIFT; +	f1 = frac << (32 - SNG_SHIFT); +	FP_TOF(exp, SNG_EXP_BIAS, frac, f0, f1, 0, 0); +} + +/* + * 64-bit double -> fpn. + * We assume this uses at most (96-FP_LG) bits. + */ +int +fpu_dtof(struct fpn *fp, u_int i, u_int j) +{ +	int exp; +	u_int frac, f0, f1, f2; +#define DBL_SHIFT (DBL_FRACBITS - 32 - FP_LG) + +	exp = (i >> (32 - 1 - DBL_EXPBITS)) & mask(DBL_EXPBITS); +	frac = i & mask(DBL_FRACBITS - 32); +	f0 = frac >> DBL_SHIFT; +	f1 = (frac << (32 - DBL_SHIFT)) | (j >> DBL_SHIFT); +	f2 = j << (32 - DBL_SHIFT); +	frac |= j; +	FP_TOF(exp, DBL_EXP_BIAS, frac, f0, f1, f2, 0); +} + +/* + * Explode the contents of a register / regpair / regquad. + * If the input is a signalling NaN, an NV (invalid) exception + * will be set.  (Note that nothing but NV can occur until ALU + * operations are performed.) + */ +void +fpu_explode(struct fpemu *fe, struct fpn *fp, int type, int reg) +{ +	u_int s, *space; +	u_int64_t l, *xspace; + +	xspace = (u_int64_t *)&fe->fe_fpstate->fpr[reg].fpr; +	l = xspace[0]; +	space = (u_int *)&fe->fe_fpstate->fpr[reg].fpr; +	s = space[0]; +	fp->fp_sign = s >> 31; +	fp->fp_sticky = 0; +	switch (type) { +	case FTYPE_LNG: +		s = fpu_xtof(fp, l); +		break; + +	case FTYPE_INT: +		s = fpu_itof(fp, space[1]); +		break; + +	case FTYPE_SNG: +		s = fpu_stof(fp, s); +		break; + +	case FTYPE_DBL: +		s = fpu_dtof(fp, s, space[1]); +		break; + +	default: +		panic("fpu_explode"); +		panic("fpu_explode: invalid type %d", type); +	} + +	if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) { +		/* +		 * Input is a signalling NaN.  All operations that return +		 * an input NaN operand put it through a ``NaN conversion'', +		 * which basically just means ``turn on the quiet bit''. +		 * We do this here so that all NaNs internally look quiet +		 * (we can tell signalling ones by their class). +		 */ +		fp->fp_mant[0] |= FP_QUIETBIT; +		fe->fe_cx = FPSCR_VXSNAN;	/* assert invalid operand */ +		s = FPC_SNAN; +	} +	fp->fp_class = s; +	DPRINTF(FPE_REG, ("fpu_explode: %%%c%d => ", (type == FTYPE_LNG) ? 'x' : +		((type == FTYPE_INT) ? 'i' :  +			((type == FTYPE_SNG) ? 's' : +				((type == FTYPE_DBL) ? 'd' : '?'))), +		reg)); +	DUMPFPN(FPE_REG, fp); +	DPRINTF(FPE_REG, ("\n")); +} diff --git a/sys/powerpc/fpu/fpu_extern.h b/sys/powerpc/fpu/fpu_extern.h new file mode 100644 index 000000000000..311a849be0d1 --- /dev/null +++ b/sys/powerpc/fpu/fpu_extern.h @@ -0,0 +1,55 @@ +/*	$NetBSD: fpu_extern.h,v 1.3 2005/12/11 12:18:42 christos Exp $	*/ + +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 1995 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +struct proc; +struct fpu; +struct trapframe; +union instr; +struct fpemu; +struct fpn; + +/* fpu.c */ +int fpu_emulate(struct trapframe *, struct fpu *); +int fpu_execute(struct trapframe *, struct fpemu *, union instr *); + +/* fpu_explode.c */ +int fpu_itof(struct fpn *, u_int); +int fpu_xtof(struct fpn *, u_int64_t); +int fpu_stof(struct fpn *, u_int); +int fpu_dtof(struct fpn *, u_int, u_int); + +/* fpu_implode.c */ +u_int fpu_ftoi(struct fpemu *, struct fpn *); +u_int fpu_ftox(struct fpemu *, struct fpn *, u_int *); +u_int fpu_ftos(struct fpemu *, struct fpn *); +u_int fpu_ftod(struct fpemu *, struct fpn *, u_int *); diff --git a/sys/powerpc/fpu/fpu_implode.c b/sys/powerpc/fpu/fpu_implode.c new file mode 100644 index 000000000000..51950816b0c3 --- /dev/null +++ b/sys/powerpc/fpu/fpu_implode.c @@ -0,0 +1,453 @@ +/*	$NetBSD: fpu_implode.c,v 1.6 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FPU subroutines: `implode' internal format numbers into the machine's + * `packed binary' format. + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> +#include <machine/ieee.h> +#include <machine/ieeefp.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> +#include <powerpc/fpu/fpu_extern.h> +#include <powerpc/fpu/fpu_instr.h> + +static int round(struct fpemu *, struct fpn *); +static int toinf(struct fpemu *, int); + +/* + * Round a number (algorithm from Motorola MC68882 manual, modified for + * our internal format).  Set inexact exception if rounding is required. + * Return true iff we rounded up. + * + * After rounding, we discard the guard and round bits by shifting right + * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). + * This saves effort later. + * + * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's + * responsibility to fix this if necessary. + */ +static int +round(struct fpemu *fe, struct fpn *fp) +{ +	u_int m0, m1, m2, m3; +	int gr, s; +	FPU_DECL_CARRY; + +	m0 = fp->fp_mant[0]; +	m1 = fp->fp_mant[1]; +	m2 = fp->fp_mant[2]; +	m3 = fp->fp_mant[3]; +	gr = m3 & 3; +	s = fp->fp_sticky; + +	/* mant >>= FP_NG */ +	m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); +	m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); +	m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); +	m0 >>= FP_NG; + +	if ((gr | s) == 0)	/* result is exact: no rounding needed */ +		goto rounddown; + +	fe->fe_cx |= FPSCR_XX|FPSCR_FI;	/* inexact */ + +	/* Go to rounddown to round down; break to round up. */ +	switch ((fe->fe_fpscr) & FPSCR_RN) { +	case FP_RN: +	default: +		/* +		 * Round only if guard is set (gr & 2).  If guard is set, +		 * but round & sticky both clear, then we want to round +		 * but have a tie, so round to even, i.e., add 1 iff odd. +		 */ +		if ((gr & 2) == 0) +			goto rounddown; +		if ((gr & 1) || fp->fp_sticky || (m3 & 1)) +			break; +		goto rounddown; + +	case FP_RZ: +		/* Round towards zero, i.e., down. */ +		goto rounddown; + +	case FP_RM: +		/* Round towards -Inf: up if negative, down if positive. */ +		if (fp->fp_sign) +			break; +		goto rounddown; + +	case FP_RP: +		/* Round towards +Inf: up if positive, down otherwise. */ +		if (!fp->fp_sign) +			break; +		goto rounddown; +	} + +	/* Bump low bit of mantissa, with carry. */ +	fe->fe_cx |= FPSCR_FR; + +	FPU_ADDS(m3, m3, 1); +	FPU_ADDCS(m2, m2, 0); +	FPU_ADDCS(m1, m1, 0); +	FPU_ADDC(m0, m0, 0); +	fp->fp_mant[0] = m0; +	fp->fp_mant[1] = m1; +	fp->fp_mant[2] = m2; +	fp->fp_mant[3] = m3; +	return (1); + +rounddown: +	fp->fp_mant[0] = m0; +	fp->fp_mant[1] = m1; +	fp->fp_mant[2] = m2; +	fp->fp_mant[3] = m3; +	return (0); +} + +/* + * For overflow: return true if overflow is to go to +/-Inf, according + * to the sign of the overflowing result.  If false, overflow is to go + * to the largest magnitude value instead. + */ +static int +toinf(struct fpemu *fe, int sign) +{ +	int inf; + +	/* look at rounding direction */ +	switch ((fe->fe_fpscr) & FPSCR_RN) { +	default: +	case FP_RN:		/* the nearest value is always Inf */ +		inf = 1; +		break; + +	case FP_RZ:		/* toward 0 => never towards Inf */ +		inf = 0; +		break; + +	case FP_RP:		/* toward +Inf iff positive */ +		inf = sign == 0; +		break; + +	case FP_RM:		/* toward -Inf iff negative */ +		inf = sign; +		break; +	} +	if (inf) +		fe->fe_cx |= FPSCR_OX; +	return (inf); +} + +/* + * fpn -> int (int value returned as return value). + * + * N.B.: this conversion always rounds towards zero (this is a peculiarity + * of the SPARC instruction set). + */ +u_int +fpu_ftoi(struct fpemu *fe, struct fpn *fp) +{ +	u_int i; +	int sign, exp; + +	sign = fp->fp_sign; +	switch (fp->fp_class) { +	case FPC_ZERO: +		return (0); + +	case FPC_NUM: +		/* +		 * If exp >= 2^32, overflow.  Otherwise shift value right +		 * into last mantissa word (this will not exceed 0xffffffff), +		 * shifting any guard and round bits out into the sticky +		 * bit.  Then ``round'' towards zero, i.e., just set an +		 * inexact exception if sticky is set (see round()). +		 * If the result is > 0x80000000, or is positive and equals +		 * 0x80000000, overflow; otherwise the last fraction word +		 * is the result. +		 */ +		if ((exp = fp->fp_exp) >= 32) +			break; +		/* NB: the following includes exp < 0 cases */ +		if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) +			fe->fe_cx |= FPSCR_UX; +		i = fp->fp_mant[3]; +		if (i >= ((u_int)0x80000000 + sign)) +			break; +		return (sign ? -i : i); + +	default:		/* Inf, qNaN, sNaN */ +		break; +	} +	/* overflow: replace any inexact exception with invalid */ +	fe->fe_cx |= FPSCR_VXCVI; +	return (0x7fffffff + sign); +} + +/* + * fpn -> extended int (high bits of int value returned as return value). + * + * N.B.: this conversion always rounds towards zero (this is a peculiarity + * of the SPARC instruction set). + */ +u_int +fpu_ftox(struct fpemu *fe, struct fpn *fp, u_int *res) +{ +	u_int64_t i; +	int sign, exp; + +	sign = fp->fp_sign; +	switch (fp->fp_class) { +	case FPC_ZERO: +		res[1] = 0; +		return (0); + +	case FPC_NUM: +		/* +		 * If exp >= 2^64, overflow.  Otherwise shift value right +		 * into last mantissa word (this will not exceed 0xffffffffffffffff), +		 * shifting any guard and round bits out into the sticky +		 * bit.  Then ``round'' towards zero, i.e., just set an +		 * inexact exception if sticky is set (see round()). +		 * If the result is > 0x8000000000000000, or is positive and equals +		 * 0x8000000000000000, overflow; otherwise the last fraction word +		 * is the result. +		 */ +		if ((exp = fp->fp_exp) >= 64) +			break; +		/* NB: the following includes exp < 0 cases */ +		if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) +			fe->fe_cx |= FPSCR_UX; +		i = ((u_int64_t)fp->fp_mant[2]<<32)|fp->fp_mant[3]; +		if (i >= ((u_int64_t)0x8000000000000000LL + sign)) +			break; +		return (sign ? -i : i); + +	default:		/* Inf, qNaN, sNaN */ +		break; +	} +	/* overflow: replace any inexact exception with invalid */ +	fe->fe_cx |= FPSCR_VXCVI; +	return (0x7fffffffffffffffLL + sign); +} + +/* + * fpn -> single (32 bit single returned as return value). + * We assume <= 29 bits in a single-precision fraction (1.f part). + */ +u_int +fpu_ftos(struct fpemu *fe, struct fpn *fp) +{ +	u_int sign = fp->fp_sign << 31; +	int exp; + +#define	SNG_EXP(e)	((e) << SNG_FRACBITS)	/* makes e an exponent */ +#define	SNG_MASK	(SNG_EXP(1) - 1)	/* mask for fraction */ + +	/* Take care of non-numbers first. */ +	if (ISNAN(fp)) { +		/* +		 * Preserve upper bits of NaN, per SPARC V8 appendix N. +		 * Note that fp->fp_mant[0] has the quiet bit set, +		 * even if it is classified as a signalling NaN. +		 */ +		(void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); +		exp = SNG_EXP_INFNAN; +		goto done; +	} +	if (ISINF(fp)) +		return (sign | SNG_EXP(SNG_EXP_INFNAN)); +	if (ISZERO(fp)) +		return (sign); + +	/* +	 * Normals (including subnormals).  Drop all the fraction bits +	 * (including the explicit ``implied'' 1 bit) down into the +	 * single-precision range.  If the number is subnormal, move +	 * the ``implied'' 1 into the explicit range as well, and shift +	 * right to introduce leading zeroes.  Rounding then acts +	 * differently for normals and subnormals: the largest subnormal +	 * may round to the smallest normal (1.0 x 2^minexp), or may +	 * remain subnormal.  In the latter case, signal an underflow +	 * if the result was inexact or if underflow traps are enabled. +	 * +	 * Rounding a normal, on the other hand, always produces another +	 * normal (although either way the result might be too big for +	 * single precision, and cause an overflow).  If rounding a +	 * normal produces 2.0 in the fraction, we need not adjust that +	 * fraction at all, since both 1.0 and 2.0 are zero under the +	 * fraction mask. +	 * +	 * Note that the guard and round bits vanish from the number after +	 * rounding. +	 */ +	if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) {	/* subnormal */ +		/* -NG for g,r; -SNG_FRACBITS-exp for fraction */ +		(void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); +		if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) +			return (sign | SNG_EXP(1) | 0); +		if ((fe->fe_cx & FPSCR_FI) || +		    (fe->fe_fpscr & FPSCR_UX)) +			fe->fe_cx |= FPSCR_UX; +		return (sign | SNG_EXP(0) | fp->fp_mant[3]); +	} +	/* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ +	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); +#ifdef DIAGNOSTIC +	if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) +		panic("fpu_ftos"); +#endif +	if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) +		exp++; +	if (exp >= SNG_EXP_INFNAN) { +		/* overflow to inf or to max single */ +		if (toinf(fe, sign)) +			return (sign | SNG_EXP(SNG_EXP_INFNAN)); +		return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); +	} +done: +	/* phew, made it */ +	return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); +} + +/* + * fpn -> double (32 bit high-order result returned; 32-bit low order result + * left in res[1]).  Assumes <= 61 bits in double precision fraction. + * + * This code mimics fpu_ftos; see it for comments. + */ +u_int +fpu_ftod(struct fpemu *fe, struct fpn *fp, u_int *res) +{ +	u_int sign = fp->fp_sign << 31; +	int exp; + +#define	DBL_EXP(e)	((e) << (DBL_FRACBITS & 31)) +#define	DBL_MASK	(DBL_EXP(1) - 1) + +	if (ISNAN(fp)) { +		(void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); +		exp = DBL_EXP_INFNAN; +		goto done; +	} +	if (ISINF(fp)) { +		sign |= DBL_EXP(DBL_EXP_INFNAN); +		goto zero; +	} +	if (ISZERO(fp)) { +zero:		res[1] = 0; +		return (sign); +	} + +	if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { +		(void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); +		if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { +			res[1] = 0; +			return (sign | DBL_EXP(1) | 0); +		} +		if ((fe->fe_cx & FPSCR_FI) || +		    (fe->fe_fpscr & FPSCR_UX)) +			fe->fe_cx |= FPSCR_UX; +		exp = 0; +		goto done; +	} +	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); +	if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) +		exp++; +	if (exp >= DBL_EXP_INFNAN) { +		fe->fe_cx |= FPSCR_OX | FPSCR_UX; +		if (toinf(fe, sign)) { +			res[1] = 0; +			return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); +		} +		res[1] = ~0; +		return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); +	} +done: +	res[1] = fp->fp_mant[3]; +	return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); +} + +/* + * Implode an fpn, writing the result into the given space. + */ +void +fpu_implode(struct fpemu *fe, struct fpn *fp, int type, u_int *space) +{ + +	switch (type) { +	case FTYPE_LNG: +		space[0] = fpu_ftox(fe, fp, space); +		DPRINTF(FPE_REG, ("fpu_implode: long %x %x\n", +			space[0], space[1])); +		break; + +	case FTYPE_INT: +		space[0] = 0; +		space[1] = fpu_ftoi(fe, fp); +		DPRINTF(FPE_REG, ("fpu_implode: int %x\n", +			space[1])); +		break; + +	case FTYPE_SNG: +		space[0] = fpu_ftos(fe, fp); +		DPRINTF(FPE_REG, ("fpu_implode: single %x\n", +			space[0])); +		break; + +	case FTYPE_DBL: +		space[0] = fpu_ftod(fe, fp, space); +		DPRINTF(FPE_REG, ("fpu_implode: double %x %x\n", +			space[0], space[1])); +		break;		break; + +	default: +		panic("fpu_implode: invalid type %d", type); +	} +} diff --git a/sys/powerpc/fpu/fpu_instr.h b/sys/powerpc/fpu/fpu_instr.h new file mode 100644 index 000000000000..912ab7659635 --- /dev/null +++ b/sys/powerpc/fpu/fpu_instr.h @@ -0,0 +1,383 @@ +/*	$NetBSD: instr.h,v 1.4 2005/12/11 12:18:43 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * An instruction. + */ +union instr { +	int	i_int;			/* as a whole */ + +	/* +	 * Any instruction type. +	 */ +	struct { +		u_int	i_opcd:6;	/* first-level decode */ +		u_int	:25; +		u_int	i_rc:1; +	} i_any; + +	/* +	 * Format A +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_frt:5; +		u_int	i_fra:5; +		u_int	i_frb:5; +		u_int	i_frc:5; +		u_int	i_xo:5; +		u_int	i_rc:1; +	} i_a; + +	/* +	 * Format B +	 */ +	struct { +		u_int	i_opcd:6; +		int	i_bo:5; +		int	i_bi:5; +		int	i_bd:14; +		int	i_aa:1; +		int	i_lk:1; +	} i_b; + +	/* +	 * Format D +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		int	i_d:16; +	} i_d; + +	/* +	 * Format DE +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		int	i_d:12; +		u_int	i_xo:4; +	} i_de; + +	/* +	 * Format I +	 */ +	struct { +		u_int	i_opcd:6; +		int	i_li:24; +		int	i_aa:1; +		int	i_lk:1; +	} i_i; + +	/* +	 * Format M +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		u_int	i_rb:5; +		int	i_mb:5; +		int	i_me:5; +		u_int	i_rc:1; +	} i_m; + +	/* +	 * Format MD +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		u_int	i_rb:5; +		int	i_sh1_5:5; +		int	i_mb:6; +		u_int	i_xo:3; +		int	i_sh0:2; +		u_int	i_rc:1; +	} i_md; + +	/* +	 * Format MDS +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		u_int	i_rb:5; +		int	i_sh:5; +		int	i_mb:6; +		u_int	i_xo:4; +		u_int	i_rc:1; +	} i_mds; + +	/* +	 * Format S +	 */ +	struct { +		u_int	i_opcd:6; +		int	:24; +		int	i_i:1; +		int	:1; +	} i_s; + +	/* +	 * Format X +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		u_int	i_rb:5; +		u_int	i_xo:10; +		u_int	i_rc:1; +	} i_x; + +	/* +	 * Format XFL +	 */ +	struct { +		u_int	i_opcd:6; +		int	:1; +		int	i_flm:8; +		int	:1; +		int	i_frb:5; +		u_int	i_xo:10; +		int	:1; +	} i_xfl; + +	/* +	 * Format XFX +	 */ +	struct { +		u_int	i_opcd:6; +		int	i_dcrn:10; +		u_int	i_xo:10; +		int	:1; +	} i_xfx; + +	/* +	 * Format XL +	 */ +	struct { +		u_int	i_opcd:6; +		int	i_bt:5; +		int	i_ba:5; +		int	i_bb:5; +		u_int	i_xo:10; +		int	i_lk:1; +	} i_xl; + +	/* +	 * Format XS +	 */ +	struct { +		u_int	i_opcd:6; +		u_int	i_rs:5; +		u_int	i_ra:5; +		int	i_sh0_4:5; +		u_int	i_xo:9; +		int	i_sh5:1; +		u_int	i_rc:1; +	} i_xs; + +}; + +#define	i_rt	i_rs + +/* + * Primary opcode numbers: + */ + +#define	OPC_TDI		0x02 +#define	OPC_TWI		0x03 +#define	OPC_MULLI	0x07 +#define	OPC_SUBFIC	0x08 +#define	OPC_BCE		0x09 +#define	OPC_CMPLI	0x0a +#define	OPC_CMPI	0x0b +#define	OPC_ADDIC	0x0c +#define	OPC_ADDIC_DOT	0x0d +#define	OPC_ADDI	0x0e +#define	OPC_ADDIS	0x0f +#define	OPC_BC		0x10 +#define	OPC_SC		0x11 +#define	OPC_B		0x12 +#define	OPC_branch_19	0x13 +#define	OPC_RLWIMI	0x14 +#define	OPC_RLWINM	0x15 +#define	OPC_BE		0x16 +#define	OPC_RLWNM	0x17 +#define	OPC_ORI		0x18 +#define	OPC_ORIS	0x19 +#define	OPC_XORI	0x1a +#define	OPC_XORIS	0x1b +#define	OPC_ANDI	0x1c +#define	OPC_ANDIS	0x1d +#define	OPC_dwe_rot_30	0x1e +#define	OPC_integer_31	0x1f +#define	OPC_LWZ		0x20 +#define	OPC_LWZU	0x21 +#define	OPC_LBZ		0x22 +#define	OPC_LBZU	0x23 +#define	OPC_STW		0x24 +#define	OPC_STWU	0x25 +#define	OPC_STB		0x26 +#define	OPC_STBU	0x27 +#define	OPC_LHZ		0x28 +#define	OPC_LHZU	0x29 +#define	OPC_LHA		0x2a +#define	OPC_LHAU	0x2b +#define	OPC_STH		0x2c +#define	OPC_STHU	0x2d +#define	OPC_LMW		0x2e +#define	OPC_STMW	0x2f +#define	OPC_LFS		0x30 +#define	OPC_LFSU	0x31 +#define	OPC_LFD		0x32 +#define	OPC_LFDU	0x33 +#define	OPC_STFS	0x34 +#define	OPC_STFSU	0x35 +#define	OPC_STFD	0x36 +#define	OPC_STFDU	0x37 +#define	OPC_load_st_58	0x3a +#define	OPC_sp_fp_59	0x3b +#define	OPC_load_st_62	0x3e +#define	OPC_dp_fp_63	0x3f + +/* + * Opcode 31 sub-types (FP only) + */ +#define	OPC31_TW	0x004 +#define	OPC31_LFSX	0x217 +#define	OPC31_LFSUX	0x237 +#define	OPC31_LFDX	0x257 +#define	OPC31_LFDUX	0x277 +#define	OPC31_STFSX	0x297 +#define	OPC31_STFSUX	0x2b7 +#define	OPC31_STFDX	0x2d7 +#define	OPC31_STFDUX	0x2f7 +#define	OPC31_STFIWX	0x3d7 + +/* Mask for all valid indexed FP load/store ops (except stfiwx) */ +#define	OPC31_FPMASK	0x31f +#define	OPC31_FPOP	0x217 + +/* + * Opcode 59 sub-types: + */ + +#define	OPC59_FDIVS	0x12 +#define	OPC59_FSUBS	0x14 +#define	OPC59_FADDS	0x15 +#define	OPC59_FSQRTS	0x16 +#define	OPC59_FRES	0x18 +#define	OPC59_FMULS	0x19 +#define	OPC59_FMSUBS	0x1c +#define	OPC59_FMADDS	0x1d +#define	OPC59_FNMSUBS	0x1e +#define	OPC59_FNMADDS	0x1f + +/* + * Opcode 62 sub-types: + */ +#define	OPC62_LDE	0x0 +#define	OPC62_LDEU	0x1 +#define	OPC62_LFSE	0x4 +#define	OPC62_LFSEU	0x5 +#define	OPC62_LFDE	0x6 +#define	OPC62_LFDEU	0x7 +#define	OPC62_STDE	0x8 +#define	OPC62_STDEU	0x9 +#define	OPC62_STFSE	0xc +#define	OPC62_STFSEU	0xd +#define	OPC62_STFDE	0xe +#define	OPC62_STFDEU	0xf + +/* + * Opcode 63 sub-types: + * + * (The first group are masks....) + */ + +#define	OPC63M_MASK	0x10 +#define	OPC63M_FDIV	0x12 +#define	OPC63M_FSUB	0x14 +#define	OPC63M_FADD	0x15 +#define	OPC63M_FSQRT	0x16 +#define	OPC63M_FSEL	0x17 +#define	OPC63M_FMUL	0x19 +#define	OPC63M_FRSQRTE	0x1a +#define	OPC63M_FMSUB	0x1c +#define	OPC63M_FMADD	0x1d +#define	OPC63M_FNMSUB	0x1e +#define	OPC63M_FNMADD	0x1f + +#define	OPC63_FCMPU	0x00 +#define	OPC63_FRSP	0x0c +#define	OPC63_FCTIW	0x0e +#define	OPC63_FCTIWZ	0x0f +#define	OPC63_FCMPO	0x20 +#define	OPC63_MTFSB1	0x26 +#define	OPC63_FNEG	0x28 +#define	OPC63_MCRFS	0x40 +#define	OPC63_MTFSB0	0x46 +#define	OPC63_FMR	0x48 +#define	OPC63_MTFSFI	0x86 +#define	OPC63_FNABS	0x88 +#define	OPC63_FABS	0x108 +#define	OPC63_MFFS	0x247 +#define	OPC63_MTFSF	0x2c7 +#define	OPC63_FCTID	0x32e +#define	OPC63_FCTIDZ	0x32f +#define	OPC63_FCFID	0x34e + +/* + * FPU data types. + */ +#define FTYPE_LNG	-1	/* data = 64-bit signed long integer */		 +#define	FTYPE_INT	0	/* data = 32-bit signed integer */ +#define	FTYPE_SNG	1	/* data = 32-bit float */ +#define	FTYPE_DBL	2	/* data = 64-bit double */ diff --git a/sys/powerpc/fpu/fpu_mul.c b/sys/powerpc/fpu/fpu_mul.c new file mode 100644 index 000000000000..3e73c98251e1 --- /dev/null +++ b/sys/powerpc/fpu/fpu_mul.c @@ -0,0 +1,235 @@ +/*	$NetBSD: fpu_mul.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ + +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Perform an FPU multiply (return x * y). + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> + +/* + * The multiplication algorithm for normal numbers is as follows: + * + * The fraction of the product is built in the usual stepwise fashion. + * Each step consists of shifting the accumulator right one bit + * (maintaining any guard bits) and, if the next bit in y is set, + * adding the multiplicand (x) to the accumulator.  Then, in any case, + * we advance one bit leftward in y.  Algorithmically: + * + *	A = 0; + *	for (bit = 0; bit < FP_NMANT; bit++) { + *		sticky |= A & 1, A >>= 1; + *		if (Y & (1 << bit)) + *			A += X; + *	} + * + * (X and Y here represent the mantissas of x and y respectively.) + * The resultant accumulator (A) is the product's mantissa.  It may + * be as large as 11.11111... in binary and hence may need to be + * shifted right, but at most one bit. + * + * Since we do not have efficient multiword arithmetic, we code the + * accumulator as four separate words, just like any other mantissa. + * We use local variables in the hope that this is faster than memory. + * We keep x->fp_mant in locals for the same reason. + * + * In the algorithm above, the bits in y are inspected one at a time. + * We will pick them up 32 at a time and then deal with those 32, one + * at a time.  Note, however, that we know several things about y: + * + *    - the guard and round bits at the bottom are sure to be zero; + * + *    - often many low bits are zero (y is often from a single or double + *	precision source); + * + *    - bit FP_NMANT-1 is set, and FP_1*2 fits in a word. + * + * We can also test for 32-zero-bits swiftly.  In this case, the center + * part of the loop---setting sticky, shifting A, and not adding---will + * run 32 times without adding X to A.  We can do a 32-bit shift faster + * by simply moving words.  Since zeros are common, we optimize this case. + * Furthermore, since A is initially zero, we can omit the shift as well + * until we reach a nonzero word. + */ +struct fpn * +fpu_mul(struct fpemu *fe) +{ +	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; +	u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m; +	int sticky; +	FPU_DECL_CARRY; + +	/* +	 * Put the `heavier' operand on the right (see fpu_emu.h). +	 * Then we will have one of the following cases, taken in the +	 * following order: +	 * +	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is. +	 *	The result is y. +	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN +	 *    case was taken care of earlier). +	 *	If x = 0, the result is NaN.  Otherwise the result +	 *	is y, with its sign reversed if x is negative. +	 *  - x = 0.  Implied: y is 0 or number. +	 *	The result is 0 (with XORed sign as usual). +	 *  - other.  Implied: both x and y are numbers. +	 *	The result is x * y (XOR sign, multiply bits, add exponents). +	 */ +	DPRINTF(FPE_REG, ("fpu_mul:\n")); +	DUMPFPN(FPE_REG, x); +	DUMPFPN(FPE_REG, y); +	DPRINTF(FPE_REG, ("=>\n")); + +	ORDER(x, y); +	if (ISNAN(y)) { +		y->fp_sign ^= x->fp_sign; +		fe->fe_cx |= FPSCR_VXSNAN; +		DUMPFPN(FPE_REG, y); +		return (y); +	} +	if (ISINF(y)) { +		if (ISZERO(x)) { +			fe->fe_cx |= FPSCR_VXIMZ; +			return (fpu_newnan(fe)); +		} +		y->fp_sign ^= x->fp_sign; +			DUMPFPN(FPE_REG, y); +		return (y); +	} +	if (ISZERO(x)) { +		x->fp_sign ^= y->fp_sign; +		DUMPFPN(FPE_REG, x); +		return (x); +	} + +	/* +	 * Setup.  In the code below, the mask `m' will hold the current +	 * mantissa byte from y.  The variable `bit' denotes the bit +	 * within m.  We also define some macros to deal with everything. +	 */ +	x3 = x->fp_mant[3]; +	x2 = x->fp_mant[2]; +	x1 = x->fp_mant[1]; +	x0 = x->fp_mant[0]; +	sticky = a3 = a2 = a1 = a0 = 0; + +#define	ADD	/* A += X */ \ +	FPU_ADDS(a3, a3, x3); \ +	FPU_ADDCS(a2, a2, x2); \ +	FPU_ADDCS(a1, a1, x1); \ +	FPU_ADDC(a0, a0, x0) + +#define	SHR1	/* A >>= 1, with sticky */ \ +	sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \ +	a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1 + +#define	SHR32	/* A >>= 32, with sticky */ \ +	sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0 + +#define	STEP	/* each 1-bit step of the multiplication */ \ +	SHR1; if (bit & m) { ADD; }; bit <<= 1 + +	/* +	 * We are ready to begin.  The multiply loop runs once for each +	 * of the four 32-bit words.  Some words, however, are special. +	 * As noted above, the low order bits of Y are often zero.  Even +	 * if not, the first loop can certainly skip the guard bits. +	 * The last word of y has its highest 1-bit in position FP_NMANT-1, +	 * so we stop the loop when we move past that bit. +	 */ +	if ((m = y->fp_mant[3]) == 0) { +		/* SHR32; */			/* unneeded since A==0 */ +	} else { +		bit = 1 << FP_NG; +		do { +			STEP; +		} while (bit != 0); +	} +	if ((m = y->fp_mant[2]) == 0) { +		SHR32; +	} else { +		bit = 1; +		do { +			STEP; +		} while (bit != 0); +	} +	if ((m = y->fp_mant[1]) == 0) { +		SHR32; +	} else { +		bit = 1; +		do { +			STEP; +		} while (bit != 0); +	} +	m = y->fp_mant[0];		/* definitely != 0 */ +	bit = 1; +	do { +		STEP; +	} while (bit <= m); + +	/* +	 * Done with mantissa calculation.  Get exponent and handle +	 * 11.111...1 case, then put result in place.  We reuse x since +	 * it already has the right class (FP_NUM). +	 */ +	m = x->fp_exp + y->fp_exp; +	if (a0 >= FP_2) { +		SHR1; +		m++; +	} +	x->fp_sign ^= y->fp_sign; +	x->fp_exp = m; +	x->fp_sticky = sticky; +	x->fp_mant[3] = a3; +	x->fp_mant[2] = a2; +	x->fp_mant[1] = a1; +	x->fp_mant[0] = a0; + +	DUMPFPN(FPE_REG, x); +	return (x); +} diff --git a/sys/powerpc/fpu/fpu_sqrt.c b/sys/powerpc/fpu/fpu_sqrt.c new file mode 100644 index 000000000000..7d6cdcb1b72e --- /dev/null +++ b/sys/powerpc/fpu/fpu_sqrt.c @@ -0,0 +1,411 @@ +/*	$NetBSD: fpu_sqrt.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Perform an FPU square root (return sqrt(x)). + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> + +/* + * Our task is to calculate the square root of a floating point number x0. + * This number x normally has the form: + * + *		    exp + *	x = mant * 2		(where 1 <= mant < 2 and exp is an integer) + * + * This can be left as it stands, or the mantissa can be doubled and the + * exponent decremented: + * + *			  exp-1 + *	x = (2 * mant) * 2	(where 2 <= 2 * mant < 4) + * + * If the exponent `exp' is even, the square root of the number is best + * handled using the first form, and is by definition equal to: + * + *				exp/2 + *	sqrt(x) = sqrt(mant) * 2 + * + * If exp is odd, on the other hand, it is convenient to use the second + * form, giving: + * + *				    (exp-1)/2 + *	sqrt(x) = sqrt(2 * mant) * 2 + * + * In the first case, we have + * + *	1 <= mant < 2 + * + * and therefore + * + *	sqrt(1) <= sqrt(mant) < sqrt(2) + * + * while in the second case we have + * + *	2 <= 2*mant < 4 + * + * and therefore + * + *	sqrt(2) <= sqrt(2*mant) < sqrt(4) + * + * so that in any case, we are sure that + * + *	sqrt(1) <= sqrt(n * mant) < sqrt(4),	n = 1 or 2 + * + * or + * + *	1 <= sqrt(n * mant) < 2,		n = 1 or 2. + * + * This root is therefore a properly formed mantissa for a floating + * point number.  The exponent of sqrt(x) is either exp/2 or (exp-1)/2 + * as above.  This leaves us with the problem of finding the square root + * of a fixed-point number in the range [1..4). + * + * Though it may not be instantly obvious, the following square root + * algorithm works for any integer x of an even number of bits, provided + * that no overflows occur: + * + *	let q = 0 + *	for k = NBITS-1 to 0 step -1 do -- for each digit in the answer... + *		x *= 2			-- multiply by radix, for next digit + *		if x >= 2q + 2^k then	-- if adding 2^k does not + *			x -= 2q + 2^k	-- exceed the correct root, + *			q += 2^k	-- add 2^k and adjust x + *		fi + *	done + *	sqrt = q / 2^(NBITS/2)		-- (and any remainder is in x) + * + * If NBITS is odd (so that k is initially even), we can just add another + * zero bit at the top of x.  Doing so means that q is not going to acquire + * a 1 bit in the first trip around the loop (since x0 < 2^NBITS).  If the + * final value in x is not needed, or can be off by a factor of 2, this is + * equivalant to moving the `x *= 2' step to the bottom of the loop: + * + *	for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done + * + * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2). + * (Since the algorithm is destructive on x, we will call x's initial + * value, for which q is some power of two times its square root, x0.) + * + * If we insert a loop invariant y = 2q, we can then rewrite this using + * C notation as: + * + *	q = y = 0; x = x0; + *	for (k = NBITS; --k >= 0;) { + * #if (NBITS is even) + *		x *= 2; + * #endif + *		t = y + (1 << k); + *		if (x >= t) { + *			x -= t; + *			q += 1 << k; + *			y += 1 << (k + 1); + *		} + * #if (NBITS is odd) + *		x *= 2; + * #endif + *	} + * + * If x0 is fixed point, rather than an integer, we can simply alter the + * scale factor between q and sqrt(x0).  As it happens, we can easily arrange + * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q. + * + * In our case, however, x0 (and therefore x, y, q, and t) are multiword + * integers, which adds some complication.  But note that q is built one + * bit at a time, from the top down, and is not used itself in the loop + * (we use 2q as held in y instead).  This means we can build our answer + * in an integer, one word at a time, which saves a bit of work.  Also, + * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are + * `new' bits in y and we can set them with an `or' operation rather than + * a full-blown multiword add. + * + * We are almost done, except for one snag.  We must prove that none of our + * intermediate calculations can overflow.  We know that x0 is in [1..4) + * and therefore the square root in q will be in [1..2), but what about x, + * y, and t? + * + * We know that y = 2q at the beginning of each loop.  (The relation only + * fails temporarily while y and q are being updated.)  Since q < 2, y < 4. + * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and. + * Furthermore, we can prove with a bit of work that x never exceeds y by + * more than 2, so that even after doubling, 0 <= x < 8.  (This is left as + * an exercise to the reader, mostly because I have become tired of working + * on this comment.) + * + * If our floating point mantissas (which are of the form 1.frac) occupy + * B+1 bits, our largest intermediary needs at most B+3 bits, or two extra. + * In fact, we want even one more bit (for a carry, to avoid compares), or + * three extra.  There is a comment in fpu_emu.h reminding maintainers of + * this, so we have some justification in assuming it. + */ +struct fpn * +fpu_sqrt(struct fpemu *fe) +{ +	struct fpn *x = &fe->fe_f1; +	u_int bit, q, tt; +	u_int x0, x1, x2, x3; +	u_int y0, y1, y2, y3; +	u_int d0, d1, d2, d3; +	int e; +	FPU_DECL_CARRY; + +	/* +	 * Take care of special cases first.  In order: +	 * +	 *	sqrt(NaN) = NaN +	 *	sqrt(+0) = +0 +	 *	sqrt(-0) = -0 +	 *	sqrt(x < 0) = NaN	(including sqrt(-Inf)) +	 *	sqrt(+Inf) = +Inf +	 * +	 * Then all that remains are numbers with mantissas in [1..2). +	 */ +	DPRINTF(FPE_REG, ("fpu_sqer:\n")); +	DUMPFPN(FPE_REG, x); +	DPRINTF(FPE_REG, ("=>\n")); +	if (ISNAN(x)) { +		fe->fe_cx |= FPSCR_VXSNAN; +		DUMPFPN(FPE_REG, x); +		return (x); +	} +	if (ISZERO(x)) { +		fe->fe_cx |= FPSCR_ZX; +		x->fp_class = FPC_INF; +		DUMPFPN(FPE_REG, x); +		return (x); +	} +	if (x->fp_sign) { +		fe->fe_cx |= FPSCR_VXSQRT; +		return (fpu_newnan(fe)); +	} +	if (ISINF(x)) { +		DUMPFPN(FPE_REG, x); +		return (x); +	} + +	/* +	 * Calculate result exponent.  As noted above, this may involve +	 * doubling the mantissa.  We will also need to double x each +	 * time around the loop, so we define a macro for this here, and +	 * we break out the multiword mantissa. +	 */ +#ifdef FPU_SHL1_BY_ADD +#define	DOUBLE_X { \ +	FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \ +	FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \ +} +#else +#define	DOUBLE_X { \ +	x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \ +	x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \ +} +#endif +#if (FP_NMANT & 1) != 0 +# define ODD_DOUBLE	DOUBLE_X +# define EVEN_DOUBLE	/* nothing */ +#else +# define ODD_DOUBLE	/* nothing */ +# define EVEN_DOUBLE	DOUBLE_X +#endif +	x0 = x->fp_mant[0]; +	x1 = x->fp_mant[1]; +	x2 = x->fp_mant[2]; +	x3 = x->fp_mant[3]; +	e = x->fp_exp; +	if (e & 1)		/* exponent is odd; use sqrt(2mant) */ +		DOUBLE_X; +	/* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */ +	x->fp_exp = e >> 1;	/* calculates (e&1 ? (e-1)/2 : e/2 */ + +	/* +	 * Now calculate the mantissa root.  Since x is now in [1..4), +	 * we know that the first trip around the loop will definitely +	 * set the top bit in q, so we can do that manually and start +	 * the loop at the next bit down instead.  We must be sure to +	 * double x correctly while doing the `known q=1.0'. +	 * +	 * We do this one mantissa-word at a time, as noted above, to +	 * save work.  To avoid `(1U << 31) << 1', we also do the top bit +	 * outside of each per-word loop. +	 * +	 * The calculation `t = y + bit' breaks down into `t0 = y0, ..., +	 * t3 = y3, t? |= bit' for the appropriate word.  Since the bit +	 * is always a `new' one, this means that three of the `t?'s are +	 * just the corresponding `y?'; we use `#define's here for this. +	 * The variable `tt' holds the actual `t?' variable. +	 */ + +	/* calculate q0 */ +#define	t0 tt +	bit = FP_1; +	EVEN_DOUBLE; +	/* if (x >= (t0 = y0 | bit)) { */	/* always true */ +		q = bit; +		x0 -= bit; +		y0 = bit << 1; +	/* } */ +	ODD_DOUBLE; +	while ((bit >>= 1) != 0) {	/* for remaining bits in q0 */ +		EVEN_DOUBLE; +		t0 = y0 | bit;		/* t = y + bit */ +		if (x0 >= t0) {		/* if x >= t then */ +			x0 -= t0;	/*	x -= t */ +			q |= bit;	/*	q += bit */ +			y0 |= bit << 1;	/*	y += bit << 1 */ +		} +		ODD_DOUBLE; +	} +	x->fp_mant[0] = q; +#undef t0 + +	/* calculate q1.  note (y0&1)==0. */ +#define t0 y0 +#define t1 tt +	q = 0; +	y1 = 0; +	bit = 1 << 31; +	EVEN_DOUBLE; +	t1 = bit; +	FPU_SUBS(d1, x1, t1); +	FPU_SUBC(d0, x0, t0);		/* d = x - t */ +	if ((int)d0 >= 0) {		/* if d >= 0 (i.e., x >= t) then */ +		x0 = d0, x1 = d1;	/*	x -= t */ +		q = bit;		/*	q += bit */ +		y0 |= 1;		/*	y += bit << 1 */ +	} +	ODD_DOUBLE; +	while ((bit >>= 1) != 0) {	/* for remaining bits in q1 */ +		EVEN_DOUBLE;		/* as before */ +		t1 = y1 | bit; +		FPU_SUBS(d1, x1, t1); +		FPU_SUBC(d0, x0, t0); +		if ((int)d0 >= 0) { +			x0 = d0, x1 = d1; +			q |= bit; +			y1 |= bit << 1; +		} +		ODD_DOUBLE; +	} +	x->fp_mant[1] = q; +#undef t1 + +	/* calculate q2.  note (y1&1)==0; y0 (aka t0) is fixed. */ +#define t1 y1 +#define t2 tt +	q = 0; +	y2 = 0; +	bit = 1 << 31; +	EVEN_DOUBLE; +	t2 = bit; +	FPU_SUBS(d2, x2, t2); +	FPU_SUBCS(d1, x1, t1); +	FPU_SUBC(d0, x0, t0); +	if ((int)d0 >= 0) { +		x0 = d0, x1 = d1, x2 = d2; +		q = bit; +		y1 |= 1;		/* now t1, y1 are set in concrete */ +	} +	ODD_DOUBLE; +	while ((bit >>= 1) != 0) { +		EVEN_DOUBLE; +		t2 = y2 | bit; +		FPU_SUBS(d2, x2, t2); +		FPU_SUBCS(d1, x1, t1); +		FPU_SUBC(d0, x0, t0); +		if ((int)d0 >= 0) { +			x0 = d0, x1 = d1, x2 = d2; +			q |= bit; +			y2 |= bit << 1; +		} +		ODD_DOUBLE; +	} +	x->fp_mant[2] = q; +#undef t2 + +	/* calculate q3.  y0, t0, y1, t1 all fixed; y2, t2, almost done. */ +#define t2 y2 +#define t3 tt +	q = 0; +	y3 = 0; +	bit = 1 << 31; +	EVEN_DOUBLE; +	t3 = bit; +	FPU_SUBS(d3, x3, t3); +	FPU_SUBCS(d2, x2, t2); +	FPU_SUBCS(d1, x1, t1); +	FPU_SUBC(d0, x0, t0); +	if ((int)d0 >= 0) { +		x0 = d0, x1 = d1, x2 = d2; x3 = d3; +		q = bit; +		y2 |= 1; +	} +	ODD_DOUBLE; +	while ((bit >>= 1) != 0) { +		EVEN_DOUBLE; +		t3 = y3 | bit; +		FPU_SUBS(d3, x3, t3); +		FPU_SUBCS(d2, x2, t2); +		FPU_SUBCS(d1, x1, t1); +		FPU_SUBC(d0, x0, t0); +		if ((int)d0 >= 0) { +			x0 = d0, x1 = d1, x2 = d2; x3 = d3; +			q |= bit; +			y3 |= bit << 1; +		} +		ODD_DOUBLE; +	} +	x->fp_mant[3] = q; + +	/* +	 * The result, which includes guard and round bits, is exact iff +	 * x is now zero; any nonzero bits in x represent sticky bits. +	 */ +	x->fp_sticky = x0 | x1 | x2 | x3; +	DUMPFPN(FPE_REG, x); +	return (x); +} diff --git a/sys/powerpc/fpu/fpu_subr.c b/sys/powerpc/fpu/fpu_subr.c new file mode 100644 index 000000000000..1da4f6e6c03a --- /dev/null +++ b/sys/powerpc/fpu/fpu_subr.c @@ -0,0 +1,216 @@ +/*	$NetBSD: fpu_subr.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */ + +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1992, 1993 + *	The Regents of the University of California.  All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + *	This product includes software developed by the University of + *	California, Lawrence Berkeley Laboratory. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FPU subroutines. + */ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/fpu.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> + +/* + * Shift the given number right rsh bits.  Any bits that `fall off' will get + * shoved into the sticky field; we return the resulting sticky.  Note that + * shifting NaNs is legal (this will never shift all bits out); a NaN's + * sticky field is ignored anyway. + */ +int +fpu_shr(struct fpn *fp, int rsh) +{ +	u_int m0, m1, m2, m3, s; +	int lsh; + +#ifdef DIAGNOSTIC +	if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp))) +		panic("fpu_rightshift 1"); +#endif + +	m0 = fp->fp_mant[0]; +	m1 = fp->fp_mant[1]; +	m2 = fp->fp_mant[2]; +	m3 = fp->fp_mant[3]; + +	/* If shifting all the bits out, take a shortcut. */ +	if (rsh >= FP_NMANT) { +#ifdef DIAGNOSTIC +		if ((m0 | m1 | m2 | m3) == 0) +			panic("fpu_rightshift 2"); +#endif +		fp->fp_mant[0] = 0; +		fp->fp_mant[1] = 0; +		fp->fp_mant[2] = 0; +		fp->fp_mant[3] = 0; +#ifdef notdef +		if ((m0 | m1 | m2 | m3) == 0) +			fp->fp_class = FPC_ZERO; +		else +#endif +			fp->fp_sticky = 1; +		return (1); +	} + +	/* Squish out full words. */ +	s = fp->fp_sticky; +	if (rsh >= 32 * 3) { +		s |= m3 | m2 | m1; +		m3 = m0, m2 = 0, m1 = 0, m0 = 0; +	} else if (rsh >= 32 * 2) { +		s |= m3 | m2; +		m3 = m1, m2 = m0, m1 = 0, m0 = 0; +	} else if (rsh >= 32) { +		s |= m3; +		m3 = m2, m2 = m1, m1 = m0, m0 = 0; +	} + +	/* Handle any remaining partial word. */ +	if ((rsh &= 31) != 0) { +		lsh = 32 - rsh; +		s |= m3 << lsh; +		m3 = (m3 >> rsh) | (m2 << lsh); +		m2 = (m2 >> rsh) | (m1 << lsh); +		m1 = (m1 >> rsh) | (m0 << lsh); +		m0 >>= rsh; +	} +	fp->fp_mant[0] = m0; +	fp->fp_mant[1] = m1; +	fp->fp_mant[2] = m2; +	fp->fp_mant[3] = m3; +	fp->fp_sticky = s; +	return (s); +} + +/* + * Force a number to be normal, i.e., make its fraction have all zero + * bits before FP_1, then FP_1, then all 1 bits.  This is used for denorms + * and (sometimes) for intermediate results. + * + * Internally, this may use a `supernormal' -- a number whose fp_mant + * is greater than or equal to 2.0 -- so as a side effect you can hand it + * a supernormal and it will fix it (provided fp->fp_mant[3] == 0). + */ +void +fpu_norm(struct fpn *fp) +{ +	u_int m0, m1, m2, m3, top, sup, nrm; +	int lsh, rsh, exp; + +	exp = fp->fp_exp; +	m0 = fp->fp_mant[0]; +	m1 = fp->fp_mant[1]; +	m2 = fp->fp_mant[2]; +	m3 = fp->fp_mant[3]; + +	/* Handle severe subnormals with 32-bit moves. */ +	if (m0 == 0) { +		if (m1) +			m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32; +		else if (m2) +			m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32; +		else if (m3) +			m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32; +		else { +			fp->fp_class = FPC_ZERO; +			return; +		} +	} + +	/* Now fix any supernormal or remaining subnormal. */ +	nrm = FP_1; +	sup = nrm << 1; +	if (m0 >= sup) { +		/* +		 * We have a supernormal number.  We need to shift it right. +		 * We may assume m3==0. +		 */ +		for (rsh = 1, top = m0 >> 1; top >= sup; rsh++)	/* XXX slow */ +			top >>= 1; +		exp += rsh; +		lsh = 32 - rsh; +		m3 = m2 << lsh; +		m2 = (m2 >> rsh) | (m1 << lsh); +		m1 = (m1 >> rsh) | (m0 << lsh); +		m0 = top; +	} else if (m0 < nrm) { +		/* +		 * We have a regular denorm (a subnormal number), and need +		 * to shift it left. +		 */ +		for (lsh = 1, top = m0 << 1; top < nrm; lsh++)	/* XXX slow */ +			top <<= 1; +		exp -= lsh; +		rsh = 32 - lsh; +		m0 = top | (m1 >> rsh); +		m1 = (m1 << lsh) | (m2 >> rsh); +		m2 = (m2 << lsh) | (m3 >> rsh); +		m3 <<= lsh; +	} + +	fp->fp_exp = exp; +	fp->fp_mant[0] = m0; +	fp->fp_mant[1] = m1; +	fp->fp_mant[2] = m2; +	fp->fp_mant[3] = m3; +} + +/* + * Concoct a `fresh' Quiet NaN per Appendix N. + * As a side effect, we set NV (invalid) for the current exceptions. + */ +struct fpn * +fpu_newnan(struct fpemu *fe) +{ +	struct fpn *fp; + +	fe->fe_cx |= FPSCR_VXSNAN; +	fp = &fe->fe_f3; +	fp->fp_class = FPC_QNAN; +	fp->fp_sign = 0; +	fp->fp_mant[0] = FP_1 - 1; +	fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0; +	DUMPFPN(FPE_REG, fp); +	return (fp); +} | 
