diff options
Diffstat (limited to 'sys/gnu/fpemul/reg_round.s')
| -rw-r--r-- | sys/gnu/fpemul/reg_round.s | 653 |
1 files changed, 653 insertions, 0 deletions
diff --git a/sys/gnu/fpemul/reg_round.s b/sys/gnu/fpemul/reg_round.s new file mode 100644 index 000000000000..7490bbb31d7e --- /dev/null +++ b/sys/gnu/fpemul/reg_round.s @@ -0,0 +1,653 @@ + .file "reg_round.S" +/* + * reg_round.S + * + * Rounding/truncation/etc for FPU basic arithmetic functions. + * + * This code has four possible entry points. + * The following must be entered by a jmp intruction: + * FPU_round, FPU_round_sqrt, and FPU_Arith_exit. + * + * The _round_reg entry point is intended to be used by C code. + * From C, call as: + * void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) + * + * + * Copyright (C) 1992,1993,1994 + * W. Metzenthen, 22 Parker St, Ormond, Vic 3163, + * Australia. E-mail billm@vaxc.cc.monash.edu.au + * All rights reserved. + * + * This copyright notice covers the redistribution and use of the + * FPU emulator developed by W. Metzenthen. It covers only its use + * in the 386BSD, FreeBSD and NetBSD operating systems. Any other + * use is not permitted under this copyright. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must include information specifying + * that source code for the emulator is freely available and include + * either: + * a) an offer to provide the source code for a nominal distribution + * fee, or + * b) list at least two alternative methods whereby the source + * can be obtained, e.g. a publically accessible bulletin board + * and an anonymous ftp site from which the software can be + * downloaded. + * 3. All advertising materials specifically mentioning features or use of + * this emulator must acknowledge that it was developed by W. Metzenthen. + * 4. The name of W. Metzenthen may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * The purpose of this copyright, based upon the Berkeley copyright, is to + * ensure that the covered software remains freely available to everyone. + * + * The software (with necessary differences) is also available, but under + * the terms of the GNU copyleft, for the Linux operating system and for + * the djgpp ms-dos extender. + * + * W. Metzenthen June 1994. + * + * + * $Id: reg_round.s,v 1.3 1994/06/10 07:44:55 rich Exp $ + * + */ + + +/*---------------------------------------------------------------------------+ + | Four entry points. | + | | + | Needed by both the FPU_round and FPU_round_sqrt entry points: | + | %eax:%ebx 64 bit significand | + | %edx 32 bit extension of the significand | + | %edi pointer to an FPU_REG for the result to be stored | + | stack calling function must have set up a C stack frame and | + | pushed %esi, %edi, and %ebx | + | | + | Needed just for the FPU_round_sqrt entry point: | + | %cx A control word in the same format as the FPU control word. | + | Otherwise, PARAM4 must give such a value. | + | | + | | + | The significand and its extension are assumed to be exact in the | + | following sense: | + | If the significand by itself is the exact result then the significand | + | extension (%edx) must contain 0, otherwise the significand extension | + | must be non-zero. | + | If the significand extension is non-zero then the significand is | + | smaller than the magnitude of the correct exact result by an amount | + | greater than zero and less than one ls bit of the significand. | + | The significand extension is only required to have three possible | + | non-zero values: | + | less than 0x80000000 <=> the significand is less than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | + | smaller than the magnitude of the true | + | exact result. | + | greater than 0x80000000 <=> the significand is more than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The code in this module has become quite complex, but it should handle | + | all of the FPU flags which are set at this stage of the basic arithmetic | + | computations. | + | There are a few rare cases where the results are not set identically to | + | a real FPU. These require a bit more thought because at this stage the | + | results of the code here appear to be more consistent... | + | This may be changed in a future version. | + +---------------------------------------------------------------------------*/ + + +#include "fpu_asm.h" +#include "exception.h" +#include "control_w.h" + +#define LOST_DOWN $1 +#define LOST_UP $2 +#define DENORMAL $1 +#define UNMASKED_UNDERFLOW $2 + +.data + .align 2,0 +FPU_bits_lost: + .byte 0 +FPU_denormal: + .byte 0 + +.text + .align 2,144 +.globl FPU_round +.globl FPU_round_sqrt +.globl FPU_Arith_exit +.globl _round_reg + +/* Entry point when called from C */ +_round_reg: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%edi + movl SIGH(%edi),%eax + movl SIGL(%edi),%ebx + movl PARAM2,%edx + movl PARAM3,%ecx + jmp FPU_round_sqrt + +FPU_round: /* Normal entry point */ + movl PARAM4,%ecx + +FPU_round_sqrt: /* Entry point from wm_sqrt.S */ + +#ifdef PARANOID +/* Cannot use this here yet */ +/* orl %eax,%eax */ +/* jns L_entry_bugged */ +#endif PARANOID + + cmpl EXP_UNDER,EXP(%edi) + jle xMake_denorm /* The number is a de-normal*/ + + movb $0,FPU_denormal /* 0 -> not a de-normal*/ + +xDenorm_done: + movb $0,FPU_bits_lost /*No bits yet lost in rounding*/ + + movl %ecx,%esi + andl CW_PC,%ecx + cmpl PR_64_BITS,%ecx + je LRound_To_64 + + cmpl PR_53_BITS,%ecx + je LRound_To_53 + + cmpl PR_24_BITS,%ecx + je LRound_To_24 + +#ifdef PARANOID + jmp L_bugged /* There is no bug, just a bad control word */ +#endif PARANOID + + +/* Round etc to 24 bit precision */ +LRound_To_24: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_24 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_24 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_24 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_24 + +#ifdef PARANOID + jmp L_bugged +#endif PARANOID + +LUp_24: + cmpb SIGN_POS,SIGN(%edi) + jne LCheck_truncate_24 /* If negative then up==truncate */ + + jmp LCheck_24_round_up + +LDown_24: + cmpb SIGN_POS,SIGN(%edi) + je LCheck_truncate_24 /* If positive then down==truncate */ + +LCheck_24_round_up: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jnz LDo_24_round_up + jmp LRe_normalise + +LRound_nearest_24: + /* Do rounding of the 24th bit if needed (nearest or even) */ + movl %eax,%ecx + andl $0x000000ff,%ecx + cmpl $0x00000080,%ecx + jc LCheck_truncate_24 /*less than half, no increment needed*/ + + jne LGreater_Half_24 /* greater than half, increment needed*/ + + /* Possibly half, we need to check the ls bits */ + orl %ebx,%ebx + jnz LGreater_Half_24 /* greater than half, increment needed*/ + + orl %edx,%edx + jnz LGreater_Half_24 /* greater than half, increment needed*/ + + /* Exactly half, increment only if 24th bit is 1 (round to even)*/ + testl $0x00000100,%eax + jz LDo_truncate_24 + +LGreater_Half_24: /*Rounding: increment at the 24th bit*/ +LDo_24_round_up: + andl $0xffffff00,%eax /*Truncate to 24 bits*/ + xorl %ebx,%ebx + movb LOST_UP,FPU_bits_lost + addl $0x00000100,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_24: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jz LRe_normalise /* No truncation needed*/ + +LDo_truncate_24: + andl $0xffffff00,%eax /* Truncate to 24 bits*/ + xorl %ebx,%ebx + movb LOST_DOWN,FPU_bits_lost + jmp LRe_normalise + + +/* Round etc to 53 bit precision */ +LRound_To_53: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_53 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_53 + + cmpl RC_UP,%ecx /* Towards +infinity*/ + je LUp_53 + + cmpl RC_DOWN,%ecx /* Towards -infinity*/ + je LDown_53 + +#ifdef PARANOID + jmp L_bugged +#endif PARANOID + +LUp_53: + cmpb SIGN_POS,SIGN(%edi) + jne LCheck_truncate_53 /* If negative then up==truncate*/ + + jmp LCheck_53_round_up + +LDown_53: + cmpb SIGN_POS,SIGN(%edi) + je LCheck_truncate_53 /* If positive then down==truncate*/ + +LCheck_53_round_up: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jnz LDo_53_round_up + jmp LRe_normalise + +LRound_nearest_53: + /*Do rounding of the 53rd bit if needed (nearest or even)*/ + movl %ebx,%ecx + andl $0x000007ff,%ecx + cmpl $0x00000400,%ecx + jc LCheck_truncate_53 /* less than half, no increment needed*/ + + jnz LGreater_Half_53 /* greater than half, increment needed*/ + + /*Possibly half, we need to check the ls bits*/ + orl %edx,%edx + jnz LGreater_Half_53 /* greater than half, increment needed*/ + + /* Exactly half, increment only if 53rd bit is 1 (round to even)*/ + testl $0x00000800,%ebx + jz LTruncate_53 + +LGreater_Half_53: /*Rounding: increment at the 53rd bit*/ +LDo_53_round_up: + movb LOST_UP,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits*/ + addl $0x00000800,%ebx + adcl $0,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_53: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jz LRe_normalise + +LTruncate_53: + movb LOST_DOWN,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits*/ + jmp LRe_normalise + + +/* Round etc to 64 bit precision*/ +LRound_To_64: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_64 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_64 + + cmpl RC_UP,%ecx /* Towards +infinity*/ + je LUp_64 + + cmpl RC_DOWN,%ecx /* Towards -infinity*/ + je LDown_64 + +#ifdef PARANOID + jmp L_bugged +#endif PARANOID + +LUp_64: + cmpb SIGN_POS,SIGN(%edi) + jne LCheck_truncate_64 /* If negative then up==truncate*/ + + orl %edx,%edx + jnz LDo_64_round_up + jmp LRe_normalise + +LDown_64: + cmpb SIGN_POS,SIGN(%edi) + je LCheck_truncate_64 /*If positive then down==truncate*/ + + orl %edx,%edx + jnz LDo_64_round_up + jmp LRe_normalise + +LRound_nearest_64: + cmpl $0x80000000,%edx + jc LCheck_truncate_64 + + jne LDo_64_round_up + + /* Now test for round-to-even */ + testb $1,%ebx + jz LCheck_truncate_64 + +LDo_64_round_up: + movb LOST_UP,FPU_bits_lost + addl $1,%ebx + adcl $0,%eax + +LCheck_Round_Overflow: + jnc LRe_normalise /* Rounding done, no overflow */ + + /* Overflow, adjust the result (to 1.0) */ + rcrl $1,%eax + rcrl $1,%ebx + incl EXP(%edi) + jmp LRe_normalise + +LCheck_truncate_64: + orl %edx,%edx + jz LRe_normalise + +LTruncate_64: + movb LOST_DOWN,FPU_bits_lost + +LRe_normalise: + testb $0xff,FPU_denormal + jnz xNormalise_result + +xL_Normalised: + cmpb LOST_UP,FPU_bits_lost + je xL_precision_lost_up + + cmpb LOST_DOWN,FPU_bits_lost + je xL_precision_lost_down + +xL_no_precision_loss: + cmpl EXP_OVER,EXP(%edi) + jge L_overflow + + /* store the result */ + movb TW_Valid,TAG(%edi) + +xL_Store_significand: + movl %eax,SIGH(%edi) + movl %ebx,SIGL(%edi) + +FPU_Arith_exit: + popl %ebx + popl %edi + popl %esi + leave + ret + + +/* Set the FPU status flags to represent precision loss due to*/ +/* round-up.*/ +xL_precision_lost_up: + push %eax + call _set_precision_flag_up + popl %eax + jmp xL_no_precision_loss + +/* Set the FPU status flags to represent precision loss due to*/ +/* truncation.*/ +xL_precision_lost_down: + push %eax + call _set_precision_flag_down + popl %eax + jmp xL_no_precision_loss + + +/* The number is a denormal (which might get rounded up to a normal) +// Shift the number right the required number of bits, which will +// have to be undone later...*/ +xMake_denorm: + /* The action to be taken depends upon whether the underflow + // exception is masked*/ + testb CW_Underflow,%cl /* Underflow mask.*/ + jz xUnmasked_underflow /* Do not make a denormal.*/ + + movb DENORMAL,FPU_denormal + + pushl %ecx /* Save*/ + movl EXP(%edi),%ecx + subl EXP_UNDER+1,%ecx + negl %ecx + + cmpl $64,%ecx /* shrd only works for 0..31 bits */ + jnc xDenorm_shift_more_than_63 + + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc xDenorm_shift_more_than_32 + +/* We got here without jumps by assuming that the most common requirement +// is for a small de-normalising shift. +// Shift by [1..31] bits */ + addl %ecx,EXP(%edi) + orl %edx,%edx /* extension*/ + setne %ch + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orb %ch,%dl + popl %ecx + jmp xDenorm_done + +/* Shift by [32..63] bits*/ +xDenorm_shift_more_than_32: + addl %ecx,EXP(%edi) + subb $32,%cl + orl %edx,%edx + setne %ch + orb %ch,%bl + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %edx,%edx /*test these 32 bits*/ + setne %cl + orb %ch,%bl + orb %cl,%bl + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + popl %ecx + jmp xDenorm_done + +/* Shift by [64..) bits*/ +xDenorm_shift_more_than_63: + cmpl $64,%ecx + jne xDenorm_shift_more_than_64 + +/* Exactly 64 bit shift*/ + addl %ecx,EXP(%edi) + xorl %ecx,%ecx + orl %edx,%edx + setne %cl + orl %ebx,%ebx + setne %ch + orb %ch,%cl + orb %cl,%al + movl %eax,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp xDenorm_done + +xDenorm_shift_more_than_64: + movl EXP_UNDER+1,EXP(%edi) +/* This is easy, %eax must be non-zero, so..*/ + movl $1,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp xDenorm_done + + +xUnmasked_underflow: + /* Increase the exponent by the magic number*/ + addl $(3*(1<<13)),EXP(%edi) + movb UNMASKED_UNDERFLOW,FPU_denormal + jmp xDenorm_done + + +/* Undo the de-normalisation.*/ +xNormalise_result: + cmpb UNMASKED_UNDERFLOW,FPU_denormal + je xSignal_underflow + +/* The number must be a denormal if we got here.*/ +#ifdef PARANOID + /* But check it... just in case.*/ + cmpl EXP_UNDER+1,EXP(%edi) + jne L_norm_bugged +#endif PARANOID + + orl %eax,%eax /* ms bits*/ + jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits*/ + + orl %ebx,%ebx + jz L_underflow_to_zero /* The contents are zero*/ + +/* Shift left 32 - 63 bits*/ + movl %ebx,%eax + xorl %ebx,%ebx + subl $32,EXP(%edi) + +LNormalise_shift_up_to_31: + bsrl %eax,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%ebx,%eax + shl %cl,%ebx + subl %ecx,EXP(%edi) + +LNormalise_shift_done: + testb $0xff,FPU_bits_lost /* bits lost == underflow*/ + jz xL_Normalised + + /* There must be a masked underflow*/ + push %eax + pushl EX_Underflow + call _exception + popl %eax + popl %eax + jmp xL_Normalised + + +/* The operations resulted in a number too small to represent. +// Masked response.*/ +L_underflow_to_zero: + push %eax + call _set_precision_flag_down + popl %eax + + push %eax + pushl EX_Underflow + call _exception + popl %eax + popl %eax + + movb TW_Zero,TAG(%edi) + jmp xL_Store_significand + + +/* The operations resulted in a number too large to represent.*/ +L_overflow: + push %edi + call _arith_overflow + pop %edi + jmp FPU_Arith_exit + + +xSignal_underflow: + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + jmp xL_Normalised + + +#ifdef PARANOID +/* If we ever get here then we have problems! */ +L_bugged: + pushl EX_INTERNAL|0x201 + call EXCEPTION + popl %ebx + jmp FPU_Arith_exit + +L_norm_bugged: + pushl EX_INTERNAL|0x216 + call EXCEPTION + popl %ebx + jmp FPU_Arith_exit + +L_entry_bugged: + pushl EX_INTERNAL|0x217 + call EXCEPTION + popl %ebx + jmp FPU_Arith_exit +#endif PARANOID |
