diff options
Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfdiv.S')
| -rw-r--r-- | contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfdiv.S | 491 |
1 files changed, 491 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfdiv.S b/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfdiv.S
new file mode 100644
index 000000000000..202965ec4789
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfdiv.S
@@ -0,0 +1,491 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Double Precision Divide
+// __hexagon_divdf3: divides the double in r1:0 (A) by the double in r3:2 (B); the quotient is left in r1:0 and the routine returns via r31.
+#define A r1:0                  // dividend on entry; every exit path writes the result here
+#define AH r1                   // high word of A: sign bit, exponent field, top mantissa bits
+#define AL r0                   // low word of A
+
+#define B r3:2                  // divisor on entry
+#define BH r3                   // high word of B
+#define BL r2                   // low word of B
+
+#define Q r5:4                  // 64-bit quotient accumulator (fed to convert_d2df at the end)
+#define QH r5                   // also used as scratch before Q is cleared
+#define QL r4                   // also used as scratch before Q is cleared
+
+#define PROD r7:6               // scratch pair: products, quotient increments, abs(Q)
+#define PRODHI r7
+#define PRODLO r6
+
+#define SFONE r8                // 0x3f800001: single-float 1.0 pattern with the LSB set
+#define SFDEN r9                // SFONE OR'ed with the top SF_MANTBITS mantissa bits of the divisor
+#define SFERROR r10             // residual (1.0 - SFRECIP*SFDEN) during reciprocal refinement
+#define SFRECIP r11             // single-float reciprocal estimate of SFDEN
+
+#define EXPBA r13:12            // EXPB:EXPA viewed as one register pair
+#define EXPB r13                // exponent field of the divisor
+#define EXPA r12                // exponent field of the dividend, later the exponent difference
+
+#define REMSUB2 r15:14          // PRODHI*DENOMHI product, subtracted from REM shifted left by 32 in DIV_ITER1B
+
+
+
+#define SIGN r28                // AH ^ BH; only its top bit (sign of the quotient) is tested
+
+#define Q_POSITIVE p3           // SIGN > -1, i.e. the quotient's sign bit is clear
+#define NORMAL p2               // written from dfclass(.,DFCLASS_NORMAL) for both operands
+#define NO_OVF_UNF p1           // exponent difference is > -DF_BIAS+1 and <= DF_BIAS-1
+#define P_TMP p0                // scratch predicate
+
+#define RECIPEST_SHIFT 3        // left shift applied to the integer reciprocal estimate
+#define QADJ 61                 // subtracted from the exponent difference before it is added to the converted quotient
+
+#define DFCLASS_NORMAL 0x02     // class-mask bits handed to dfclass
+#define DFCLASS_NUMBER 0x0F     // all class bits below 0x10 (0x10 is tested in .Ldiv_nan, presumably the NaN class)
+#define DFCLASS_INFINITE 0x08
+#define DFCLASS_ZERO 0x01
+#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
+#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
+
+#define DF_MANTBITS 52          // stored mantissa bits of a double
+#define DF_EXPBITS 11           // exponent bits of a double
+#define SF_MANTBITS 23          // stored mantissa bits of a single
+#define SF_EXPBITS 8            // not referenced in this file
+#define DF_BIAS 0x3ff           // double exponent bias
+
+#define SR_ROUND_OFF 22         // bit offset in USR of the 2-bit field read as the rounding mode in .Ldiv_ovf
+// Symbol-alias helpers: each makes a second global name (.set) for __hexagon_<TAG>; END emits the .size directive.
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+    .text
+    .global __hexagon_divdf3
+    .type __hexagon_divdf3,@function
+    Q6_ALIAS(divdf3)
+    FAST_ALIAS(divdf3)
+    FAST2_ALIAS(divdf3)
+    .p2align 5
+__hexagon_divdf3:
+    {
+        NORMAL = dfclass(A,#DFCLASS_NORMAL)     // two writes to one predicate in a packet:
+        NORMAL = dfclass(B,#DFCLASS_NORMAL)     // Hexagon ANDs them, so NORMAL = both operands normal
+        EXPBA = combine(BH,AH)                  // EXPB = BH, EXPA = AH; exponent fields are extracted below
+        SIGN = xor(AH,BH)                       // top bit = sign of the quotient
+    }
+#undef A
+#undef AH
+#undef AL
+#undef B
+#undef BH
+#undef BL
+#define REM r1:0                // from here the dividend registers hold the running remainder
+#define REMHI r1
+#define REMLO r0
+#define DENOM r3:2              // and the divisor registers hold the integer denominator
+#define DENOMHI r3
+#define DENOMLO r2
+    {
+        if (!NORMAL) jump .Ldiv_abnormal        // zero / denormal / infinity / NaN handled out of line
+        PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)    // top 23 mantissa bits of the divisor
+        SFONE = ##0x3f800001                    // .Ldiv_abnormal later reads SFONE without reloading it
+    }
+    {
+        SFDEN = or(SFONE,PRODLO)                // single-float 1.mantissa approximation of the divisor, LSB forced to 1
+        EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)       // biased exponent of the divisor
+        EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)       // biased exponent of the dividend
+        Q_POSITIVE = cmp.gt(SIGN,#-1)
+    }
+#undef SIGN
+#define ONE r28                 // r28 is reused: SIGN has been consumed into Q_POSITIVE
+.Ldenorm_continue:              // re-entry point for the (de)normal / (de)normal case of .Ldiv_abnormal
+    {
+        SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)   // initial reciprocal estimate
+        SFERROR = and(SFONE,#-2)                // clear the LSB of SFONE: 0x3f800000
+        ONE = #1
+        EXPA = sub(EXPA,EXPB)                   // exponent difference of the quotient
+    }
+#undef EXPB
+#define RECIPEST r13            // r13 is reused for the integer reciprocal estimate
+    {
+        SFERROR -= sfmpy(SFRECIP,SFDEN):lib     // error = 1 - recip*den
+        REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)       // replace sign+exponent with 0x001: hidden bit set
+        RECIPEST = ##0x00800000 << RECIPEST_SHIFT               // hidden-bit position for the 23-bit mantissa inserted below
+    }
+    {
+        SFRECIP += sfmpy(SFRECIP,SFERROR):lib   // recip += recip*error (first refinement)
+        DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)     // same sign/exponent replacement for the denominator
+        SFERROR = and(SFONE,#-2)
+    }
+    {
+        SFERROR -= sfmpy(SFRECIP,SFDEN):lib     // error = 1 - recip*den again
+        QH = #-DF_BIAS+1                        // lower bound for the exponent difference
+        QL = #DF_BIAS-1                         // upper bound for the exponent difference
+    }
+    {
+        SFRECIP += sfmpy(SFRECIP,SFERROR):lib   // second refinement
+        NO_OVF_UNF = cmp.gt(EXPA,QH)            // ANDed with the next write:
+        NO_OVF_UNF = !cmp.gt(EXPA,QL)           // QH < EXPA <= QL
+    }
+    {
+        RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) // mantissa of the refined reciprocal, below the hidden bit
+        Q = #0                                  // QH/QL scratch use is over; start accumulating the quotient
+        EXPA = add(EXPA,#-QADJ)
+    }
+#undef SFERROR
+#undef SFRECIP
+#define TMP r10                 // r10/r11 are reused as scratch
+#define TMP1 r11
+    {
+        RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))      // lower the estimate by 3 units; presumably keeps it from exceeding the true reciprocal - TODO confirm
+    }
+// DIV_ITER1B: PROD = RECIPEST*REMHI; REM <<= REMSHIFT; REM -= PRODHI*DENOMLO and (PRODHI*DENOMHI)<<32; Q += QSHIFTINSN(PRODHI:0,QSHIFT); then EXTRA.
+#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
+    { \
+        PROD = mpyu(RECIPEST,REMHI); \
+        REM = asl(REM,# ## ( REMSHIFT )); \
+    }; \
+    { \
+        PRODLO = # ## 0; \
+        REM -= mpyu(PRODHI,DENOMLO); \
+        REMSUB2 = mpyu(PRODHI,DENOMHI); \
+    }; \
+    { \
+        Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
+        REM -= asl(REMSUB2, # ## 32); \
+        EXTRA \
+    }
+
+// Four steps; each shifts REM left by 15 and adds its partial quotient into Q at a position 15 bits lower than the one before.
+    DIV_ITER1B(ASL,14,15,)
+    DIV_ITER1B(ASR,1,15,)
+    DIV_ITER1B(ASR,16,15,)
+    DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
+
+#undef REMSUB2
+#define TMPPAIR r15:14
+#define TMPPAIRHI r15
+#define TMPPAIRLO r14
+#undef RECIPEST
+#define EXPB r13                // r13 is a general exponent scratch again
+    {
+        // compare or sub with carry
+        TMPPAIR = sub(REM,DENOM)
+        P_TMP = cmp.gtu(DENOM,REM)
+        // set up amt to add to q
+        if (!P_TMP.new) PRODLO = #2             // PROD was zeroed by the last DIV_ITER1B, so PROD is 0 or 2
+    }
+    {
+        Q = add(Q,PROD)
+        if (!P_TMP) REM = TMPPAIR               // REM >= DENOM: take the subtracted remainder
+        TMPPAIR = #0
+    }
+    {
+        P_TMP = cmp.eq(REM,TMPPAIR)
+        if (!P_TMP.new) QL = or(QL,ONE)         // non-zero remainder: set the low (sticky) bit of Q
+    }
+    {
+        PROD = neg(Q)
+    }
+    {
+        if (!Q_POSITIVE) Q = PROD               // apply the quotient sign
+    }
+#undef REM
+#undef REMHI
+#undef REMLO
+#undef DENOM
+#undef DENOMLO
+#undef DENOMHI
+#define A r1:0                  // r1:0 / r3:2 are named A / B again for the result
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+    {
+        A = convert_d2df(Q)                     // signed 64-bit integer -> double
+        if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
+    }
+    {
+        AH += asl(EXPA,#DF_MANTBITS-32)         // add the adjusted exponent difference into the exponent field
+        jumpr r31
+    }
+// Exponent difference was outside the safe range: classify as overflow, possibly-normal, or underflow.
+.Ldiv_ovf_unf:
+    {
+        AH += asl(EXPA,#DF_MANTBITS-32)
+        EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) // same packet, so this reads AH before the adjustment above
+    }
+    {
+        PROD = abs(Q)
+        EXPA = add(EXPA,EXPB)                   // exponent the result would have with unbounded range
+    }
+    {
+        P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)      // overflow
+        if (P_TMP.new) jump:nt .Ldiv_ovf
+    }
+    {
+        P_TMP = cmp.gt(EXPA,#0)
+        if (P_TMP.new) jump:nt .Lpossible_unf       // round up to normal possible...
+    }
+    // Underflow
+    // We know what the infinite range exponent should be (EXPA)
+    // Q is 2's complement, PROD is abs(Q)
+    // Normalize Q, shift right, add a high bit, convert, change exponent
+
+#define FUDGE1 7    // how much to shift right
+#define FUDGE2 4    // how many guard/round to keep at lsbs
+
+    {
+        EXPB = add(clb(PROD),#-1)           // doesn't need to be added in since
+        EXPA = sub(#FUDGE1,EXPA)            // we extract post-converted exponent
+        TMP = USR
+        TMP1 = #63
+    }
+    {
+        EXPB = min(EXPA,TMP1)               // right-shift amount, capped at 63
+        TMP1 = or(TMP,#0x030)               // USR value with the Unf+Inexact bits (0x30) set, used if bits are lost
+        PROD = asl(PROD,EXPB)               // same packet: shifts by the old EXPB (clb-1), normalizing PROD
+        EXPA = #0                           // EXPBA now = (width EXPB, offset 0) for the extractu below
+    }
+    {
+        TMPPAIR = extractu(PROD,EXPBA)              // bits that will get shifted out
+        PROD = lsr(PROD,EXPB)                   // shift out bits
+        B = #1
+    }
+    {
+        P_TMP = cmp.gtu(B,TMPPAIR)              // true when no shifted-out bit was set
+        if (!P_TMP.new) PRODLO = or(BL,PRODLO)  // otherwise fold them into the low (sticky) bit
+        PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)  // the "add a high bit" step from the comment above
+    }
+    {
+        Q = neg(PROD)
+        P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)  // are all FUDGE2 guard/round bits clear?
+        if (!P_TMP.new) TMP = TMP1              // no: result is inexact, use the flagged USR value
+    }
+    {
+        USR = TMP
+        if (Q_POSITIVE) Q = PROD
+        TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)    // exponent correction applied after the conversion
+    }
+    {
+        A = convert_d2df(Q)
+    }
+    {
+        AH += asl(TMP,#DF_MANTBITS-32)
+        jumpr r31
+    }
+
+
+.Lpossible_unf:
+    // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
+    // The answer is correct, but we need to raise Underflow
+    {
+        B = extractu(A,#63,#0)                  // abs(A): low 63 bits
+        TMPPAIR = combine(##0x00100000,#0)      // min normal
+        TMP = #0x7FFF
+    }
+    {
+        P_TMP = dfcmp.eq(TMPPAIR,B)     // Is everything zero in the rounded value...
+        P_TMP = bitsset(PRODHI,TMP)     // but a bunch of bits set in the unrounded abs(quotient)?
+    }
+
+#if (__HEXAGON_ARCH__ == 60)
+    TMP = USR       // If not, just return
+    if (!P_TMP) jumpr r31   // Else, we want to set Unf+Inexact
+                // Note that inexact is already set...
+#else
+    {
+        if (!P_TMP) jumpr r31           // If not, just return
+        TMP = USR               // Else, we want to set Unf+Inexact
+    }                       // Note that inexact is already set...
+#endif
+    {
+        TMP = or(TMP,#0x30)
+    }
+    {
+        USR = TMP
+    }
+    {
+        p0 = dfcmp.eq(A,A)              // result unused; presumably lets the newly set flags take effect - TODO confirm
+        jumpr r31
+    }
+
+.Ldiv_ovf:
+
+    // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
+
+    {
+        TMP = USR
+        B = combine(##0x7fefffff,#-1)           // largest finite magnitude
+        AH = mux(Q_POSITIVE,#0,#-1)             // only the sign bit of AH survives the insert below
+    }
+    {
+        PROD = combine(##0x7ff00000,#0)         // infinity
+        QH = extractu(TMP,#2,#SR_ROUND_OFF)     // 2-bit rounding-mode field of USR
+        TMP = or(TMP,#0x28)                     // flag bits 0x20|0x08; 0x20 is shared with the Unf+Inexact mask 0x30
+    }
+    {
+        USR = TMP
+        QH ^= lsr(AH,#31)                       // flip the low mode bit when the result is negative
+        QL = QH                                 // same packet: QL gets the mode as read from USR, before the xor
+    }
+    {
+        p0 = !cmp.eq(QL,#1)     // if not round-to-zero
+        p0 = !cmp.eq(QH,#2)     // and not rounding the other way
+        if (p0.new) B = PROD        // go to inf
+        p0 = dfcmp.eq(B,B)      // get exceptions
+    }
+    {
+        A = insert(B,#63,#0)                    // keep A's sign bit, take magnitude from B
+        jumpr r31
+    }
+// Abnormal operands: at least one of A, B is not a normal number. r28 still holds SIGN from the first packet.
+#undef ONE
+#define SIGN r28
+#undef NORMAL
+#undef NO_OVF_UNF
+#define P_INF p1
+#define P_ZERO p2
+.Ldiv_abnormal:
+    {
+        P_TMP = dfclass(A,#DFCLASS_NUMBER)      // ANDed: both operands are numbers
+        P_TMP = dfclass(B,#DFCLASS_NUMBER)
+        Q_POSITIVE = cmp.gt(SIGN,#-1)           // the main path had not computed this yet when it jumped here
+    }
+    {
+        P_INF = dfclass(A,#DFCLASS_INFINITE)    // ANDed: infinity / infinity
+        P_INF = dfclass(B,#DFCLASS_INFINITE)
+    }
+    {
+        P_ZERO = dfclass(A,#DFCLASS_ZERO)       // ANDed: zero / zero
+        P_ZERO = dfclass(B,#DFCLASS_ZERO)
+    }
+    {
+        if (!P_TMP) jump .Ldiv_nan
+        if (P_INF) jump .Ldiv_invalid
+    }
+    {
+        if (P_ZERO) jump .Ldiv_invalid
+    }
+    {
+        P_ZERO = dfclass(A,#DFCLASS_NONZERO)        // nonzero
+        P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)    // non-infinite
+    }
+    {
+        P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
+        P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
+    }
+    {
+        if (!P_ZERO) jump .Ldiv_zero_result     // A is zero or B is infinite
+        if (!P_INF) jump .Ldiv_inf_result       // A is infinite or B is zero
+    }
+    // Now we've narrowed it down to (de)normal / (de)normal
+    // Set up A/EXPA B/EXPB and go back
+#undef P_ZERO
+#undef P_INF
+#define P_TMP2 p1
+    {
+        P_TMP = dfclass(A,#DFCLASS_NORMAL)
+        P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
+        TMP = ##0x00100000                      // hidden-bit position in the high word
+    }
+    {
+        EXPBA = combine(BH,AH)
+        AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)      // clear out hidden bit, sign bit
+        BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)      // clear out hidden bit, sign bit
+    }
+    {
+        if (P_TMP) AH = or(AH,TMP)              // if normal, add back in hidden bit
+        if (P_TMP2) BH = or(BH,TMP)             // if normal, add back in hidden bit
+    }
+    {
+        QH = add(clb(A),#-DF_EXPBITS)           // left shift that brings A's leading bit to the hidden-bit position
+        QL = add(clb(B),#-DF_EXPBITS)           // likewise for B
+        TMP = #1
+    }
+    {
+        EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
+        EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
+    }
+    {
+        A = asl(A,QH)
+        B = asl(B,QL)
+        if (!P_TMP) EXPA = sub(TMP,QH)          // denormal: effective exponent is 1 minus the normalizing shift
+        if (!P_TMP2) EXPB = sub(TMP,QL)
+    }   // recreate values needed by resume code
+    {
+        PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
+    }
+    {
+        SFDEN = or(SFONE,PRODLO)
+        jump .Ldenorm_continue
+    }
+// Signed-zero result: reached when A is zero or B is infinite.
+.Ldiv_zero_result:
+    {
+        AH = xor(AH,BH)                         // sign bit of the result
+        B = #0
+    }
+    {
+        A = insert(B,#63,#0)                    // keep the sign bit, zero everything else
+        jumpr r31
+    }
+.Ldiv_inf_result:                // signed-infinity result: A is infinite or B is zero
+    {
+        p2 = dfclass(B,#DFCLASS_ZERO)           // ANDed: finite / zero, the divide-by-zero case
+        p2 = dfclass(A,#DFCLASS_NONINFINITE)
+    }
+    {
+        TMP = USR
+        if (!p2) jump 1f                        // infinite dividend: no flag to raise
+        AH = xor(AH,BH)                         // sign bit of the result (executed on both paths)
+    }
+    {
+        TMP = or(TMP,#0x04) // DBZ
+    }
+    {
+        USR = TMP
+    }
+1:
+    {
+        B = combine(##0x7ff00000,#0)            // infinity
+        p0 = dfcmp.uo(B,B)  // take possible exception
+    }
+    {
+        A = insert(B,#63,#0)                    // keep the sign bit, take the infinity magnitude
+        jumpr r31
+    }
+.Ldiv_nan:                       // at least one operand failed the DFCLASS_NUMBER test
+    {
+        p0 = dfclass(A,#0x10)
+        p1 = dfclass(B,#0x10)
+        if (!p0.new) A = B                      // make both registers hold an operand of class 0x10
+        if (!p1.new) B = A
+    }
+    {
+        QH = convert_df2sf(A)   // get possible invalid exceptions
+        QL = convert_df2sf(B)
+    }
+    {
+        A = #-1                                 // all-ones bit pattern is returned
+        jumpr r31
+    }
+// Invalid operation: infinity / infinity or zero / zero.
+.Ldiv_invalid:
+    {
+        TMP = ##0x7f800001                      // single-float NaN pattern; presumably signaling - TODO confirm
+    }
+    {
+        A = convert_sf2df(TMP)  // get invalid, get DF qNaN
+        jumpr r31
+    }
+END(__hexagon_divdf3) |
