Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfmul.S')
| -rw-r--r-- | contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfmul.S | 413 |
1 file changed, 413 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfmul.S b/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfmul.S
new file mode 100644
index 000000000000..e6f62c3515f4
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfmul.S
@@ -0,0 +1,413 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Double Precision Multiply
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+
+#define BTMP r5:4
+#define BTMPH r5
+#define BTMPL r4
+
+#define PP_ODD r7:6
+#define PP_ODD_H r7
+#define PP_ODD_L r6
+
+#define ONE r9:8
+#define S_ONE r8
+#define S_ZERO r9
+
+#define PP_HH r11:10
+#define PP_HH_H r11
+#define PP_HH_L r10
+
+#define ATMP r13:12
+#define ATMPH r13
+#define ATMPL r12
+
+#define PP_LL r15:14
+#define PP_LL_H r15
+#define PP_LL_L r14
+
+#define TMP r28
+
+#define MANTBITS 52
+#define HI_MANTBITS 20
+#define EXPBITS 11
+#define BIAS 1024
+#define MANTISSA_TO_INT_BIAS 52
+
+// Some constant to adjust normalization amount in error code
+// Amount to right shift the partial product to get to a denorm
+#define FUDGE 5
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+#define SR_ROUND_OFF 22
+  .text
+  .global __hexagon_muldf3
+  .type __hexagon_muldf3,@function
+  Q6_ALIAS(muldf3)
+  FAST_ALIAS(muldf3)
+  FAST2_ALIAS(muldf3)
+  .p2align 5
+__hexagon_muldf3:
+  {
+    p0 = dfclass(A,#2)
+    p0 = dfclass(B,#2)
+    ATMP = combine(##0x40000000,#0)
+  }
+  {
+    ATMP = insert(A,#MANTBITS,#EXPBITS-1)
+    BTMP = asl(B,#EXPBITS-1)
+    TMP = #-BIAS
+    ONE = #1
+  }
+  {
+    PP_ODD = mpyu(BTMPL,ATMPH)
+    BTMP = insert(ONE,#2,#62)
+  }
+  // since we know that the MSB of the H registers is zero, we should never carry
+  // H <= 2^31-1.  L <= 2^32-1.  Therefore, HL <= 2^63-2^32-2^31+1
+  // Adding 2 HLs, we get 2^64-3*2^32+2 maximum.
+  // Therefore, we can add 3 2^32-1 values safely without carry.  We only need one.
+  {
+    PP_LL = mpyu(ATMPL,BTMPL)
+    PP_ODD += mpyu(ATMPL,BTMPH)
+  }
+  {
+    PP_ODD += lsr(PP_LL,#32)
+    PP_HH = mpyu(ATMPH,BTMPH)
+    BTMP = combine(##BIAS+BIAS-4,#0)
+  }
+  {
+    PP_HH += lsr(PP_ODD,#32)
+    if (!p0) jump .Lmul_abnormal
+    p1 = cmp.eq(PP_LL_L,#0)    // 64 lsb's 0?
+    p1 = cmp.eq(PP_ODD_L,#0)   // 64 lsb's 0?
+  }
+
+  // PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
+  // PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
+
+#undef PP_ODD
+#undef PP_ODD_H
+#undef PP_ODD_L
+#define EXP10 r7:6
+#define EXP1 r7
+#define EXP0 r6
+  {
+    if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
+    EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
+    EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
+  }
+  {
+    PP_LL = neg(PP_HH)
+    EXP0 += add(TMP,EXP1)
+    TMP = xor(AH,BH)
+  }
+  {
+    if (!p2.new) PP_HH = PP_LL
+    p2 = cmp.gt(TMP,#-1)
+    p0 = !cmp.gt(EXP0,BTMPH)
+    p0 = cmp.gt(EXP0,BTMPL)
+    if (!p0.new) jump:nt .Lmul_ovf_unf
+  }
+  {
+    A = convert_d2df(PP_HH)
+    EXP0 = add(EXP0,#-BIAS-58)
+  }
+  {
+    AH += asl(EXP0,#HI_MANTBITS)
+    jumpr r31
+  }
+
+  .falign
+.Lpossible_unf:
+  // We end up with a positive exponent
+  // But we may have rounded up to an exponent of 1.
+  // If the exponent is 1, if we rounded up to it
+  // we need to also raise underflow
+  // Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
+  // And the PP should also have more than one bit set
+  //
+  // Note: ATMP should have abs(PP_HH)
+  // Note: BTMPL should have 0x7FEFFFFF
+  {
+    p0 = cmp.eq(AL,#0)
+    p0 = bitsclr(AH,BTMPL)
+    if (!p0.new) jumpr:t r31
+    BTMPH = #0x7fff
+  }
+  {
+    p0 = bitsset(ATMPH,BTMPH)
+    BTMPL = USR
+    BTMPH = #0x030
+  }
+  {
+    if (p0) BTMPL = or(BTMPL,BTMPH)
+  }
+  {
+    USR = BTMPL
+  }
+  {
+    p0 = dfcmp.eq(A,A)
+    jumpr r31
+  }
+  .falign
+.Lmul_ovf_unf:
+  {
+    A = convert_d2df(PP_HH)
+    ATMP = abs(PP_HH)          // take absolute value
+    EXP1 = add(EXP0,#-BIAS-58)
+  }
+  {
+    AH += asl(EXP1,#HI_MANTBITS)
+    EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
+    BTMPL = ##0x7FEFFFFF
+  }
+  {
+    EXP1 += add(EXP0,##-BIAS-58)
+    //BTMPH = add(clb(ATMP),#-2)
+    BTMPH = #0
+  }
+  {
+    p0 = cmp.gt(EXP1,##BIAS+BIAS-2)  // overflow
+    if (p0.new) jump:nt .Lmul_ovf
+  }
+  {
+    p0 = cmp.gt(EXP1,#0)
+    if (p0.new) jump:nt .Lpossible_unf
+    BTMPH = sub(EXP0,BTMPH)
+    TMP = #63                  // max amount to shift
+  }
+  // Underflow
+  //
+  // PP_HH has the partial product with sticky LSB.
+  // PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
+  // PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
+  // The exponent of PP_HH is in EXP1, which is non-positive (0 or negative)
+  // That's the exponent that happens after the normalization
+  //
+  // EXP0 has the exponent that, when added to the normalized value, is out of range.
+  //
+  // Strategy:
+  //
+  // * Shift down bits, with sticky bit, such that the bits are aligned according
+  //   to the LZ count and appropriate exponent, but not all the way to mantissa
+  //   field, keep around the last few bits.
+  // * Put a 1 near the MSB
+  // * Check the LSBs for inexact; if inexact also set underflow
+  // * Convert [u]d2df -- will correctly round according to rounding mode
+  // * Replace exponent field with zero
+
+  {
+    BTMPL = #0                 // offset for extract
+    BTMPH = sub(#FUDGE,BTMPH)  // amount to right shift
+  }
+  {
+    p3 = cmp.gt(PP_HH_H,#-1)   // is it positive?
+    BTMPH = min(BTMPH,TMP)     // Don't shift more than 63
+    PP_HH = ATMP
+  }
+  {
+    TMP = USR
+    PP_LL = extractu(PP_HH,BTMP)
+  }
+  {
+    PP_HH = asr(PP_HH,BTMPH)
+    BTMPL = #0x0030            // underflow flag
+    AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
+  }
+  {
+    p0 = cmp.gtu(ONE,PP_LL)    // Did we extract all zeros?
+    if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE)  // add sticky bit
+    PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3)  // Add back in a bit so we can use convert instruction
+  }
+  {
+    PP_LL = neg(PP_HH)
+    p1 = bitsclr(PP_HH_L,#0x7)        // Are the LSB's clear?
+    if (!p1.new) TMP = or(BTMPL,TMP)  // If not, Inexact+Underflow
+  }
+  {
+    if (!p3) PP_HH = PP_LL
+    USR = TMP
+  }
+  {
+    A = convert_d2df(PP_HH)    // Do rounding
+    p0 = dfcmp.eq(A,A)         // realize exception
+  }
+  {
+    AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1)  // Insert correct exponent
+    jumpr r31
+  }
+  .falign
+.Lmul_ovf:
+  // We get either max finite value or infinity.  Either way, overflow+inexact
+  {
+    TMP = USR
+    ATMP = combine(##0x7fefffff,#-1)  // positive max finite
+    A = PP_HH
+  }
+  {
+    PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF)  // rounding bits
+    TMP = or(TMP,#0x28)               // inexact + overflow
+    BTMP = combine(##0x7ff00000,#0)   // positive infinity
+  }
+  {
+    USR = TMP
+    PP_LL_L ^= lsr(AH,#31)     // Does sign match rounding?
+    TMP = PP_LL_L              // unmodified rounding mode
+  }
+  {
+    p0 = !cmp.eq(TMP,#1)       // If not round-to-zero and
+    p0 = !cmp.eq(PP_LL_L,#2)   // Not rounding the other way,
+    if (p0.new) ATMP = BTMP    // we should get infinity
+    p0 = dfcmp.eq(A,A)         // Realize FP exception if enabled
+  }
+  {
+    A = insert(ATMP,#63,#0)    // insert inf/maxfinite, leave sign
+    jumpr r31
+  }
+
+.Lmul_abnormal:
+  {
+    ATMP = extractu(A,#63,#0)  // strip off sign
+    BTMP = extractu(B,#63,#0)  // strip off sign
+  }
+  {
+    p3 = cmp.gtu(ATMP,BTMP)
+    if (!p3.new) A = B         // sort values
+    if (!p3.new) B = A         // sort values
+  }
+  {
+    // Any NaN --> NaN, possibly raise invalid if sNaN
+    p0 = dfclass(A,#0x0f)      // A not NaN?
+    if (!p0.new) jump:nt .Linvalid_nan
+    if (!p3) ATMP = BTMP
+    if (!p3) BTMP = ATMP
+  }
+  {
+    // Infinity * nonzero number is infinity
+    p1 = dfclass(A,#0x08)      // A is infinity
+    p1 = dfclass(B,#0x0e)      // B is nonzero
+  }
+  {
+    // Infinity * zero --> NaN, raise invalid
+    // Other zeros return zero
+    p0 = dfclass(A,#0x08)      // A is infinity
+    p0 = dfclass(B,#0x01)      // B is zero
+  }
+  {
+    if (p1) jump .Ltrue_inf
+    p2 = dfclass(B,#0x01)
+  }
+  {
+    if (p0) jump .Linvalid_zeroinf
+    if (p2) jump .Ltrue_zero   // so return zero
+    TMP = ##0x7c000000
+  }
+  // We are left with a normal or subnormal times a subnormal.  A > B
+  // If A and B are both very small (exp(a) < BIAS-MANTBITS),
+  // we go to a single sticky bit, which we can round easily.
+  // If A and B might multiply to something bigger, decrease A exponent and increase
+  // B exponent and try again
+  {
+    p0 = bitsclr(AH,TMP)
+    if (p0.new) jump:nt .Lmul_tiny
+  }
+  {
+    TMP = cl0(BTMP)
+  }
+  {
+    TMP = add(TMP,#-EXPBITS)
+  }
+  {
+    BTMP = asl(BTMP,TMP)
+  }
+  {
+    B = insert(BTMP,#63,#0)
+    AH -= asl(TMP,#HI_MANTBITS)
+  }
+  jump __hexagon_muldf3
+.Lmul_tiny:
+  {
+    TMP = USR
+    A = xor(A,B)               // get sign bit
+  }
+  {
+    TMP = or(TMP,#0x30)        // Inexact + Underflow
+    A = insert(ONE,#63,#0)     // put in rounded up value
+    BTMPH = extractu(TMP,#2,#SR_ROUND_OFF)  // get rounding mode
+  }
+  {
+    USR = TMP
+    p0 = cmp.gt(BTMPH,#1)      // Round towards pos/neg inf?
+    if (!p0.new) AL = #0       // If not, zero
+    BTMPH ^= lsr(AH,#31)       // rounding my way --> set LSB
+  }
+  {
+    p0 = cmp.eq(BTMPH,#3)      // if rounding towards right inf
+    if (!p0.new) AL = #0       // don't go to zero
+    jumpr r31
+  }
+.Linvalid_zeroinf:
+  {
+    TMP = USR
+  }
+  {
+    A = #-1
+    TMP = or(TMP,#2)
+  }
+  {
+    USR = TMP
+  }
+  {
+    p0 = dfcmp.uo(A,A)         // force exception if enabled
+    jumpr r31
+  }
+.Linvalid_nan:
+  {
+    p0 = dfclass(B,#0x0f)      // if B is not NaN
+    TMP = convert_df2sf(A)     // will generate invalid if sNaN
+    if (p0.new) B = A          // make it whatever A is
+  }
+  {
+    BL = convert_df2sf(B)      // will generate invalid if sNaN
+    A = #-1
+    jumpr r31
+  }
+  .falign
+.Ltrue_zero:
+  {
+    A = B
+    B = A
+  }
+.Ltrue_inf:
+  {
+    BH = extract(BH,#1,#31)
+  }
+  {
+    AH ^= asl(BH,#31)
+    jumpr r31
+  }
+END(__hexagon_muldf3)
+
+#undef ATMP
+#undef ATMPL
+#undef ATMPH
+#undef BTMP
+#undef BTMPL
+#undef BTMPH
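
Two short C sketches follow for readers tracing the arithmetic above; they are illustrative reconstructions, not part of the patch. The first mirrors the core multiply: each 64-bit mantissa is split into 32-bit halves, three partial products (PP_LL, PP_ODD, PP_HH) are accumulated, and the discarded low 64 bits are folded into a sticky LSB before convert_d2df rounds. The function name mul_hi_sticky is hypothetical; the carry-free precondition is the one the routine's insert/asl setup establishes.

#include <stdint.h>

/* Sketch of the carry-free partial-product scheme. Precondition (from the
   routine's setup): the high half of each operand has its MSB clear
   (ah, bh <= 2^31-1), so bl*ah + al*bh + (pp_ll >> 32) cannot carry out
   of 64 bits -- the same argument as the comment in the packet code. */
static uint64_t mul_hi_sticky(uint64_t a, uint64_t b) {
    uint64_t al = (uint32_t)a, ah = a >> 32;
    uint64_t bl = (uint32_t)b, bh = b >> 32;

    uint64_t pp_ll  = al * bl;              /* PP_LL  = mpyu(ATMPL,BTMPL)  */
    uint64_t pp_odd = bl * ah               /* PP_ODD = mpyu(BTMPL,ATMPH)  */
                    + al * bh               /* PP_ODD += mpyu(ATMPL,BTMPH) */
                    + (pp_ll >> 32);        /* PP_ODD += lsr(PP_LL,#32)    */
    uint64_t pp_hh  = ah * bh               /* PP_HH  = mpyu(ATMPH,BTMPH)  */
                    + (pp_odd >> 32);       /* PP_HH += lsr(PP_ODD,#32)    */

    /* The dropped low 64 bits are (pp_odd[31:0] << 32) | pp_ll[31:0];
       like the p1/S_ONE logic above, collapse them into a sticky LSB. */
    if (((uint32_t)pp_ll | (uint32_t)pp_odd) != 0)
        pp_hh |= 1;
    return pp_hh;
}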
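
The second sketch covers the fast path's exponent fixup: convert_d2df rounds the 64-bit signed product in hardware, and AH += asl(EXP0,#HI_MANTBITS) then adds the remaining exponent adjustment straight into the IEEE-754 exponent field, i.e. a wraparound add of exp_adjust << 52 to the bit pattern. apply_exponent is a hypothetical name, and the shortcut is only valid on the fast path, where the adjusted exponent is known to stay in range (.Lmul_ovf_unf handles the rest).

#include <stdint.h>
#include <string.h>

/* Sketch of the fast-path exponent patch: scale the converted product by
   2^exp_adjust by adding exp_adjust directly to the exponent field. */
static double apply_exponent(int64_t pp_hh, int32_t exp_adjust) {
    double d = (double)pp_hh;   /* convert_d2df(PP_HH): rounds per mode */
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);
    bits += (uint64_t)(int64_t)exp_adjust << 52;  /* AH += asl(EXP0,#HI_MANTBITS) */
    memcpy(&d, &bits, sizeof d);
    return d;
}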
