24 files changed, 287 insertions, 80 deletions
diff --git a/lib/builtins/aarch64/fp_mode.c b/lib/builtins/aarch64/fp_mode.c
new file mode 100644
index 0000000000000..5a413689d2c8e
--- /dev/null
+++ b/lib/builtins/aarch64/fp_mode.c
@@ -0,0 +1,59 @@
+//===----- lib/aarch64/fp_mode.c - Floaing-point mode utilities ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include "../fp_mode.h"
+
+#define AARCH64_TONEAREST  0x0
+#define AARCH64_UPWARD     0x1
+#define AARCH64_DOWNWARD   0x2
+#define AARCH64_TOWARDZERO 0x3
+#define AARCH64_RMODE_MASK (AARCH64_TONEAREST | AARCH64_UPWARD | \
+                            AARCH64_DOWNWARD | AARCH64_TOWARDZERO)
+#define AARCH64_RMODE_SHIFT 22
+
+#define AARCH64_INEXACT     0x10
+
+#ifndef __ARM_FP
+// For soft float targets, allow changing rounding mode by overriding the weak
+// __aarch64_fe_default_rmode symbol.
+FE_ROUND_MODE __attribute__((weak)) __aarch64_fe_default_rmode = FE_TONEAREST;
+#endif
+
+FE_ROUND_MODE __fe_getround() {
+#ifdef __ARM_FP
+  uint64_t fpcr;
+  __asm__ __volatile__("mrs  %0, fpcr" : "=r" (fpcr));
+  fpcr = fpcr >> AARCH64_RMODE_SHIFT & AARCH64_RMODE_MASK;
+  switch (fpcr) {
+    case AARCH64_UPWARD:
+      return FE_UPWARD;
+    case AARCH64_DOWNWARD:
+      return FE_DOWNWARD;
+    case AARCH64_TOWARDZERO:
+      return FE_TOWARDZERO;
+    case AARCH64_TONEAREST:
+    default:
+      return FE_TONEAREST;
+  }
+#else
+  return __aarch64_fe_default_rmode;
+#endif
+}
+
+int __fe_raise_inexact() {
+#ifdef __ARM_FP
+  uint64_t fpsr;
+  __asm__ __volatile__("mrs  %0, fpsr" : "=r" (fpsr));
+  __asm__ __volatile__("msr  fpsr, %0" : : "ri" (fpsr | AARCH64_INEXACT));
+  return 0;
+#else
+  return 0;
+#endif
+}
diff --git a/lib/builtins/adddf3.c b/lib/builtins/adddf3.c
index f2727fafcabef..26f11bfa2216a 100644
--- a/lib/builtins/adddf3.c
+++ b/lib/builtins/adddf3.c
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements double-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
+// This file implements double-precision soft-float addition.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/addsf3.c b/lib/builtins/addsf3.c
index 8fe8622aadd96..9f1d517c1fa1c 100644
--- a/lib/builtins/addsf3.c
+++ b/lib/builtins/addsf3.c
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements single-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
+// This file implements single-precision soft-float addition.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/addtf3.c b/lib/builtins/addtf3.c
index 570472a145548..86e4f4cfc3fc6 100644
--- a/lib/builtins/addtf3.c
+++ b/lib/builtins/addtf3.c
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements quad-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
+// This file implements quad-precision soft-float addition.
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,7 +16,7 @@
 #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
 #include "fp_add_impl.inc"
 
-COMPILER_RT_ABI long double __addtf3(long double a, long double b) {
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) {
   return __addXf3__(a, b);
 }
 
diff --git a/lib/builtins/arm/fp_mode.c b/lib/builtins/arm/fp_mode.c
new file mode 100644
index 0000000000000..300b71935ad4e
--- /dev/null
+++ b/lib/builtins/arm/fp_mode.c
@@ -0,0 +1,59 @@
+//===----- lib/arm/fp_mode.c - Floaing-point mode utilities -------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include "../fp_mode.h"
+
+#define ARM_TONEAREST  0x0
+#define ARM_UPWARD     0x1
+#define ARM_DOWNWARD   0x2
+#define ARM_TOWARDZERO 0x3
+#define ARM_RMODE_MASK (ARM_TONEAREST | ARM_UPWARD | \
+                        ARM_DOWNWARD | ARM_TOWARDZERO)
+#define ARM_RMODE_SHIFT 22
+
+#define ARM_INEXACT     0x1000
+
+#ifndef __ARM_FP
+// For soft float targets, allow changing rounding mode by overriding the weak
+// __arm_fe_default_rmode symbol.
+FE_ROUND_MODE __attribute__((weak)) __arm_fe_default_rmode = FE_TONEAREST;
+#endif
+
+FE_ROUND_MODE __fe_getround() {
+#ifdef __ARM_FP
+  uint32_t fpscr;
+  __asm__ __volatile__("vmrs  %0, fpscr" : "=r" (fpscr));
+  fpscr = fpscr >> ARM_RMODE_SHIFT & ARM_RMODE_MASK;
+  switch (fpscr) {
+    case ARM_UPWARD:
+      return FE_UPWARD;
+    case ARM_DOWNWARD:
+      return FE_DOWNWARD;
+    case ARM_TOWARDZERO:
+      return FE_TOWARDZERO;
+    case ARM_TONEAREST:
+    default:
+      return FE_TONEAREST;
+  }
+#else
+  return __arm_fe_default_rmode;
+#endif
+}
+
+int __fe_raise_inexact() {
+#ifdef __ARM_FP
+  uint32_t fpscr;
+  __asm__ __volatile__("vmrs  %0, fpscr" : "=r" (fpscr));
+  __asm__ __volatile__("vmsr  fpscr, %0" : : "ri" (fpscr | ARM_INEXACT));
+  return 0;
+#else
+  return 0;
+#endif
+}
diff --git a/lib/builtins/atomic.c b/lib/builtins/atomic.c
index 0f82803a64163..32b3a0f9ad239 100644
--- a/lib/builtins/atomic.c
+++ b/lib/builtins/atomic.c
@@ -51,9 +51,11 @@ static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
 ////////////////////////////////////////////////////////////////////////////////
 #ifdef __FreeBSD__
 #include <errno.h>
-#include <machine/atomic.h>
+// clang-format off
 #include <sys/types.h>
+#include <machine/atomic.h>
 #include <sys/umtx.h>
+// clang-format on
 typedef struct _usem Lock;
 __inline static void unlock(Lock *l) {
   __c11_atomic_store((_Atomic(uint32_t) *)&l->_count, 1, __ATOMIC_RELEASE);
diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c
index 76dc1968cc7ed..80d3b2f9f17d4 100644
--- a/lib/builtins/clear_cache.c
+++ b/lib/builtins/clear_cache.c
@@ -23,8 +23,10 @@ uintptr_t GetCurrentProcess(void);
 #endif
 
 #if defined(__FreeBSD__) && defined(__arm__)
-#include <machine/sysarch.h>
+// clang-format off
 #include <sys/types.h>
+#include <machine/sysarch.h>
+// clang-format on
 #endif
 
 #if defined(__NetBSD__) && defined(__arm__)
@@ -32,54 +34,16 @@ uintptr_t GetCurrentProcess(void);
 #endif
 
 #if defined(__OpenBSD__) && defined(__mips__)
-#include <machine/sysarch.h>
+// clang-format off
 #include <sys/types.h>
+#include <machine/sysarch.h>
+// clang-format on
 #endif
 
 #if defined(__linux__) && defined(__mips__)
 #include <sys/cachectl.h>
 #include <sys/syscall.h>
 #include <unistd.h>
-#if defined(__ANDROID__) && defined(__LP64__)
-// clear_mips_cache - Invalidates instruction cache for Mips.
-static void clear_mips_cache(const void *Addr, size_t Size) {
-  __asm__ volatile(
-      ".set push\n"
-      ".set noreorder\n"
-      ".set noat\n"
-      "beq %[Size], $zero, 20f\n" // If size == 0, branch around.
-      "nop\n"
-      "daddu %[Size], %[Addr], %[Size]\n" // Calculate end address + 1
-      "rdhwr $v0, $1\n"                   // Get step size for SYNCI.
-                                          // $1 is $HW_SYNCI_Step
-      "beq $v0, $zero, 20f\n"             // If no caches require
-                                          // synchronization, branch
-                                          // around.
-      "nop\n"
-      "10:\n"
-      "synci 0(%[Addr])\n"            // Synchronize all caches around
-                                      // address.
-      "daddu %[Addr], %[Addr], $v0\n" // Add step size.
-      "sltu $at, %[Addr], %[Size]\n"  // Compare current with end
-                                      // address.
-      "bne $at, $zero, 10b\n"         // Branch if more to do.
-      "nop\n"
-      "sync\n" // Clear memory hazards.
-      "20:\n"
-      "bal 30f\n"
-      "nop\n"
-      "30:\n"
-      "daddiu $ra, $ra, 12\n" // $ra has a value of $pc here.
-                              // Add offset of 12 to point to the
-                              // instruction after the last nop.
-                              //
-      "jr.hb $ra\n"           // Return, clearing instruction
-                              // hazards.
-      "nop\n"
-      ".set pop\n"
-      : [ Addr ] "+r"(Addr), [ Size ] "+r"(Size)::"at", "ra", "v0", "memory");
-}
-#endif
 #endif
 
 // The compiler generates calls to __clear_cache() when creating
@@ -123,17 +87,7 @@ void __clear_cache(void *start, void *end) {
 #elif defined(__linux__) && defined(__mips__)
   const uintptr_t start_int = (uintptr_t)start;
   const uintptr_t end_int = (uintptr_t)end;
-#if defined(__ANDROID__) && defined(__LP64__)
-  // Call synci implementation for short address range.
-  const uintptr_t address_range_limit = 256;
-  if ((end_int - start_int) <= address_range_limit) {
-    clear_mips_cache(start, (end_int - start_int));
-  } else {
-    syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
-  }
-#else
   syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
-#endif
 #elif defined(__mips__) && defined(__OpenBSD__)
   cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
 #elif defined(__aarch64__) && !defined(__APPLE__)
@@ -173,6 +127,16 @@ void __clear_cache(void *start, void *end) {
   for (uintptr_t line = start_line; line < end_line; line += line_size)
     __asm__ volatile("icbi 0, %0" : : "r"(line));
   __asm__ volatile("isync");
+#elif defined(__sparc__)
+  const size_t dword_size = 8;
+  const size_t len = (uintptr_t)end - (uintptr_t)start;
+
+  const uintptr_t mask = ~(dword_size - 1);
+  const uintptr_t start_dword = ((uintptr_t)start) & mask;
+  const uintptr_t end_dword = ((uintptr_t)start + len + dword_size - 1) & mask;
+
+  for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size)
+    __asm__ volatile("flush %0" : : "r"(dword));
 #else
 #if __APPLE__
   // On Darwin, sys_icache_invalidate() provides this functionality
diff --git a/lib/builtins/cpu_model.c b/lib/builtins/cpu_model.c
index f953aed959e55..cdeb03794ecc8 100644
--- a/lib/builtins/cpu_model.c
+++ b/lib/builtins/cpu_model.c
@@ -121,7 +121,8 @@ enum ProcessorFeatures {
   FEATURE_GFNI,
   FEATURE_VPCLMULQDQ,
   FEATURE_AVX512VNNI,
-  FEATURE_AVX512BITALG
+  FEATURE_AVX512BITALG,
+  FEATURE_AVX512BF16
 };
 
 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@@ -415,8 +416,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
 
     default: // Unknown family 6 CPU.
       break;
-      break;
     }
+    break;
   default:
     break; // Unknown.
   }
@@ -543,7 +544,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_BMI);
   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
     setFeature(FEATURE_AVX2);
-  if (HasLeaf7 && ((EBX >> 9) & 1))
+  if (HasLeaf7 && ((EBX >> 8) & 1))
     setFeature(FEATURE_BMI2);
   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512F);
@@ -582,6 +583,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX5124FMAPS);
 
+  bool HasLeaf7Subleaf1 =
+      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
+    setFeature(FEATURE_AVX512BF16);
+
   unsigned MaxExtLevel;
   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
 
diff --git a/lib/builtins/divtf3.c b/lib/builtins/divtf3.c
index 6e61d2e31b756..ce462d4d46c12 100644
--- a/lib/builtins/divtf3.c
+++ b/lib/builtins/divtf3.c
@@ -213,7 +213,7 @@ COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
     // Round.
     absResult += round;
     // Insert the sign and return.
-    const long double result = fromRep(absResult | quotientSign);
+    const fp_t result = fromRep(absResult | quotientSign);
     return result;
   }
 }
diff --git a/lib/builtins/emutls.c b/lib/builtins/emutls.c
index da58feb7b906a..e0aa19155f7d3 100644
--- a/lib/builtins/emutls.c
+++ b/lib/builtins/emutls.c
@@ -26,12 +26,23 @@
 #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
 #endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC raises a warning about a nonstandard extension being used for the 0
+// sized element in this array. Disable this for warn-as-error builds.
+#pragma warning(push)
+#pragma warning(disable : 4206)
+#endif
+
 typedef struct emutls_address_array {
   uintptr_t skip_destructor_rounds;
   uintptr_t size; // number of elements in the 'data' array
   void *data[];
 } emutls_address_array;
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
+
 static void emutls_shutdown(emutls_address_array *array);
 
 #ifndef _WIN32
diff --git a/lib/builtins/extenddftf2.c b/lib/builtins/extenddftf2.c
index 849a39da1915b..ddf470ecd6293 100644
--- a/lib/builtins/extenddftf2.c
+++ b/lib/builtins/extenddftf2.c
@@ -14,7 +14,7 @@
 #define DST_QUAD
 #include "fp_extend_impl.inc"
 
-COMPILER_RT_ABI long double __extenddftf2(double a) {
+COMPILER_RT_ABI fp_t __extenddftf2(double a) {
   return __extendXfYf2__(a);
 }
 
diff --git a/lib/builtins/extendsftf2.c b/lib/builtins/extendsftf2.c
index c6368406dde11..cf1fd2face20a 100644
--- a/lib/builtins/extendsftf2.c
+++ b/lib/builtins/extendsftf2.c
@@ -14,7 +14,7 @@
 #define DST_QUAD
 #include "fp_extend_impl.inc"
 
-COMPILER_RT_ABI long double __extendsftf2(float a) {
+COMPILER_RT_ABI fp_t __extendsftf2(float a) {
   return __extendXfYf2__(a);
 }
 
diff --git a/lib/builtins/fixunsxfdi.c b/lib/builtins/fixunsxfdi.c
index 75c4f093794f4..097a4e55e931b 100644
--- a/lib/builtins/fixunsxfdi.c
+++ b/lib/builtins/fixunsxfdi.c
@@ -25,6 +25,13 @@
 // eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
 // mmmm mmmm mmmm
 
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about 'unitialized variable use' here,
+// disable it for builds that warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4700)
+#endif
+
 COMPILER_RT_ABI du_int __fixunsxfdi(long double a) {
   long_double_bits fb;
   fb.f = a;
@@ -36,4 +43,8 @@ COMPILER_RT_ABI du_int __fixunsxfdi(long double a) {
   return fb.u.low.all >> (63 - e);
 }
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
 #endif
+
+#endif //!_ARCH_PPC
diff --git a/lib/builtins/fixunsxfsi.c b/lib/builtins/fixunsxfsi.c
index 1432d8ba92d26..3bc1288d38a19 100644
--- a/lib/builtins/fixunsxfsi.c
+++ b/lib/builtins/fixunsxfsi.c
@@ -25,6 +25,13 @@
 // eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
 // mmmm mmmm mmmm
 
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about 'unitialized variable use' here,
+// disable it for builds that warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4700)
+#endif
+
 COMPILER_RT_ABI su_int __fixunsxfsi(long double a) {
   long_double_bits fb;
   fb.f = a;
@@ -36,4 +43,8 @@ COMPILER_RT_ABI su_int __fixunsxfsi(long double a) {
   return fb.u.low.s.high >> (31 - e);
 }
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
+
 #endif // !_ARCH_PPC
diff --git a/lib/builtins/fixxfdi.c b/lib/builtins/fixxfdi.c
index 4783c01017400..a7a0464feb9d9 100644
--- a/lib/builtins/fixxfdi.c
+++ b/lib/builtins/fixxfdi.c
@@ -24,6 +24,13 @@
 // eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
 // mmmm mmmm mmmm
 
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about 'unitialized variable use' here,
+// disable it for builds that warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4700)
+#endif
+
 COMPILER_RT_ABI di_int __fixxfdi(long double a) {
   const di_int di_max = (di_int)((~(du_int)0) / 2);
   const di_int di_min = -di_max - 1;
@@ -40,4 +47,8 @@ COMPILER_RT_ABI di_int __fixxfdi(long double a) {
   return (r ^ s) - s;
 }
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
+
 #endif // !_ARCH_PPC
diff --git a/lib/builtins/fp_add_impl.inc b/lib/builtins/fp_add_impl.inc
index da86393417033..ab63213490324 100644
--- a/lib/builtins/fp_add_impl.inc
+++ b/lib/builtins/fp_add_impl.inc
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "fp_lib.h"
+#include "fp_mode.h"
 
 static __inline fp_t __addXf3__(fp_t a, fp_t b) {
   rep_t aRep = toRep(a);
@@ -93,7 +94,7 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) {
   const unsigned int align = aExponent - bExponent;
   if (align) {
     if (align < typeWidth) {
-      const bool sticky = bSignificand << (typeWidth - align);
+      const bool sticky = (bSignificand << (typeWidth - align)) != 0;
       bSignificand = bSignificand >> align | sticky;
     } else {
       bSignificand = 1; // Set the sticky bit.  b is known to be non-zero.
@@ -132,7 +133,7 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) {
     // The result is denormal before rounding.  The exponent is zero and we
     // need to shift the significand.
     const int shift = 1 - aExponent;
-    const bool sticky = aSignificand << (typeWidth - shift);
+    const bool sticky = (aSignificand << (typeWidth - shift)) != 0;
     aSignificand = aSignificand >> shift | sticky;
     aExponent = 0;
   }
@@ -149,9 +150,23 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) {
 
   // Perform the final rounding.  The result may overflow to infinity, but
   // that is the correct result in that case.
-  if (roundGuardSticky > 0x4)
-    result++;
-  if (roundGuardSticky == 0x4)
-    result += result & 1;
+  switch (__fe_getround()) {
+  case FE_TONEAREST:
+    if (roundGuardSticky > 0x4)
+      result++;
+    if (roundGuardSticky == 0x4)
+      result += result & 1;
+    break;
+  case FE_DOWNWARD:
+    if (resultSign && roundGuardSticky) result++;
+    break;
+  case FE_UPWARD:
+    if (!resultSign && roundGuardSticky) result++;
+    break;
+  case FE_TOWARDZERO:
+    break;
+  }
+  if (roundGuardSticky)
+    __fe_raise_inexact();
   return fromRep(result);
 }
diff --git a/lib/builtins/fp_lib.h b/lib/builtins/fp_lib.h
index d1a988ea47133..e2a906681c46d 100644
--- a/lib/builtins/fp_lib.h
+++ b/lib/builtins/fp_lib.h
@@ -245,7 +245,7 @@ static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
 static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo,
                                               unsigned int count) {
   if (count < typeWidth) {
-    const bool sticky = *lo << (typeWidth - count);
+    const bool sticky = (*lo << (typeWidth - count)) != 0;
     *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
     *hi = *hi >> count;
   } else if (count < 2 * typeWidth) {
diff --git a/lib/builtins/fp_mode.c b/lib/builtins/fp_mode.c
new file mode 100644
index 0000000000000..c1b6c1f6b8a3d
--- /dev/null
+++ b/lib/builtins/fp_mode.c
@@ -0,0 +1,24 @@
+//===----- lib/fp_mode.c - Floaing-point environment mode utilities --C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a default implementation of fp_mode.h for architectures
+// that does not support or does not have an implementation of floating point
+// environment mode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_mode.h"
+
+// IEEE-754 default rounding (to nearest, ties to even).
+FE_ROUND_MODE __fe_getround() {
+  return FE_TONEAREST;
+}
+
+int __fe_raise_inexact() {
+  return 0;
+}
diff --git a/lib/builtins/fp_mode.h b/lib/builtins/fp_mode.h
new file mode 100644
index 0000000000000..51bec0431a402
--- /dev/null
+++ b/lib/builtins/fp_mode.h
@@ -0,0 +1,29 @@
+//===----- lib/fp_mode.h - Floaing-point environment mode utilities --C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is not part of the interface of this library.
+//
+// This file defines an interface for accessing hardware floating point
+// environment mode.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_MODE
+#define FP_MODE
+
+typedef enum {
+  FE_TONEAREST,
+  FE_DOWNWARD,
+  FE_UPWARD,
+  FE_TOWARDZERO
+} FE_ROUND_MODE;
+
+FE_ROUND_MODE __fe_getround();
+int __fe_raise_inexact();
+
+#endif // FP_MODE_H
diff --git a/lib/builtins/fp_trunc_impl.inc b/lib/builtins/fp_trunc_impl.inc
index 133c8bbe5c2fe..6662be7607e70 100644
--- a/lib/builtins/fp_trunc_impl.inc
+++ b/lib/builtins/fp_trunc_impl.inc
@@ -113,7 +113,7 @@ static __inline dst_t __truncXfYf2__(src_t a) {
     if (shift > srcSigBits) {
       absResult = 0;
     } else {
-      const bool sticky = significand << (srcBits - shift);
+      const bool sticky = (significand << (srcBits - shift)) != 0;
       src_rep_t denormalizedSignificand = significand >> shift | sticky;
       absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
       const src_rep_t roundBits = denormalizedSignificand & roundMask;
diff --git a/lib/builtins/subdf3.c b/lib/builtins/subdf3.c
index 5346dbc970fa2..2100fd39c4ef0 100644
--- a/lib/builtins/subdf3.c
+++ b/lib/builtins/subdf3.c
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements double-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
+// This file implements double-precision soft-float subtraction.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/subsf3.c b/lib/builtins/subsf3.c
index 85bde029b5bcd..ecfc24f7dd301 100644
--- a/lib/builtins/subsf3.c
+++ b/lib/builtins/subsf3.c
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements single-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
+// This file implements single-precision soft-float subtraction.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/subtf3.c b/lib/builtins/subtf3.c
index c96814692d2c3..3364c28f81792 100644
--- a/lib/builtins/subtf3.c
+++ b/lib/builtins/subtf3.c
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements quad-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
+// This file implements quad-precision soft-float subtraction.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/builtins/udivmoddi4.c b/lib/builtins/udivmoddi4.c
index 2914cc0fb46d5..5b297c32d7901 100644
--- a/lib/builtins/udivmoddi4.c
+++ b/lib/builtins/udivmoddi4.c
@@ -17,6 +17,13 @@
 
 // Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
 
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about mod 0 here, disable it for builds that
+// warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4724)
+#endif
+
 COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
   const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
   const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
@@ -187,3 +194,7 @@ COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
     *rem = r.all;
   return q.all;
 }
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif