diff options
Diffstat (limited to 'llvm/lib/Support')
158 files changed, 56424 insertions, 0 deletions
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp new file mode 100644 index 0000000000000..6f1d6d50eee21 --- /dev/null +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -0,0 +1,215 @@ +//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise AArch64 hardware features +// such as FPU/CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/AArch64TargetParser.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include <cctype> + +using namespace llvm; + +static unsigned checkArchVersion(llvm::StringRef Arch) { + if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1])) + return (Arch[1] - 48); + return 0; +} + +unsigned AArch64::getDefaultFPU(StringRef CPU, AArch64::ArchKind AK) { + if (CPU == "generic") + return AArch64ARCHNames[static_cast<unsigned>(AK)].DefaultFPU; + + return StringSwitch<unsigned>(CPU) +#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, ARM::DEFAULT_FPU) +#include "../../include/llvm/Support/AArch64TargetParser.def" + .Default(ARM::FK_INVALID); +} + +unsigned AArch64::getDefaultExtensions(StringRef CPU, AArch64::ArchKind AK) { + if (CPU == "generic") + return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions; + + return StringSwitch<unsigned>(CPU) +#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, AArch64ARCHNames[static_cast<unsigned>(ArchKind::ID)] \ + .ArchBaseExtensions | \ + DEFAULT_EXT) +#include 
"../../include/llvm/Support/AArch64TargetParser.def" + .Default(AArch64::AEK_INVALID); +} + +AArch64::ArchKind AArch64::getCPUArchKind(StringRef CPU) { + if (CPU == "generic") + return ArchKind::ARMV8A; + + return StringSwitch<AArch64::ArchKind>(CPU) +#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, ArchKind::ID) +#include "../../include/llvm/Support/AArch64TargetParser.def" + .Default(ArchKind::INVALID); +} + +bool AArch64::getExtensionFeatures(unsigned Extensions, + std::vector<StringRef> &Features) { + if (Extensions == AArch64::AEK_INVALID) + return false; + + if (Extensions & AEK_FP) + Features.push_back("+fp-armv8"); + if (Extensions & AEK_SIMD) + Features.push_back("+neon"); + if (Extensions & AEK_CRC) + Features.push_back("+crc"); + if (Extensions & AEK_CRYPTO) + Features.push_back("+crypto"); + if (Extensions & AEK_DOTPROD) + Features.push_back("+dotprod"); + if (Extensions & AEK_FP16FML) + Features.push_back("+fp16fml"); + if (Extensions & AEK_FP16) + Features.push_back("+fullfp16"); + if (Extensions & AEK_PROFILE) + Features.push_back("+spe"); + if (Extensions & AEK_RAS) + Features.push_back("+ras"); + if (Extensions & AEK_LSE) + Features.push_back("+lse"); + if (Extensions & AEK_RDM) + Features.push_back("+rdm"); + if (Extensions & AEK_SVE) + Features.push_back("+sve"); + if (Extensions & AEK_SVE2) + Features.push_back("+sve2"); + if (Extensions & AEK_SVE2AES) + Features.push_back("+sve2-aes"); + if (Extensions & AEK_SVE2SM4) + Features.push_back("+sve2-sm4"); + if (Extensions & AEK_SVE2SHA3) + Features.push_back("+sve2-sha3"); + if (Extensions & AEK_SVE2BITPERM) + Features.push_back("+sve2-bitperm"); + if (Extensions & AEK_RCPC) + Features.push_back("+rcpc"); + + return true; +} + +bool AArch64::getArchFeatures(AArch64::ArchKind AK, + std::vector<StringRef> &Features) { + if (AK == ArchKind::ARMV8_1A) + Features.push_back("+v8.1a"); + if (AK == ArchKind::ARMV8_2A) + Features.push_back("+v8.2a"); + if (AK == 
ArchKind::ARMV8_3A) + Features.push_back("+v8.3a"); + if (AK == ArchKind::ARMV8_4A) + Features.push_back("+v8.4a"); + if (AK == ArchKind::ARMV8_5A) + Features.push_back("+v8.5a"); + + return AK != ArchKind::INVALID; +} + +StringRef AArch64::getArchName(AArch64::ArchKind AK) { + return AArch64ARCHNames[static_cast<unsigned>(AK)].getName(); +} + +StringRef AArch64::getCPUAttr(AArch64::ArchKind AK) { + return AArch64ARCHNames[static_cast<unsigned>(AK)].getCPUAttr(); +} + +StringRef AArch64::getSubArch(AArch64::ArchKind AK) { + return AArch64ARCHNames[static_cast<unsigned>(AK)].getSubArch(); +} + +unsigned AArch64::getArchAttr(AArch64::ArchKind AK) { + return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchAttr; +} + +StringRef AArch64::getArchExtName(unsigned ArchExtKind) { + for (const auto &AE : AArch64ARCHExtNames) + if (ArchExtKind == AE.ID) + return AE.getName(); + return StringRef(); +} + +StringRef AArch64::getArchExtFeature(StringRef ArchExt) { + if (ArchExt.startswith("no")) { + StringRef ArchExtBase(ArchExt.substr(2)); + for (const auto &AE : AArch64ARCHExtNames) { + if (AE.NegFeature && ArchExtBase == AE.getName()) + return StringRef(AE.NegFeature); + } + } + + for (const auto &AE : AArch64ARCHExtNames) + if (AE.Feature && ArchExt == AE.getName()) + return StringRef(AE.Feature); + return StringRef(); +} + +StringRef AArch64::getDefaultCPU(StringRef Arch) { + ArchKind AK = parseArch(Arch); + if (AK == ArchKind::INVALID) + return StringRef(); + + // Look for multiple AKs to find the default for pair AK+Name. 
+ for (const auto &CPU : AArch64CPUNames) + if (CPU.ArchID == AK && CPU.Default) + return CPU.getName(); + + // If we can't find a default then target the architecture instead + return "generic"; +} + +void AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) { + for (const auto &Arch : AArch64CPUNames) { + if (Arch.ArchID != ArchKind::INVALID) + Values.push_back(Arch.getName()); + } +} + +bool AArch64::isX18ReservedByDefault(const Triple &TT) { + return TT.isAndroid() || TT.isOSDarwin() || TT.isOSFuchsia() || + TT.isOSWindows(); +} + +// Allows partial match, ex. "v8a" matches "armv8a". +AArch64::ArchKind AArch64::parseArch(StringRef Arch) { + Arch = ARM::getCanonicalArchName(Arch); + if (checkArchVersion(Arch) < 8) + return ArchKind::INVALID; + + StringRef Syn = ARM::getArchSynonym(Arch); + for (const auto A : AArch64ARCHNames) { + if (A.getName().endswith(Syn)) + return A.ID; + } + return ArchKind::INVALID; +} + +AArch64::ArchExtKind AArch64::parseArchExt(StringRef ArchExt) { + for (const auto A : AArch64ARCHExtNames) { + if (ArchExt == A.getName()) + return static_cast<ArchExtKind>(A.ID); + } + return AArch64::AEK_INVALID; +} + +AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) { + for (const auto C : AArch64CPUNames) { + if (CPU == C.getName()) + return C.ArchID; + } + return ArchKind::INVALID; +} diff --git a/llvm/lib/Support/ABIBreak.cpp b/llvm/lib/Support/ABIBreak.cpp new file mode 100644 index 0000000000000..247b635e02b8d --- /dev/null +++ b/llvm/lib/Support/ABIBreak.cpp @@ -0,0 +1,24 @@ +//===----- lib/Support/ABIBreak.cpp - EnableABIBreakingChecks -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/abi-breaking.h" + +#ifndef _MSC_VER +namespace llvm { + +// One of these two variables will be referenced by a symbol defined in +// llvm-config.h. We provide a link-time (or load time for DSO) failure when +// there is a mismatch in the build configuration of the API client and LLVM. +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +int EnableABIBreakingChecks; +#else +int DisableABIBreakingChecks; +#endif + +} // end namespace llvm +#endif diff --git a/llvm/lib/Support/AMDGPUMetadata.cpp b/llvm/lib/Support/AMDGPUMetadata.cpp new file mode 100644 index 0000000000000..5f8102299f47a --- /dev/null +++ b/llvm/lib/Support/AMDGPUMetadata.cpp @@ -0,0 +1,224 @@ +//===--- AMDGPUMetadata.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU metadata definitions and in-memory representations. 
+/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/AMDGPUMetadata.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::HSAMD; + +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { +namespace yaml { + +template <> +struct ScalarEnumerationTraits<AccessQualifier> { + static void enumeration(IO &YIO, AccessQualifier &EN) { + YIO.enumCase(EN, "Default", AccessQualifier::Default); + YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); + YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); + YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); + } +}; + +template <> +struct ScalarEnumerationTraits<AddressSpaceQualifier> { + static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { + YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); + YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); + YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); + YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); + YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); + YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); + } +}; + +template <> +struct ScalarEnumerationTraits<ValueKind> { + static void enumeration(IO &YIO, ValueKind &EN) { + YIO.enumCase(EN, "ByValue", ValueKind::ByValue); + YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); + YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); + YIO.enumCase(EN, "Sampler", ValueKind::Sampler); + YIO.enumCase(EN, "Image", ValueKind::Image); + YIO.enumCase(EN, "Pipe", ValueKind::Pipe); + YIO.enumCase(EN, "Queue", ValueKind::Queue); + YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); + YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); + YIO.enumCase(EN, "HiddenGlobalOffsetZ", 
ValueKind::HiddenGlobalOffsetZ); + YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); + YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); + YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); + YIO.enumCase(EN, "HiddenCompletionAction", + ValueKind::HiddenCompletionAction); + YIO.enumCase(EN, "HiddenMultiGridSyncArg", + ValueKind::HiddenMultiGridSyncArg); + } +}; + +template <> +struct ScalarEnumerationTraits<ValueType> { + static void enumeration(IO &YIO, ValueType &EN) { + YIO.enumCase(EN, "Struct", ValueType::Struct); + YIO.enumCase(EN, "I8", ValueType::I8); + YIO.enumCase(EN, "U8", ValueType::U8); + YIO.enumCase(EN, "I16", ValueType::I16); + YIO.enumCase(EN, "U16", ValueType::U16); + YIO.enumCase(EN, "F16", ValueType::F16); + YIO.enumCase(EN, "I32", ValueType::I32); + YIO.enumCase(EN, "U32", ValueType::U32); + YIO.enumCase(EN, "F32", ValueType::F32); + YIO.enumCase(EN, "I64", ValueType::I64); + YIO.enumCase(EN, "U64", ValueType::U64); + YIO.enumCase(EN, "F64", ValueType::F64); + } +}; + +template <> +struct MappingTraits<Kernel::Attrs::Metadata> { + static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { + YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, + MD.mReqdWorkGroupSize, std::vector<uint32_t>()); + YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, + MD.mWorkGroupSizeHint, std::vector<uint32_t>()); + YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, + MD.mVecTypeHint, std::string()); + YIO.mapOptional(Kernel::Attrs::Key::RuntimeHandle, MD.mRuntimeHandle, + std::string()); + } +}; + +template <> +struct MappingTraits<Kernel::Arg::Metadata> { + static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); + YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); + YIO.mapRequired(Kernel::Arg::Key::ValueKind, 
MD.mValueKind); + YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); + YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, + uint32_t(0)); + YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, + AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::ActualAccQual, MD.mActualAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); + YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); + YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); + } +}; + +template <> +struct MappingTraits<Kernel::CodeProps::Metadata> { + static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { + YIO.mapRequired(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize); + YIO.mapRequired(Kernel::CodeProps::Key::GroupSegmentFixedSize, + MD.mGroupSegmentFixedSize); + YIO.mapRequired(Kernel::CodeProps::Key::PrivateSegmentFixedSize, + MD.mPrivateSegmentFixedSize); + YIO.mapRequired(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign); + YIO.mapRequired(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize); + YIO.mapOptional(Kernel::CodeProps::Key::NumSGPRs, + MD.mNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::NumVGPRs, + MD.mNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::MaxFlatWorkGroupSize, + MD.mMaxFlatWorkGroupSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::IsDynamicCallStack, + MD.mIsDynamicCallStack, false); + YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled, + MD.mIsXNACKEnabled, false); + YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs, + MD.mNumSpilledSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs, + MD.mNumSpilledVGPRs, uint16_t(0)); + } +}; + +template <> +struct 
MappingTraits<Kernel::DebugProps::Metadata> { + static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { + YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, + MD.mDebuggerABIVersion, std::vector<uint32_t>()); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, + MD.mReservedNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, + MD.mReservedFirstVGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, + MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, + MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); + } +}; + +template <> +struct MappingTraits<Kernel::Metadata> { + static void mapping(IO &YIO, Kernel::Metadata &MD) { + YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapRequired(Kernel::Key::SymbolName, MD.mSymbolName); + YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); + YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, + std::vector<uint32_t>()); + if (!MD.mAttrs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); + if (!MD.mArgs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + if (!MD.mCodeProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); + if (!MD.mDebugProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); + } +}; + +template <> +struct MappingTraits<HSAMD::Metadata> { + static void mapping(IO &YIO, HSAMD::Metadata &MD) { + YIO.mapRequired(Key::Version, MD.mVersion); + YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>()); + if (!MD.mKernels.empty() || !YIO.outputting()) + YIO.mapOptional(Key::Kernels, MD.mKernels); + } +}; + +} // end namespace yaml + +namespace AMDGPU { +namespace HSAMD { + +std::error_code fromString(std::string String, Metadata &HSAMetadata) { + yaml::Input 
YamlInput(String); + YamlInput >> HSAMetadata; + return YamlInput.error(); +} + +std::error_code toString(Metadata HSAMetadata, std::string &String) { + raw_string_ostream YamlStream(String); + yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max()); + YamlOutput << HSAMetadata; + return std::error_code(); +} + +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp new file mode 100644 index 0000000000000..b79baf1834a78 --- /dev/null +++ b/llvm/lib/Support/APFloat.cpp @@ -0,0 +1,4562 @@ +//===-- APFloat.cpp - Implement APFloat class -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a class to represent arbitrary precision floating +// point values and provide a variety of arithmetic operations on them. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> +#include <limits.h> + +#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ + do { \ + if (usesLayout<IEEEFloat>(getSemantics())) \ + return U.IEEE.METHOD_CALL; \ + if (usesLayout<DoubleAPFloat>(getSemantics())) \ + return U.Double.METHOD_CALL; \ + llvm_unreachable("Unexpected semantics"); \ + } while (false) + +using namespace llvm; + +/// A macro used to combine two fcCategory enums into one key which can be used +/// in a switch statement to classify how the interaction of two APFloat's +/// categories affects an operation. +/// +/// TODO: If clang source code is ever allowed to use constexpr in its own +/// codebase, change this into a static inline function. +#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) + +/* Assumed in hexadecimal significand parsing, and conversion to + hexadecimal strings. */ +static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); + +namespace llvm { + /* Represents floating point arithmetic semantics. */ + struct fltSemantics { + /* The largest E such that 2^E is representable; this matches the + definition of IEEE 754. */ + APFloatBase::ExponentType maxExponent; + + /* The smallest E such that 2^E is a normalized number; this + matches the definition of IEEE 754. */ + APFloatBase::ExponentType minExponent; + + /* Number of bits in the significand. This includes the integer + bit. */ + unsigned int precision; + + /* Number of bits actually used in the semantics. 
*/ + unsigned int sizeInBits; + }; + + static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; + static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; + static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; + static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; + static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; + static const fltSemantics semBogus = {0, 0, 0, 0}; + + /* The IBM double-double semantics. Such a number consists of a pair of IEEE + 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, + (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. + Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent + to each other, and two 11-bit exponents. + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ + static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; + + /* These are legacy semantics for the fallback, inaccrurate implementation of + IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the + operation. It's equivalent to having an IEEE number with consecutive 106 + bits of mantissa and 11 bits of exponent. + + It's not equivalent to IBM double-double. For example, a legit IBM + double-double, 1 + epsilon: + + 1 + epsilon = 1 + (1 >> 1076) + + is not representable by a consecutive 106 bits of mantissa. + + Currently, these semantics are used in the following way: + + semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> + (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> + semPPCDoubleDoubleLegacy -> IEEE operations + + We use bitcastToAPInt() to get the bit representation (in APInt) of the + underlying IEEEdouble, then use the APInt constructor to construct the + legacy IEEE float. + + TODO: Implement all operations in semPPCDoubleDouble, and delete these + semantics. 
*/ + static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, + 53 + 53, 128}; + + const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { + switch (S) { + case S_IEEEhalf: + return IEEEhalf(); + case S_IEEEsingle: + return IEEEsingle(); + case S_IEEEdouble: + return IEEEdouble(); + case S_x87DoubleExtended: + return x87DoubleExtended(); + case S_IEEEquad: + return IEEEquad(); + case S_PPCDoubleDouble: + return PPCDoubleDouble(); + } + llvm_unreachable("Unrecognised floating semantics"); + } + + APFloatBase::Semantics + APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { + if (&Sem == &llvm::APFloat::IEEEhalf()) + return S_IEEEhalf; + else if (&Sem == &llvm::APFloat::IEEEsingle()) + return S_IEEEsingle; + else if (&Sem == &llvm::APFloat::IEEEdouble()) + return S_IEEEdouble; + else if (&Sem == &llvm::APFloat::x87DoubleExtended()) + return S_x87DoubleExtended; + else if (&Sem == &llvm::APFloat::IEEEquad()) + return S_IEEEquad; + else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) + return S_PPCDoubleDouble; + else + llvm_unreachable("Unknown floating semantics"); + } + + const fltSemantics &APFloatBase::IEEEhalf() { + return semIEEEhalf; + } + const fltSemantics &APFloatBase::IEEEsingle() { + return semIEEEsingle; + } + const fltSemantics &APFloatBase::IEEEdouble() { + return semIEEEdouble; + } + const fltSemantics &APFloatBase::IEEEquad() { + return semIEEEquad; + } + const fltSemantics &APFloatBase::x87DoubleExtended() { + return semX87DoubleExtended; + } + const fltSemantics &APFloatBase::Bogus() { + return semBogus; + } + const fltSemantics &APFloatBase::PPCDoubleDouble() { + return semPPCDoubleDouble; + } + + /* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + 
being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. */ + const unsigned int maxExponent = 16383; + const unsigned int maxPrecision = 113; + const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; + const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); + + unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { + return semantics.precision; + } + APFloatBase::ExponentType + APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { + return semantics.maxExponent; + } + APFloatBase::ExponentType + APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { + return semantics.minExponent; + } + unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { + return semantics.sizeInBits; + } + + unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { + return Sem.sizeInBits; +} + +/* A bunch of private, handy routines. */ + +static inline unsigned int +partCountForBits(unsigned int bits) +{ + return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; +} + +/* Returns 0U-9U. Return values >= 10U are not digits. */ +static inline unsigned int +decDigitValue(unsigned int c) +{ + return c - '0'; +} + +/* Return the value of a decimal exponent of the form + [+-]ddddddd. + + If the exponent overflows, returns a large exponent with the + appropriate sign. */ +static int +readExponent(StringRef::iterator begin, StringRef::iterator end) +{ + bool isNegative; + unsigned int absExponent; + const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ + StringRef::iterator p = begin; + + // Treat no exponent as 0 to match binutils + if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { + return 0; + } + + isNegative = (*p == '-'); + if (*p == '-' || *p == '+') { + p++; + assert(p != end && "Exponent has no digits"); + } + + absExponent = decDigitValue(*p++); + assert(absExponent < 10U && "Invalid character in exponent"); + + for (; p != end; ++p) { + unsigned int value; + + value = decDigitValue(*p); + assert(value < 10U && "Invalid character in exponent"); + + value += absExponent * 10; + if (absExponent >= overlargeExponent) { + absExponent = overlargeExponent; + p = end; /* outwit assert below */ + break; + } + absExponent = value; + } + + assert(p == end && "Invalid exponent in exponent"); + + if (isNegative) + return -(int) absExponent; + else + return (int) absExponent; +} + +/* This is ugly and needs cleaning up, but I don't immediately see + how whilst remaining safe. */ +static int +totalExponent(StringRef::iterator p, StringRef::iterator end, + int exponentAdjustment) +{ + int unsignedExponent; + bool negative, overflow; + int exponent = 0; + + assert(p != end && "Exponent has no digits"); + + negative = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + assert(p != end && "Exponent has no digits"); + } + + unsignedExponent = 0; + overflow = false; + for (; p != end; ++p) { + unsigned int value; + + value = decDigitValue(*p); + assert(value < 10U && "Invalid character in exponent"); + + unsignedExponent = unsignedExponent * 10 + value; + if (unsignedExponent > 32767) { + overflow = true; + break; + } + } + + if (exponentAdjustment > 32767 || exponentAdjustment < -32768) + overflow = true; + + if (!overflow) { + exponent = unsignedExponent; + if (negative) + exponent = -exponent; + exponent += exponentAdjustment; + if (exponent > 32767 || exponent < -32768) + overflow = true; + } + + if (overflow) + exponent = negative ? 
-32768: 32767; + + return exponent; +} + +static StringRef::iterator +skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, + StringRef::iterator *dot) +{ + StringRef::iterator p = begin; + *dot = end; + while (p != end && *p == '0') + p++; + + if (p != end && *p == '.') { + *dot = p++; + + assert(end - begin != 1 && "Significand has no digits"); + + while (p != end && *p == '0') + p++; + } + + return p; +} + +/* Given a normal decimal floating point number of the form + + dddd.dddd[eE][+-]ddd + + where the decimal point and exponent are optional, fill out the + structure D. Exponent is appropriate if the significand is + treated as an integer, and normalizedExponent if the significand + is taken to have the decimal point after a single leading + non-zero digit. + + If the value is zero, V->firstSigDigit points to a non-digit, and + the return exponent is zero. +*/ +struct decimalInfo { + const char *firstSigDigit; + const char *lastSigDigit; + int exponent; + int normalizedExponent; +}; + +static void +interpretDecimal(StringRef::iterator begin, StringRef::iterator end, + decimalInfo *D) +{ + StringRef::iterator dot = end; + StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); + + D->firstSigDigit = p; + D->exponent = 0; + D->normalizedExponent = 0; + + for (; p != end; ++p) { + if (*p == '.') { + assert(dot == end && "String contains multiple dots"); + dot = p++; + if (p == end) + break; + } + if (decDigitValue(*p) >= 10U) + break; + } + + if (p != end) { + assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); + + /* p points to the first non-digit in the string */ + D->exponent = readExponent(p + 1, end); + + /* Implied decimal point? */ + if (dot == end) + dot = p; + } + + /* If number is all zeroes accept any exponent. 
*/ + if (p != D->firstSigDigit) { + /* Drop insignificant trailing zeroes. */ + if (p != begin) { + do + do + p--; + while (p != begin && *p == '0'); + while (p != begin && *p == '.'); + } + + /* Adjust the exponents for any decimal point. */ + D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); + D->normalizedExponent = (D->exponent + + static_cast<APFloat::ExponentType>((p - D->firstSigDigit) + - (dot > D->firstSigDigit && dot < p))); + } + + D->lastSigDigit = p; +} + +/* Return the trailing fraction of a hexadecimal number. + DIGITVALUE is the first hex digit of the fraction, P points to + the next digit. */ +static lostFraction +trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, + unsigned int digitValue) +{ + unsigned int hexDigit; + + /* If the first trailing digit isn't 0 or 8 we can work out the + fraction immediately. */ + if (digitValue > 8) + return lfMoreThanHalf; + else if (digitValue < 8 && digitValue > 0) + return lfLessThanHalf; + + // Otherwise we need to find the first non-zero digit. + while (p != end && (*p == '0' || *p == '.')) + p++; + + assert(p != end && "Invalid trailing hexadecimal fraction!"); + + hexDigit = hexDigitValue(*p); + + /* If we ran off the end it is exactly zero or one-half, otherwise + a little more. */ + if (hexDigit == -1U) + return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; + else + return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; +} + +/* Return the fraction lost were a bignum truncated losing the least + significant BITS bits. */ +static lostFraction +lostFractionThroughTruncation(const APFloatBase::integerPart *parts, + unsigned int partCount, + unsigned int bits) +{ + unsigned int lsb; + + lsb = APInt::tcLSB(parts, partCount); + + /* Note this is guaranteed true if bits == 0, or LSB == -1U. 
*/ + if (bits <= lsb) + return lfExactlyZero; + if (bits == lsb + 1) + return lfExactlyHalf; + if (bits <= partCount * APFloatBase::integerPartWidth && + APInt::tcExtractBit(parts, bits - 1)) + return lfMoreThanHalf; + + return lfLessThanHalf; +} + +/* Shift DST right BITS bits noting lost fraction. */ +static lostFraction +shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) +{ + lostFraction lost_fraction; + + lost_fraction = lostFractionThroughTruncation(dst, parts, bits); + + APInt::tcShiftRight(dst, parts, bits); + + return lost_fraction; +} + +/* Combine the effect of two lost fractions. */ +static lostFraction +combineLostFractions(lostFraction moreSignificant, + lostFraction lessSignificant) +{ + if (lessSignificant != lfExactlyZero) { + if (moreSignificant == lfExactlyZero) + moreSignificant = lfLessThanHalf; + else if (moreSignificant == lfExactlyHalf) + moreSignificant = lfMoreThanHalf; + } + + return moreSignificant; +} + +/* The error from the true value, in half-ulps, on multiplying two + floating point numbers, which differ from the value they + approximate by at most HUE1 and HUE2 half-ulps, is strictly less + than the returned value. + + See "How to Read Floating Point Numbers Accurately" by William D + Clinger. */ +static unsigned int +HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) +{ + assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); + + if (HUerr1 + HUerr2 == 0) + return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ + else + return inexactMultiply + 2 * (HUerr1 + HUerr2); +} + +/* The number of ulps from the boundary (zero, or half if ISNEAREST) + when the least significant BITS are truncated. BITS cannot be + zero. 
*/ +static APFloatBase::integerPart +ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, + bool isNearest) { + unsigned int count, partBits; + APFloatBase::integerPart part, boundary; + + assert(bits != 0); + + bits--; + count = bits / APFloatBase::integerPartWidth; + partBits = bits % APFloatBase::integerPartWidth + 1; + + part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); + + if (isNearest) + boundary = (APFloatBase::integerPart) 1 << (partBits - 1); + else + boundary = 0; + + if (count == 0) { + if (part - boundary <= boundary - part) + return part - boundary; + else + return boundary - part; + } + + if (part == boundary) { + while (--count) + if (parts[count]) + return ~(APFloatBase::integerPart) 0; /* A lot. */ + + return parts[0]; + } else if (part == boundary - 1) { + while (--count) + if (~parts[count]) + return ~(APFloatBase::integerPart) 0; /* A lot. */ + + return -parts[0]; + } + + return ~(APFloatBase::integerPart) 0; /* A lot. */ +} + +/* Place pow(5, power) in DST, and return the number of parts used. + DST must be at least one part larger than size of the answer. */ +static unsigned int +powerOf5(APFloatBase::integerPart *dst, unsigned int power) { + static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; + APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; + pow5s[0] = 78125 * 5; + + unsigned int partsCount[16] = { 1 }; + APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + unsigned int result; + assert(power <= maxExponent); + + p1 = dst; + p2 = scratch; + + *p1 = firstEightPowers[power & 7]; + power >>= 3; + + result = 1; + pow5 = pow5s; + + for (unsigned int n = 0; power; power >>= 1, n++) { + unsigned int pc; + + pc = partsCount[n]; + + /* Calculate pow(5,pow(2,n+3)) if we haven't yet. 
*/ + if (pc == 0) { + pc = partsCount[n - 1]; + APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); + pc *= 2; + if (pow5[pc - 1] == 0) + pc--; + partsCount[n] = pc; + } + + if (power & 1) { + APFloatBase::integerPart *tmp; + + APInt::tcFullMultiply(p2, p1, pow5, result, pc); + result += pc; + if (p2[result - 1] == 0) + result--; + + /* Now result is in p1 with partsCount parts and p2 is scratch + space. */ + tmp = p1; + p1 = p2; + p2 = tmp; + } + + pow5 += pc; + } + + if (p1 != dst) + APInt::tcAssign(dst, p1, result); + + return result; +} + +/* Zero at the end to avoid modular arithmetic when adding one; used + when rounding up during hexadecimal output. */ +static const char hexDigitsLower[] = "0123456789abcdef0"; +static const char hexDigitsUpper[] = "0123456789ABCDEF0"; +static const char infinityL[] = "infinity"; +static const char infinityU[] = "INFINITY"; +static const char NaNL[] = "nan"; +static const char NaNU[] = "NAN"; + +/* Write out an integerPart in hexadecimal, starting with the most + significant nibble. Write out exactly COUNT hexdigits, return + COUNT. */ +static unsigned int +partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, + const char *hexDigitChars) +{ + unsigned int result = count; + + assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); + + part >>= (APFloatBase::integerPartWidth - 4 * count); + while (count--) { + dst[count] = hexDigitChars[part & 0xf]; + part >>= 4; + } + + return result; +} + +/* Write out an unsigned decimal integer. */ +static char * +writeUnsignedDecimal (char *dst, unsigned int n) +{ + char buff[40], *p; + + p = buff; + do + *p++ = '0' + n % 10; + while (n /= 10); + + do + *dst++ = *--p; + while (p != buff); + + return dst; +} + +/* Write out a signed decimal integer. 
*/ +static char * +writeSignedDecimal (char *dst, int value) +{ + if (value < 0) { + *dst++ = '-'; + dst = writeUnsignedDecimal(dst, -(unsigned) value); + } else + dst = writeUnsignedDecimal(dst, value); + + return dst; +} + +namespace detail { +/* Constructors. */ +void IEEEFloat::initialize(const fltSemantics *ourSemantics) { + unsigned int count; + + semantics = ourSemantics; + count = partCount(); + if (count > 1) + significand.parts = new integerPart[count]; +} + +void IEEEFloat::freeSignificand() { + if (needsCleanup()) + delete [] significand.parts; +} + +void IEEEFloat::assign(const IEEEFloat &rhs) { + assert(semantics == rhs.semantics); + + sign = rhs.sign; + category = rhs.category; + exponent = rhs.exponent; + if (isFiniteNonZero() || category == fcNaN) + copySignificand(rhs); +} + +void IEEEFloat::copySignificand(const IEEEFloat &rhs) { + assert(isFiniteNonZero() || category == fcNaN); + assert(rhs.partCount() >= partCount()); + + APInt::tcAssign(significandParts(), rhs.significandParts(), + partCount()); +} + +/* Make this number a NaN, with an arbitrary but deterministic value + for the significand. If double or longer, this is a signalling NaN, + which may not be ideal. If float, this is QNaN(0). */ +void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { + category = fcNaN; + sign = Negative; + + integerPart *significand = significandParts(); + unsigned numParts = partCount(); + + // Set the significand bits to the fill. + if (!fill || fill->getNumWords() < numParts) + APInt::tcSet(significand, 0, numParts); + if (fill) { + APInt::tcAssign(significand, fill->getRawData(), + std::min(fill->getNumWords(), numParts)); + + // Zero out the excess bits of the significand. 
+ unsigned bitsToPreserve = semantics->precision - 1; + unsigned part = bitsToPreserve / 64; + bitsToPreserve %= 64; + significand[part] &= ((1ULL << bitsToPreserve) - 1); + for (part++; part != numParts; ++part) + significand[part] = 0; + } + + unsigned QNaNBit = semantics->precision - 2; + + if (SNaN) { + // We always have to clear the QNaN bit to make it an SNaN. + APInt::tcClearBit(significand, QNaNBit); + + // If there are no bits set in the payload, we have to set + // *something* to make it a NaN instead of an infinity; + // conventionally, this is the next bit down from the QNaN bit. + if (APInt::tcIsZero(significand, numParts)) + APInt::tcSetBit(significand, QNaNBit - 1); + } else { + // We always have to set the QNaN bit to make it a QNaN. + APInt::tcSetBit(significand, QNaNBit); + } + + // For x87 extended precision, we want to make a NaN, not a + // pseudo-NaN. Maybe we should expose the ability to make + // pseudo-NaNs? + if (semantics == &semX87DoubleExtended) + APInt::tcSetBit(significand, QNaNBit + 1); +} + +IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { + if (this != &rhs) { + if (semantics != rhs.semantics) { + freeSignificand(); + initialize(rhs.semantics); + } + assign(rhs); + } + + return *this; +} + +IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { + freeSignificand(); + + semantics = rhs.semantics; + significand = rhs.significand; + exponent = rhs.exponent; + category = rhs.category; + sign = rhs.sign; + + rhs.semantics = &semBogus; + return *this; +} + +bool IEEEFloat::isDenormal() const { + return isFiniteNonZero() && (exponent == semantics->minExponent) && + (APInt::tcExtractBit(significandParts(), + semantics->precision - 1) == 0); +} + +bool IEEEFloat::isSmallest() const { + // The smallest number by magnitude in our format will be the smallest + // denormal, i.e. the floating point number with exponent being minimum + // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
+ return isFiniteNonZero() && exponent == semantics->minExponent && + significandMSB() == 0; +} + +bool IEEEFloat::isSignificandAllOnes() const { + // Test if the significand excluding the integral bit is all ones. This allows + // us to test for binade boundaries. + const integerPart *Parts = significandParts(); + const unsigned PartCount = partCount(); + for (unsigned i = 0; i < PartCount - 1; i++) + if (~Parts[i]) + return false; + + // Set the unused high bits to all ones when we compare. + const unsigned NumHighBits = + PartCount*integerPartWidth - semantics->precision + 1; + assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + "fill than integerPartWidth"); + const integerPart HighBitFill = + ~integerPart(0) << (integerPartWidth - NumHighBits); + if (~(Parts[PartCount - 1] | HighBitFill)) + return false; + + return true; +} + +bool IEEEFloat::isSignificandAllZeros() const { + // Test if the significand excluding the integral bit is all zeros. This + // allows us to test for binade boundaries. + const integerPart *Parts = significandParts(); + const unsigned PartCount = partCount(); + + for (unsigned i = 0; i < PartCount - 1; i++) + if (Parts[i]) + return false; + + const unsigned NumHighBits = + PartCount*integerPartWidth - semantics->precision + 1; + assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + "clear than integerPartWidth"); + const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; + + if (Parts[PartCount - 1] & HighBitMask) + return false; + + return true; +} + +bool IEEEFloat::isLargest() const { + // The largest number by magnitude in our format will be the floating point + // number with maximum exponent and with significand that is all ones. + return isFiniteNonZero() && exponent == semantics->maxExponent + && isSignificandAllOnes(); +} + +bool IEEEFloat::isInteger() const { + // This could be made more efficient; I'm going for obviously correct. 
+ if (!isFinite()) return false; + IEEEFloat truncated = *this; + truncated.roundToIntegral(rmTowardZero); + return compare(truncated) == cmpEqual; +} + +bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { + if (this == &rhs) + return true; + if (semantics != rhs.semantics || + category != rhs.category || + sign != rhs.sign) + return false; + if (category==fcZero || category==fcInfinity) + return true; + + if (isFiniteNonZero() && exponent != rhs.exponent) + return false; + + return std::equal(significandParts(), significandParts() + partCount(), + rhs.significandParts()); +} + +IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { + initialize(&ourSemantics); + sign = 0; + category = fcNormal; + zeroSignificand(); + exponent = ourSemantics.precision - 1; + significandParts()[0] = value; + normalize(rmNearestTiesToEven, lfExactlyZero); +} + +IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { + initialize(&ourSemantics); + category = fcZero; + sign = false; +} + +// Delegate to the previous constructor, because later copy constructor may +// actually inspects category, which can't be garbage. 
+IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) + : IEEEFloat(ourSemantics) {} + +IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { + initialize(rhs.semantics); + assign(rhs); +} + +IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { + *this = std::move(rhs); +} + +IEEEFloat::~IEEEFloat() { freeSignificand(); } + +unsigned int IEEEFloat::partCount() const { + return partCountForBits(semantics->precision + 1); +} + +const IEEEFloat::integerPart *IEEEFloat::significandParts() const { + return const_cast<IEEEFloat *>(this)->significandParts(); +} + +IEEEFloat::integerPart *IEEEFloat::significandParts() { + if (partCount() > 1) + return significand.parts; + else + return &significand.part; +} + +void IEEEFloat::zeroSignificand() { + APInt::tcSet(significandParts(), 0, partCount()); +} + +/* Increment an fcNormal floating point number's significand. */ +void IEEEFloat::incrementSignificand() { + integerPart carry; + + carry = APInt::tcIncrement(significandParts(), partCount()); + + /* Our callers should never cause us to overflow. */ + assert(carry == 0); + (void)carry; +} + +/* Add the significand of the RHS. Returns the carry flag. */ +IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { + integerPart *parts; + + parts = significandParts(); + + assert(semantics == rhs.semantics); + assert(exponent == rhs.exponent); + + return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); +} + +/* Subtract the significand of the RHS with a borrow flag. Returns + the borrow flag. */ +IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, + integerPart borrow) { + integerPart *parts; + + parts = significandParts(); + + assert(semantics == rhs.semantics); + assert(exponent == rhs.exponent); + + return APInt::tcSubtract(parts, rhs.significandParts(), borrow, + partCount()); +} + +/* Multiply the significand of the RHS. 
If ADDEND is non-NULL, add it + on to the full-precision result of the multiplication. Returns the + lost fraction. */ +lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, + const IEEEFloat *addend) { + unsigned int omsb; // One, not zero, based MSB. + unsigned int partsCount, newPartsCount, precision; + integerPart *lhsSignificand; + integerPart scratch[4]; + integerPart *fullSignificand; + lostFraction lost_fraction; + bool ignored; + + assert(semantics == rhs.semantics); + + precision = semantics->precision; + + // Allocate space for twice as many bits as the original significand, plus one + // extra bit for the addition to overflow into. + newPartsCount = partCountForBits(precision * 2 + 1); + + if (newPartsCount > 4) + fullSignificand = new integerPart[newPartsCount]; + else + fullSignificand = scratch; + + lhsSignificand = significandParts(); + partsCount = partCount(); + + APInt::tcFullMultiply(fullSignificand, lhsSignificand, + rhs.significandParts(), partsCount, partsCount); + + lost_fraction = lfExactlyZero; + omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; + exponent += rhs.exponent; + + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // *this = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are three significant bits at the left-hand side of the + // radix point: two for the multiplication, and an overflow bit for the + // addition (that will always be zero at this point). Move the radix point + // toward left by two bits, and adjust exponent accordingly. + exponent += 2; + + if (addend && addend->isNonZero()) { + // The intermediate result of the multiplication has "2 * precision" + // signicant bit; adjust the addend to be consistent with mul result. 
+ // + Significand savedSignificand = significand; + const fltSemantics *savedSemantics = semantics; + fltSemantics extendedSemantics; + opStatus status; + unsigned int extendedPrecision; + + // Normalize our MSB to one below the top bit to allow for overflow. + extendedPrecision = 2 * precision + 1; + if (omsb != extendedPrecision - 1) { + assert(extendedPrecision > omsb); + APInt::tcShiftLeft(fullSignificand, newPartsCount, + (extendedPrecision - 1) - omsb); + exponent -= (extendedPrecision - 1) - omsb; + } + + /* Create new semantics. */ + extendedSemantics = *semantics; + extendedSemantics.precision = extendedPrecision; + + if (newPartsCount == 1) + significand.part = fullSignificand[0]; + else + significand.parts = fullSignificand; + semantics = &extendedSemantics; + + IEEEFloat extendedAddend(*addend); + status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); + assert(status == opOK); + (void)status; + + // Shift the significand of the addend right by one bit. This guarantees + // that the high bit of the significand is zero (same as fullSignificand), + // so the addition will overflow (if it does overflow at all) into the top bit. + lost_fraction = extendedAddend.shiftSignificandRight(1); + assert(lost_fraction == lfExactlyZero && + "Lost precision while shifting addend for fused-multiply-add."); + + lost_fraction = addOrSubtractSignificand(extendedAddend, false); + + /* Restore our state. */ + if (newPartsCount == 1) + fullSignificand[0] = significand.part; + significand = savedSignificand; + semantics = savedSemantics; + + omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; + } + + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // poision "2*precision - 1" to "precision - 1". The exponent need to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". 
+ exponent -= precision + 1; + + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e. "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call IEEEFloat::normalize() if normalized value is + // expected. + if (omsb > precision) { + unsigned int bits, significantParts; + lostFraction lf; + + bits = omsb - precision; + significantParts = partCountForBits(omsb); + lf = shiftRight(fullSignificand, significantParts, bits); + lost_fraction = combineLostFractions(lf, lost_fraction); + exponent += bits; + } + + APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); + + if (newPartsCount > 4) + delete [] fullSignificand; + + return lost_fraction; +} + +/* Multiply the significands of LHS and RHS to DST. */ +lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { + unsigned int bit, i, partsCount; + const integerPart *rhsSignificand; + integerPart *lhsSignificand, *dividend, *divisor; + integerPart scratch[4]; + lostFraction lost_fraction; + + assert(semantics == rhs.semantics); + + lhsSignificand = significandParts(); + rhsSignificand = rhs.significandParts(); + partsCount = partCount(); + + if (partsCount > 2) + dividend = new integerPart[partsCount * 2]; + else + dividend = scratch; + + divisor = dividend + partsCount; + + /* Copy the dividend and divisor as they will be modified in-place. */ + for (i = 0; i < partsCount; i++) { + dividend[i] = lhsSignificand[i]; + divisor[i] = rhsSignificand[i]; + lhsSignificand[i] = 0; + } + + exponent -= rhs.exponent; + + unsigned int precision = semantics->precision; + + /* Normalize the divisor. */ + bit = precision - APInt::tcMSB(divisor, partsCount) - 1; + if (bit) { + exponent += bit; + APInt::tcShiftLeft(divisor, partsCount, bit); + } + + /* Normalize the dividend. 
*/ + bit = precision - APInt::tcMSB(dividend, partsCount) - 1; + if (bit) { + exponent -= bit; + APInt::tcShiftLeft(dividend, partsCount, bit); + } + + /* Ensure the dividend >= divisor initially for the loop below. + Incidentally, this means that the division loop below is + guaranteed to set the integer bit to one. */ + if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { + exponent--; + APInt::tcShiftLeft(dividend, partsCount, 1); + assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); + } + + /* Long division. */ + for (bit = precision; bit; bit -= 1) { + if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { + APInt::tcSubtract(dividend, divisor, 0, partsCount); + APInt::tcSetBit(lhsSignificand, bit - 1); + } + + APInt::tcShiftLeft(dividend, partsCount, 1); + } + + /* Figure out the lost fraction. */ + int cmp = APInt::tcCompare(dividend, divisor, partsCount); + + if (cmp > 0) + lost_fraction = lfMoreThanHalf; + else if (cmp == 0) + lost_fraction = lfExactlyHalf; + else if (APInt::tcIsZero(dividend, partsCount)) + lost_fraction = lfExactlyZero; + else + lost_fraction = lfLessThanHalf; + + if (partsCount > 2) + delete [] dividend; + + return lost_fraction; +} + +unsigned int IEEEFloat::significandMSB() const { + return APInt::tcMSB(significandParts(), partCount()); +} + +unsigned int IEEEFloat::significandLSB() const { + return APInt::tcLSB(significandParts(), partCount()); +} + +/* Note that a zero result is NOT normalized to fcZero. */ +lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { + /* Our exponent should not overflow. */ + assert((ExponentType) (exponent + bits) >= exponent); + + exponent += bits; + + return shiftRight(significandParts(), partCount(), bits); +} + +/* Shift the significand left BITS bits, subtract BITS from its exponent. 
*/ +void IEEEFloat::shiftSignificandLeft(unsigned int bits) { + assert(bits < semantics->precision); + + if (bits) { + unsigned int partsCount = partCount(); + + APInt::tcShiftLeft(significandParts(), partsCount, bits); + exponent -= bits; + + assert(!APInt::tcIsZero(significandParts(), partsCount)); + } +} + +IEEEFloat::cmpResult +IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { + int compare; + + assert(semantics == rhs.semantics); + assert(isFiniteNonZero()); + assert(rhs.isFiniteNonZero()); + + compare = exponent - rhs.exponent; + + /* If exponents are equal, do an unsigned bignum comparison of the + significands. */ + if (compare == 0) + compare = APInt::tcCompare(significandParts(), rhs.significandParts(), + partCount()); + + if (compare > 0) + return cmpGreaterThan; + else if (compare < 0) + return cmpLessThan; + else + return cmpEqual; +} + +/* Handle overflow. Sign is preserved. We either become infinity or + the largest finite number. */ +IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { + /* Infinity? */ + if (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway || + (rounding_mode == rmTowardPositive && !sign) || + (rounding_mode == rmTowardNegative && sign)) { + category = fcInfinity; + return (opStatus) (opOverflow | opInexact); + } + + /* Otherwise we become the largest finite number. */ + category = fcNormal; + exponent = semantics->maxExponent; + APInt::tcSetLeastSignificantBits(significandParts(), partCount(), + semantics->precision); + + return opInexact; +} + +/* Returns TRUE if, when truncating the current number, with BIT the + new LSB, with the given lost fraction and rounding mode, the result + would need to be rounded away from zero (i.e., by increasing the + signficand). This routine must work for fcZero of both signs, and + fcNormal numbers. 
*/ +bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, + lostFraction lost_fraction, + unsigned int bit) const { + /* NaNs and infinities should not have lost fractions. */ + assert(isFiniteNonZero() || category == fcZero); + + /* Current callers never pass this so we don't handle it. */ + assert(lost_fraction != lfExactlyZero); + + switch (rounding_mode) { + case rmNearestTiesToAway: + return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; + + case rmNearestTiesToEven: + if (lost_fraction == lfMoreThanHalf) + return true; + + /* Our zeroes don't have a significand to test. */ + if (lost_fraction == lfExactlyHalf && category != fcZero) + return APInt::tcExtractBit(significandParts(), bit); + + return false; + + case rmTowardZero: + return false; + + case rmTowardPositive: + return !sign; + + case rmTowardNegative: + return sign; + } + llvm_unreachable("Invalid rounding mode found"); +} + +IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, + lostFraction lost_fraction) { + unsigned int omsb; /* One, not zero, based MSB. */ + int exponentChange; + + if (!isFiniteNonZero()) + return opOK; + + /* Before rounding normalize the exponent of fcNormal numbers. */ + omsb = significandMSB() + 1; + + if (omsb) { + /* OMSB is numbered from 1. We want to place it in the integer + bit numbered PRECISION if possible, with a compensating change in + the exponent. */ + exponentChange = omsb - semantics->precision; + + /* If the resulting exponent is too high, overflow according to + the rounding mode. */ + if (exponent + exponentChange > semantics->maxExponent) + return handleOverflow(rounding_mode); + + /* Subnormal numbers have exponent minExponent, and their MSB + is forced based on that. */ + if (exponent + exponentChange < semantics->minExponent) + exponentChange = semantics->minExponent - exponent; + + /* Shifting left is easy as we don't lose precision. 
*/ + if (exponentChange < 0) { + assert(lost_fraction == lfExactlyZero); + + shiftSignificandLeft(-exponentChange); + + return opOK; + } + + if (exponentChange > 0) { + lostFraction lf; + + /* Shift right and capture any new lost fraction. */ + lf = shiftSignificandRight(exponentChange); + + lost_fraction = combineLostFractions(lf, lost_fraction); + + /* Keep OMSB up-to-date. */ + if (omsb > (unsigned) exponentChange) + omsb -= exponentChange; + else + omsb = 0; + } + } + + /* Now round the number according to rounding_mode given the lost + fraction. */ + + /* As specified in IEEE 754, since we do not trap we do not report + underflow for exact results. */ + if (lost_fraction == lfExactlyZero) { + /* Canonicalize zeroes. */ + if (omsb == 0) + category = fcZero; + + return opOK; + } + + /* Increment the significand if we're rounding away from zero. */ + if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { + if (omsb == 0) + exponent = semantics->minExponent; + + incrementSignificand(); + omsb = significandMSB() + 1; + + /* Did the significand increment overflow? */ + if (omsb == (unsigned) semantics->precision + 1) { + /* Renormalize by incrementing the exponent and shifting our + significand right one. However if we already have the + maximum exponent we overflow to infinity. */ + if (exponent == semantics->maxExponent) { + category = fcInfinity; + + return (opStatus) (opOverflow | opInexact); + } + + shiftSignificandRight(1); + + return opInexact; + } + } + + /* The normal case - we were and are not denormal, and any + significand increment above didn't overflow. */ + if (omsb == semantics->precision) + return opInexact; + + /* We have a non-zero denormal. */ + assert(omsb < semantics->precision); + + /* Canonicalize zeroes. */ + if (omsb == 0) + category = fcZero; + + /* The fcZero case is a denormal that underflowed to zero. 
*/ + return (opStatus) (opUnderflow | opInexact); +} + +IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, + bool subtract) { + switch (PackCategoriesIntoKey(category, rhs.category)) { + default: + llvm_unreachable(nullptr); + + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcZero): + return opOK; + + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + // We need to be sure to flip the sign here for subtraction because we + // don't have a separate negate operation so -NaN becomes 0 - NaN here. + sign = rhs.sign ^ subtract; + category = fcNaN; + copySignificand(rhs); + return opOK; + + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcInfinity): + category = fcInfinity; + sign = rhs.sign ^ subtract; + return opOK; + + case PackCategoriesIntoKey(fcZero, fcNormal): + assign(rhs); + sign = rhs.sign ^ subtract; + return opOK; + + case PackCategoriesIntoKey(fcZero, fcZero): + /* Sign depends on rounding mode; handled by caller. */ + return opOK; + + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + /* Differently signed infinities can only be validly + subtracted. */ + if (((sign ^ rhs.sign)!=0) != subtract) { + makeNaN(); + return opInvalidOp; + } + + return opOK; + + case PackCategoriesIntoKey(fcNormal, fcNormal): + return opDivByZero; + } +} + +/* Add or subtract two normal numbers. */ +lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, + bool subtract) { + integerPart carry; + lostFraction lost_fraction; + int bits; + + /* Determine if the operation on the absolute values is effectively + an addition or subtraction. 
*/ + subtract ^= static_cast<bool>(sign ^ rhs.sign); + + /* Are we bigger exponent-wise than the RHS? */ + bits = exponent - rhs.exponent; + + /* Subtraction is more subtle than one might naively expect. */ + if (subtract) { + IEEEFloat temp_rhs(rhs); + bool reverse; + + if (bits == 0) { + reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; + lost_fraction = lfExactlyZero; + } else if (bits > 0) { + lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); + shiftSignificandLeft(1); + reverse = false; + } else { + lost_fraction = shiftSignificandRight(-bits - 1); + temp_rhs.shiftSignificandLeft(1); + reverse = true; + } + + if (reverse) { + carry = temp_rhs.subtractSignificand + (*this, lost_fraction != lfExactlyZero); + copySignificand(temp_rhs); + sign = !sign; + } else { + carry = subtractSignificand + (temp_rhs, lost_fraction != lfExactlyZero); + } + + /* Invert the lost fraction - it was on the RHS and + subtracted. */ + if (lost_fraction == lfLessThanHalf) + lost_fraction = lfMoreThanHalf; + else if (lost_fraction == lfMoreThanHalf) + lost_fraction = lfLessThanHalf; + + /* The code above is intended to ensure that no borrow is + necessary. */ + assert(!carry); + (void)carry; + } else { + if (bits > 0) { + IEEEFloat temp_rhs(rhs); + + lost_fraction = temp_rhs.shiftSignificandRight(bits); + carry = addSignificand(temp_rhs); + } else { + lost_fraction = shiftSignificandRight(-bits); + carry = addSignificand(rhs); + } + + /* We have a guard bit; generating a carry cannot happen. 
*/
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

/* Resolve the operand-category combinations of a multiplication that
   need no significand arithmetic.  *this is the left operand and is
   updated in place.  Returns opInvalidOp for zero * infinity (after
   calling makeNaN) and opOK for everything else; the (normal, normal)
   case is left untouched for the caller to finish.  */
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* Left operand is already a NaN: keep it, but clear the sign.  */
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign = false;
    return opOK;

  /* Only the right operand is a NaN: become a NaN carrying its
     significand.  */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    sign = false;
    category = fcNaN;
    copySignificand(rhs);
    return opOK;

  /* Anything non-zero times infinity is infinity; the sign is whatever
     the caller already stored in `sign`.  */
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  /* Zero times infinity is an invalid operation.  */
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

/* Resolve the operand-category combinations of a division (*this / rhs)
   that need no significand arithmetic.  *this is updated in place.
   Returns opDivByZero for normal / zero, opInvalidOp for inf / inf and
   zero / zero, and opOK otherwise; the (normal, normal) case is left
   untouched for the caller to finish.  */
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* The three groups below deliberately fall through into each other:
     a NaN divisor is copied in first, then every NaN result has its
     sign cleared, then all of them share the opOK return.  */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    category = fcNaN;
    copySignificand(rhs);
    LLVM_FALLTHROUGH;
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign = false;
    LLVM_FALLTHROUGH;
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
    category = fcInfinity;
    return opDivByZero;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

/* Resolve the operand-category combinations of mod (*this mod rhs)
   that need no significand arithmetic.  *this is updated in place.
   Returns opInvalidOp when the dividend is infinite or the divisor is
   zero (and neither is a NaN), and opOK otherwise; the (normal, normal)
   case is left untouched for the caller to finish.  */
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* In all of these the result is the left operand, unchanged.  */
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  /* Only the right operand is a NaN: become a NaN carrying its
     significand.  */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    sign = false;
    category = fcNaN;
    copySignificand(rhs);
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

/* Change sign.  Flips the sign bit whatever the category is.  */
void IEEEFloat::changeSign() {
  /* Look mummy, this one's easy.  */
  sign = !sign;
}

/* Normalized addition or subtraction.  Computes *this + rhs, or
   *this - rhs when SUBTRACT is true, in place, rounding according to
   ROUNDING_MODE, and returns the resulting status.  */
IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
                                             roundingMode rounding_mode,
                                             bool subtract) {
  opStatus fs;

  fs = addOrSubtractSpecials(rhs, subtract);

  /* This return code means it was not a simple case.  (opDivByZero is
     used here purely as an internal "needs real arithmetic" marker; it
     is overwritten by normalize below.)  */
  if (fs == opDivByZero) {
    lostFraction lost_fraction;

    lost_fraction = addOrSubtractSignificand(rhs, subtract);
    fs = normalize(rounding_mode, lost_fraction);

    /* Can only be zero if we lost no fraction.  */
    assert(category != fcZero || lost_fraction == lfExactlyZero);
  }

  /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
     positive zero unless rounding to minus infinity, except that
     adding two like-signed zeroes gives that zero.  */
  if (category == fcZero) {
    if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
      sign = (rounding_mode == rmTowardNegative);
  }

  return fs;
}

/* Normalized addition.  Forwards to addOrSubtract with subtract == false.  */
IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
                                   roundingMode rounding_mode) {
  return addOrSubtract(rhs, rounding_mode, false);
}

/* Normalized subtraction.  Forwards to addOrSubtract with
   subtract == true.  */
IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
                                        roundingMode rounding_mode) {
  return addOrSubtract(rhs, rounding_mode, true);
}

/* Normalized multiply.  Computes *this * rhs in place and returns the
   resulting status.  */
IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
                                        roundingMode rounding_mode) {
  opStatus fs;

  /* The sign is combined up front; multiplySpecials overrides it where a
     special case demands (e.g. NaN results).  */
  sign ^= rhs.sign;
  fs = multiplySpecials(rhs);

  /* Still finite and non-zero after the specials pass: do the actual
     significand multiplication and round.  */
  if (isFiniteNonZero()) {
    lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);
  }

  return fs;
}

/* Normalized divide.  Computes *this / rhs in place and returns the
   resulting status.  */
IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
                                      roundingMode rounding_mode) {
  opStatus fs;

  /* The sign is combined up front; divideSpecials overrides it where a
     special case demands (e.g. NaN results).  */
  sign ^= rhs.sign;
  fs = divideSpecials(rhs);

  /* Still finite and non-zero after the specials pass: do the actual
     significand division and round.  */
  if (isFiniteNonZero()) {
    lostFraction lost_fraction = divideSignificand(rhs);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);
  }

  return fs;
}

/* Normalized remainder.  This is not currently correct in all cases.
*/
/* Computes *this - rhs * N in place, where N is *this / rhs rounded to an
   integer with round-to-nearest-even.  Returns early, leaving *this
   unmodified, if the division reports opDivByZero or the quotient cannot
   be converted to an integer (opInvalidOp).  */
IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
  opStatus fs;
  IEEEFloat V = *this;
  unsigned int origSign = sign;

  fs = V.divide(rhs, rmNearestTiesToEven);
  if (fs == opDivByZero)
    return fs;

  /* Scratch integer wide enough for every part of the significand; it
     is released on both exits below.  */
  int parts = partCount();
  integerPart *x = new integerPart[parts];
  bool ignored;
  fs = V.convertToInteger(makeMutableArrayRef(x, parts),
                          parts * integerPartWidth, true, rmNearestTiesToEven,
                          &ignored);
  if (fs == opInvalidOp) {
    delete[] x;
    return fs;
  }

  fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
                                        rmNearestTiesToEven);
  assert(fs==opOK);   // should always work

  fs = V.multiply(rhs, rmNearestTiesToEven);
  assert(fs==opOK || fs==opInexact);   // should not overflow or underflow

  fs = subtract(V, rmNearestTiesToEven);
  assert(fs==opOK || fs==opInexact);   // likewise

  if (isZero())
    sign = origSign;    // IEEE754 requires this
  delete[] x;
  return fs;
}

/* Normalized llvm frem (C fmod).  Reduces *this in place by repeatedly
   subtracting a power-of-two multiple of rhs until |*this| < |rhs|.  */
IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
  opStatus fs;
  fs = modSpecials(rhs);
  /* Captured after modSpecials, which may already have cleared the sign
     for NaN results.  */
  unsigned int origSign = sign;

  while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
         compareAbsoluteValue(rhs) != cmpLessThan) {
    /* Scale rhs up to the binary exponent of *this; if that overshoots
       |*this|, step back by one power of two.  */
    IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
    if (compareAbsoluteValue(V) == cmpLessThan)
      V = scalbn(V, -1, rmNearestTiesToEven);
    /* Same sign as *this, so the subtraction shrinks the magnitude.  */
    V.sign = sign;

    fs = subtract(V, rmNearestTiesToEven);
    assert(fs==opOK);
  }
  if (isZero())
    sign = origSign; // fmod requires this
  return fs;
}

/* Normalized fused-multiply-add.  Computes *this * multiplicand + addend
   in place and returns the resulting status.  */
IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
                                                const IEEEFloat &addend,
                                                roundingMode rounding_mode) {
  opStatus fs;

  /* Post-multiplication sign, before addition.  */
  sign ^= multiplicand.sign;

  /* If and only if all arguments are normal do we need to do an
     extended-precision calculation.  */
  if (isFiniteNonZero() &&
      multiplicand.isFiniteNonZero() &&
      addend.isFinite()) {
    lostFraction lost_fraction;

    lost_fraction = multiplySignificand(multiplicand, &addend);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);

    /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
       positive zero unless rounding to minus infinity, except that
       adding two like-signed zeroes gives that zero.  */
    if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
      sign = (rounding_mode == rmTowardNegative);
  } else {
    fs = multiplySpecials(multiplicand);

    /* FS can only be opOK or opInvalidOp.  There is no more work
       to do in the latter case.  The IEEE-754R standard says it is
       implementation-defined in this case whether, if ADDEND is a
       quiet NaN, we raise invalid op; this implementation does so.

       If we need to do the addition we can do so with normal
       precision.  */
    if (fs == opOK)
      fs = addOrSubtract(addend, rounding_mode, false);
  }

  return fs;
}

/* Rounding-mode correct round to integral value.  Rounds *this in place
   to an integer-valued float using ROUNDING_MODE and returns the status
   of the last arithmetic step performed.  */
IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
  opStatus fs;

  // If the exponent is large enough, we know that this value is already
  // integral, and the arithmetic below would potentially cause it to saturate
  // to +/-Inf.  Bail out early instead.
  if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
    return opOK;

  // The algorithm here is quite simple: we add 2^(p-1), where p is the
  // precision of our format, and then subtract it back off again.  The choice
  // of rounding modes for the addition/subtraction determines the rounding mode
  // for our integral rounding as well.
  // NOTE: When the input value is negative, we do subtraction followed by
  // addition instead.
  APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
  IntegerConstant <<= semanticsPrecision(*semantics)-1;
  IEEEFloat MagicConstant(*semantics);
  fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
                                      rmNearestTiesToEven);
  // Giving the constant our own sign is what turns add-then-subtract into
  // subtract-then-add for negative inputs.
  MagicConstant.sign = sign;

  if (fs != opOK)
    return fs;

  // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
  bool inputSign = isNegative();

  fs = add(MagicConstant, rounding_mode);
  if (fs != opOK && fs != opInexact)
    return fs;

  fs = subtract(MagicConstant, rounding_mode);

  // Restore the input sign.
  if (inputSign != isNegative())
    changeSign();

  return fs;
}


/* Comparison requires normalized numbers.  Returns cmpUnordered if
   either operand is a NaN; otherwise orders *this against rhs.  Zeroes
   compare equal regardless of sign.  Both operands must share the same
   semantics.  */
IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
  cmpResult result;

  assert(semantics == rhs.semantics);

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    return cmpUnordered;

  /* Left operand has the larger magnitude class: its sign alone decides.  */
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  /* Right operand has the larger magnitude class: its sign alone decides.  */
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    if (rhs.sign)
      return cmpGreaterThan;
    else
      return cmpLessThan;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    if (sign == rhs.sign)
      return cmpEqual;
    else if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  case PackCategoriesIntoKey(fcZero, fcZero):
    return cmpEqual;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    break;
  }

  /* Two normal numbers.  Do they have the same sign?  */
  if (sign != rhs.sign) {
    if (sign)
      result = cmpLessThan;
    else
      result = cmpGreaterThan;
  } else {
    /* Compare absolute values; invert result if negative.  */
    result = compareAbsoluteValue(rhs);

    if (sign) {
      if (result == cmpLessThan)
        result = cmpGreaterThan;
      else if (result == cmpGreaterThan)
        result = cmpLessThan;
    }
  }

  return result;
}

/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// The conversion happens in place: the significand storage is resized if
/// the part count changes and `semantics` is switched to \p toSemantics.

IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                       roundingMode rounding_mode,
                                       bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive when widening the significand, negative when narrowing it.
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  if (shift < 0 && isFiniteNonZero()) {
    int exponentChange = significandMSB() + 1 - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold to new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
    // does not give you back the same bits.  This is dubious, and we
    // don't currently do it.  You're really supposed to get
    // an invalid operation signal at runtime, but nobody does that.
    fs = opOK;
  } else {
    // Zero and infinity convert exactly.
    *losesInfo = false;
    fs = opOK;
  }

  return fs;
}

/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   PARTS receives the WIDTH-bit result and must hold at least
   partCountForBits(WIDTH) parts.  *ISEXACT is set to true only when the
   conversion lost nothing (in particular it is false for negative
   zero).  */
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0. */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    /* Non-negative: a signed destination loses one bit to the sign.  */
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}

/* Same as convertToSignExtendedInteger, except we provide
   deterministic values in case of an invalid operation exception,
   namely zero for NaNs and the minimal or maximal value respectively
   for underflow or overflow.
   The *isExact output tells whether the result is exact, in the sense
   that converting it back to the original floating point type produces
   the original value.  This is almost equivalent to result==opOK,
   except for negative zeroes.
*/
IEEEFloat::opStatus
IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
                            unsigned int width, bool isSigned,
                            roundingMode rounding_mode, bool *isExact) const {
  opStatus fs;

  fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
                                    isExact);

  if (fs == opInvalidOp) {
    unsigned int bits, dstPartsCount;

    dstPartsCount = partCountForBits(width);
    assert(dstPartsCount <= parts.size() && "Integer too big");

    /* Number of low bits to set in the saturated result: none for a NaN
       (zero), one for a negative signed value (shifted up to the sign
       bit below, giving the minimum), none for a negative unsigned
       value (zero), and every value bit for a positive value (the
       maximum).  */
    if (category == fcNaN)
      bits = 0;
    else if (sign)
      bits = isSigned;
    else
      bits = width - isSigned;

    APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
    if (sign && isSigned)
      APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
  }

  return fs;
}

/* Convert an unsigned integer SRC to a floating point number,
   rounding according to ROUNDING_MODE.  The sign of the floating
   point number is not modified.  SRC is SRCCOUNT parts long.  */
IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
    const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
  unsigned int omsb, precision, dstCount;
  integerPart *dst;
  lostFraction lost_fraction;

  category = fcNormal;
  /* One-based position of the most significant set bit of SRC.  */
  omsb = APInt::tcMSB(src, srcCount) + 1;
  dst = significandParts();
  dstCount = partCount();
  precision = semantics->precision;

  /* We want the most significant PRECISION bits of SRC.  There may not
     be that many; extract what we can.  */
  if (precision <= omsb) {
    exponent = omsb - 1;
    lost_fraction = lostFractionThroughTruncation(src, srcCount,
                                                  omsb - precision);
    APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
  } else {
    exponent = precision - 1;
    lost_fraction = lfExactlyZero;
    APInt::tcExtract(dst, dstCount, src, omsb, 0);
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Convert the integer VAL to a floating point number in place, rounding
   according to ROUNDING_MODE.  When ISSIGNED is true and VAL is
   negative, the sign is set and the magnitude of VAL is converted;
   otherwise VAL is treated as unsigned.  */
IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
                                                roundingMode rounding_mode) {
  unsigned int partCount = Val.getNumWords();
  APInt api = Val;

  sign = false;
  if (isSigned && api.isNegative()) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Convert a two's complement integer SRC to a floating point number,
   rounding according to ROUNDING_MODE.  ISSIGNED is true if the
   integer is signed, in which case it must be sign-extended.  */
IEEEFloat::opStatus
IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
                                          unsigned int srcCount, bool isSigned,
                                          roundingMode rounding_mode) {
  opStatus status;

  /* The sign is read from the top bit of the top part, which is why SRC
     has to be sign-extended to the full SRCCOUNT parts.  */
  if (isSigned &&
      APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
    integerPart *copy;

    /* If we're signed and negative negate a copy.  */
    sign = true;
    copy = new integerPart[srcCount];
    APInt::tcAssign(copy, src, srcCount);
    APInt::tcNegate(copy, srcCount);
    status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
    delete [] copy;
  } else {
    sign = false;
    status = convertFromUnsignedParts(src, srcCount, rounding_mode);
  }

  return status;
}

/* FIXME: should this just take a const APInt reference?
*/
/* Convert the WIDTH-bit two's complement integer in PARTS to a floating
   point number in place, rounding according to ROUNDING_MODE.  When
   ISSIGNED is true, bit WIDTH - 1 is the sign bit; a negative value has
   its magnitude converted and the sign set.  */
IEEEFloat::opStatus
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
                                          unsigned int width, bool isSigned,
                                          roundingMode rounding_mode) {
  unsigned int partCount = partCountForBits(width);
  APInt api = APInt(width, makeArrayRef(parts, partCount));

  sign = false;
  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Parse the hexadecimal significand and binary exponent in S (the text
   after the "0x" prefix, e.g. "1.8p3") into *this, rounding according
   to ROUNDING_MODE.  The sign is not touched.  Malformed input is
   caught only by assertions.  */
IEEEFloat::opStatus
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  /* Digits are written a nibble at a time from the top of the
     significand storage downwards; bitPos is the next free position.  */
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      assert(dot == end && "String contains multiple dots");
      dot = p++;
      continue;
    }

    /* The first non-hex character (expected to be the 'p' exponent
       marker) ends the significand.  */
    hex_value = hexDigitValue(*p);
    if (hex_value == -1U)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      /* Storage is full: classify everything from this digit onwards as
         a lost fraction, once.  */
      lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  assert(p != end && "Hex strings require an exponent");
  assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
  assert(p != begin && "Significand has no digits");
  assert((dot == end || p - begin != 1) && "Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    exponent = totalExponent(p + 1, end, expAdjustment);
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Set *this to the correctly rounded value of DECSIG * 10^EXP, where
   DECSIG is the unsigned integer held in the SIGPARTCOUNT parts at
   DECSIGPARTS.  The product (or quotient) with 5^|EXP| is computed at a
   working precision that is doubled on each loop iteration until the
   accumulated error bound shows that truncating rounds correctly.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}

/* Parse the unsigned decimal literal STR (digits, optional point,
   optional exponent) into *this, rounding according to ROUNDING_MODE.
   The sign is not touched.  Obvious zero, overflow and underflow cases
   are settled from the exponent alone; everything else is converted
   digit by digit and handed to roundSignificandWithExponent.  */
IEEEFloat::opStatus
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  interpretDecimal(p, str.end(), &D);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;

  /* Check whether the normalized exponent is high enough to overflow
     max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

  /* If it wasn't, then it also wasn't high enough to overflow max
     during the log-rebasing in the min-exponent check.  Check that it
     won't overflow min in either check, then perform the min-exponent
     check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

  /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount is the number of parts actually in use.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow do we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      do {
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        assert(decValue < 10U && "Invalid character in significand");
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}

/* Recognize the textual spellings of infinity and NaN.  If STR is
   exactly one of the spellings listed below, set *this accordingly and
   return true; otherwise leave *this alone and return false.  The match
   is case-sensitive and limited to these literal forms.  */
bool IEEEFloat::convertFromStringSpecials(StringRef str) {
  if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
    makeInf(false);
    return true;
  }

  if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
    makeInf(true);
    return true;
  }

  /* NOTE(review): the second makeNaN argument is presumably the sign --
     confirm against makeNaN's declaration.  */
  if (str.equals("nan") || str.equals("NaN")) {
    makeNaN(false, false);
    return true;
  }

  if (str.equals("-nan") || str.equals("-NaN")) {
    makeNaN(false, true);
    return true;
  }

  return false;
}

/* Parse STR into *this, rounding according to ROUNDING_MODE.  Accepts
   the special spellings handled by convertFromStringSpecials, then an
   optional leading '+' or '-', followed by either a "0x"/"0X"
   hexadecimal literal or a decimal literal.  STR must not be empty.  */
IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
                                                 roundingMode rounding_mode) {
  assert(!str.empty() && "Invalid string length");

  // Handle special cases.
  if (convertFromStringSpecials(str))
    return opOK;

  /* Handle a leading minus sign.  */
  StringRef::iterator p = str.begin();
  size_t slen = str.size();
  sign = *p == '-' ? 1 : 0;
  if (*p == '-' || *p == '+') {
    p++;
    slen--;
    assert(slen && "String has no digits");
  }

  if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
    assert(slen - 2 && "Invalid string");
    return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
                                        rounding_mode);
  }

  return convertFromDecimalString(StringRef(p, slen), rounding_mode);
}

/* Write out a hexadecimal representation of the floating point value
   to DST, which must be of sufficient size, in the C99 form
   [-]0xh.hhhhp[+-]d.  Return the number of characters written,
   excluding the terminating NUL.

   If UPPERCASE, the output is in upper case, otherwise in lower case.

   HEXDIGITS digits appear altogether, rounding the value if
   necessary.  If HEXDIGITS is 0, the minimal precision to display the
   number precisely is used instead.  If nothing would appear after
   the decimal point it is suppressed.

   The decimal exponent is always printed and has at least one digit.
   Zero values display an exponent of zero.  Infinities and NaNs
   appear as "infinity" or "nan" respectively.

   The above rules are as specified by C99.  There is ambiguity about
   what the leading hexadecimal digit should be.  This implementation
   uses whatever is necessary so that the exponent is displayed as
   stored.  This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
*/
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
                                           bool upperCase,
                                           roundingMode rounding_mode) const {
  char *p;

  /* Remember the start of the buffer so the length can be computed.  */
  p = dst;
  if (sign)
    *dst++ = '-';

  switch (category) {
  case fcInfinity:
    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
    dst += sizeof infinityL - 1;
    break;

  case fcNaN:
    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
    dst += sizeof NaNU - 1;
    break;

  case fcZero:
    *dst++ = '0';
    *dst++ = upperCase ? 'X': 'x';
    *dst++ = '0';
    if (hexDigits > 1) {
      *dst++ = '.';
      memset (dst, '0', hexDigits - 1);
      dst += hexDigits - 1;
    }
    *dst++ = upperCase ? 'P': 'p';
    *dst++ = '0';
    break;

  case fcNormal:
    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
    break;
  }

  *dst = 0;

  return static_cast<unsigned int>(dst - p);
}

/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.  Returns a
   pointer just past the last character written; no NUL is added.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Note that hexDigitChars has a trailing '0'.  */
    /* Increment digits from the right, carrying while a digit wraps
       round to '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

// Hash an IEEEFloat.  Zeroes, infinities and NaNs are hashed from their
// category, sign and precision only; finite non-zero values additionally mix
// in the exponent and every significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                        Arg.significandParts(),
                        Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.

// Pack an x87 double-extended value into an 80-bit APInt: the low word holds
// the 64-bit significand, and the low 16 bits of the high word hold the sign
// (bit 15) and the 15-bit biased exponent.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
  assert(partCount()==2);

  uint64_t myexponent, mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent+16383; //bias
    mysignificand = significandParts()[0];
    // The integer bit is stored explicitly here, so a clear top bit at the
    // minimum exponent marks a denormal.
    if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
      myexponent = 0;   // denormal
  } else if (category==fcZero) {
    myexponent = 0;
    mysignificand = 0;
  } else if (category==fcInfinity) {
    myexponent = 0x7fff;
    mysignificand = 0x8000000000000000ULL;
  } else {
    assert(category == fcNaN && "Unknown category");
    myexponent = 0x7fff;
    mysignificand = significandParts()[0];
  }

  uint64_t words[2];
  words[0] = mysignificand;
  words[1] =  ((uint64_t)(sign & 1) << 15) |
              (myexponent & 0x7fffLL);
  return APInt(80, words);
}

APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  uint64_t words[2];
  opStatus fs;
  bool losesInfo;

  // Convert number to double.
To avoid spurious underflows, we re- + // normalize against the "double" minExponent first, and only *then* + // truncate the mantissa. The result of that second conversion + // may be inexact, but should never underflow. + // Declare fltSemantics before APFloat that uses it (and + // saves pointer to it) to ensure correct destruction order. + fltSemantics extendedSemantics = *semantics; + extendedSemantics.minExponent = semIEEEdouble.minExponent; + IEEEFloat extended(*this); + fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + IEEEFloat u(extended); + fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK || fs == opInexact); + (void)fs; + words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); + + // If conversion was exact or resulted in a special case, we're done; + // just set the second double to zero. Otherwise, re-convert back to + // the extended format and compute the difference. This now should + // convert exactly to double. 
+ if (u.isFiniteNonZero() && losesInfo) { + fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + IEEEFloat v(extended); + v.subtract(u, rmNearestTiesToEven); + fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); + } else { + words[1] = 0; + } + + return APInt(128, words); +} + +APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { + assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); + assert(partCount()==2); + + uint64_t myexponent, mysignificand, mysignificand2; + + if (isFiniteNonZero()) { + myexponent = exponent+16383; //bias + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = mysignificand2 = 0; + } else if (category==fcInfinity) { + myexponent = 0x7fff; + mysignificand = mysignificand2 = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7fff; + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + } + + uint64_t words[2]; + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7fff) << 48) | + (mysignificand2 & 0xffffffffffffLL); + + return APInt(128, words); +} + +APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { + assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); + assert(partCount()==1); + + uint64_t myexponent, mysignificand; + + if (isFiniteNonZero()) { + myexponent = exponent+1023; //bias + mysignificand = *significandParts(); + if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x7ff; + 
mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7ff; + mysignificand = *significandParts(); + } + + return APInt(64, ((((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7ff) << 52) | + (mysignificand & 0xfffffffffffffLL)))); +} + +APInt IEEEFloat::convertFloatAPFloatToAPInt() const { + assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); + assert(partCount()==1); + + uint32_t myexponent, mysignificand; + + if (isFiniteNonZero()) { + myexponent = exponent+127; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x800000)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0xff; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0xff; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | + (mysignificand & 0x7fffff))); +} + +APInt IEEEFloat::convertHalfAPFloatToAPInt() const { + assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); + assert(partCount()==1); + + uint32_t myexponent, mysignificand; + + if (isFiniteNonZero()) { + myexponent = exponent+15; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x400)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x1f; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x1f; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | + (mysignificand & 0x3ff))); +} + +// This function creates an APInt that is just a bit map of the floating +// point constant as it would appear in memory. 
It is not a conversion, +// and treating the result as a normal integer is unlikely to be useful. + +APInt IEEEFloat::bitcastToAPInt() const { + if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) + return convertHalfAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) + return convertFloatAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) + return convertDoubleAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics*)&semIEEEquad) + return convertQuadrupleAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) + return convertPPCDoubleDoubleAPFloatToAPInt(); + + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && + "unknown format!"); + return convertF80LongDoubleAPFloatToAPInt(); +} + +float IEEEFloat::convertToFloat() const { + assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && + "Float semantics are not IEEEsingle"); + APInt api = bitcastToAPInt(); + return api.bitsToFloat(); +} + +double IEEEFloat::convertToDouble() const { + assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && + "Float semantics are not IEEEdouble"); + APInt api = bitcastToAPInt(); + return api.bitsToDouble(); +} + +/// Integer bit is explicit in this format. Intel hardware (387 and later) +/// does not support these bit patterns: +/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") +/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") +/// exponent!=0 nor all 1's, integer bit 0 ("unnormal") +/// exponent = 0, integer bit 1 ("pseudodenormal") +/// At the moment, the first three are treated as NaNs, the last one as Normal. 
+void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { + assert(api.getBitWidth()==80); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i2 & 0x7fff); + uint64_t mysignificand = i1; + uint8_t myintegerbit = mysignificand >> 63; + + initialize(&semX87DoubleExtended); + assert(partCount()==2); + + sign = static_cast<unsigned int>(i2>>15); + if (myexponent == 0 && mysignificand == 0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { + // exponent, significand meaningless + category = fcInfinity; + } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || + (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { + // exponent meaningless + category = fcNaN; + significandParts()[0] = mysignificand; + significandParts()[1] = 0; + } else { + category = fcNormal; + exponent = myexponent - 16383; + significandParts()[0] = mysignificand; + significandParts()[1] = 0; + if (myexponent==0) // denormal + exponent = -16382; + } +} + +void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { + assert(api.getBitWidth()==128); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + opStatus fs; + bool losesInfo; + + // Get the first double and convert to our format. + initFromDoubleAPInt(APInt(64, i1)); + fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + // Unless we have a special case, add in second double. 
+ if (isFiniteNonZero()) { + IEEEFloat v(semIEEEdouble, APInt(64, i2)); + fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + add(v, rmNearestTiesToEven); + } +} + +void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { + assert(api.getBitWidth()==128); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i2 >> 48) & 0x7fff; + uint64_t mysignificand = i1; + uint64_t mysignificand2 = i2 & 0xffffffffffffLL; + + initialize(&semIEEEquad); + assert(partCount()==2); + + sign = static_cast<unsigned int>(i2>>63); + if (myexponent==0 && + (mysignificand==0 && mysignificand2==0)) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x7fff && + (mysignificand==0 && mysignificand2==0)) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x7fff && + (mysignificand!=0 || mysignificand2 !=0)) { + // exponent meaningless + category = fcNaN; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + } else { + category = fcNormal; + exponent = myexponent - 16383; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + if (myexponent==0) // denormal + exponent = -16382; + else + significandParts()[1] |= 0x1000000000000LL; // integer bit + } +} + +void IEEEFloat::initFromDoubleAPInt(const APInt &api) { + assert(api.getBitWidth()==64); + uint64_t i = *api.getRawData(); + uint64_t myexponent = (i >> 52) & 0x7ff; + uint64_t mysignificand = i & 0xfffffffffffffLL; + + initialize(&semIEEEdouble); + assert(partCount()==1); + + sign = static_cast<unsigned int>(i>>63); + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x7ff && mysignificand==0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x7ff && mysignificand!=0) { + // 
exponent meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 1023; + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -1022; + else + *significandParts() |= 0x10000000000000LL; // integer bit + } +} + +void IEEEFloat::initFromFloatAPInt(const APInt &api) { + assert(api.getBitWidth()==32); + uint32_t i = (uint32_t)*api.getRawData(); + uint32_t myexponent = (i >> 23) & 0xff; + uint32_t mysignificand = i & 0x7fffff; + + initialize(&semIEEEsingle); + assert(partCount()==1); + + sign = i >> 31; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0xff && mysignificand==0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0xff && mysignificand!=0) { + // sign, exponent, significand meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 127; //bias + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -126; + else + *significandParts() |= 0x800000; // integer bit + } +} + +void IEEEFloat::initFromHalfAPInt(const APInt &api) { + assert(api.getBitWidth()==16); + uint32_t i = (uint32_t)*api.getRawData(); + uint32_t myexponent = (i >> 10) & 0x1f; + uint32_t mysignificand = i & 0x3ff; + + initialize(&semIEEEhalf); + assert(partCount()==1); + + sign = i >> 15; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x1f && mysignificand==0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x1f && mysignificand!=0) { + // sign, exponent, significand meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 15; //bias + *significandParts() = mysignificand; + if 
(myexponent==0) // denormal + exponent = -14; + else + *significandParts() |= 0x400; // integer bit + } +} + +/// Treat api as containing the bits of a floating point number. Currently +/// we infer the floating point type from the size of the APInt. The +/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful +/// when the size is anything else). +void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { + if (Sem == &semIEEEhalf) + return initFromHalfAPInt(api); + if (Sem == &semIEEEsingle) + return initFromFloatAPInt(api); + if (Sem == &semIEEEdouble) + return initFromDoubleAPInt(api); + if (Sem == &semX87DoubleExtended) + return initFromF80LongDoubleAPInt(api); + if (Sem == &semIEEEquad) + return initFromQuadrupleAPInt(api); + if (Sem == &semPPCDoubleDoubleLegacy) + return initFromPPCDoubleDoubleAPInt(api); + + llvm_unreachable(nullptr); +} + +/// Make this number the largest magnitude normal number in the given +/// semantics. +void IEEEFloat::makeLargest(bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 1..10 + // significand = 1..1 + category = fcNormal; + sign = Negative; + exponent = semantics->maxExponent; + + // Use memset to set all but the highest integerPart to all ones. + integerPart *significand = significandParts(); + unsigned PartCount = partCount(); + memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); + + // Set the high integerPart especially setting all unused top bits for + // internal consistency. + const unsigned NumUnusedHighBits = + PartCount*integerPartWidth - semantics->precision; + significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) + ? (~integerPart(0) >> NumUnusedHighBits) + : 0; +} + +/// Make this number the smallest magnitude denormal number in the given +/// semantics. 
+void IEEEFloat::makeSmallest(bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 0..0 + // significand = 0..01 + category = fcNormal; + sign = Negative; + exponent = semantics->minExponent; + APInt::tcSet(significandParts(), 1, partCount()); +} + +void IEEEFloat::makeSmallestNormalized(bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 0..0 + // significand = 10..0 + + category = fcNormal; + zeroSignificand(); + sign = Negative; + exponent = semantics->minExponent; + significandParts()[partCountForBits(semantics->precision) - 1] |= + (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); +} + +IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { + initFromAPInt(&Sem, API); +} + +IEEEFloat::IEEEFloat(float f) { + initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); +} + +IEEEFloat::IEEEFloat(double d) { + initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); +} + +namespace { + void append(SmallVectorImpl<char> &Buffer, StringRef Str) { + Buffer.append(Str.begin(), Str.end()); + } + + /// Removes data from the given significand until it is no more + /// precise than is required for the desired precision. + void AdjustToPrecision(APInt &significand, + int &exp, unsigned FormatPrecision) { + unsigned bits = significand.getActiveBits(); + + // 196/59 is a very slight overestimate of lg_2(10). + unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; + + if (bits <= bitsRequired) return; + + unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; + if (!tensRemovable) return; + + exp += tensRemovable; + + APInt divisor(significand.getBitWidth(), 1); + APInt powten(significand.getBitWidth(), 10); + while (true) { + if (tensRemovable & 1) + divisor *= powten; + tensRemovable >>= 1; + if (!tensRemovable) break; + powten *= powten; + } + + significand = significand.udiv(divisor); + + // Truncate the significand down to its active bit count. 
+ significand = significand.trunc(significand.getActiveBits()); + } + + + void AdjustToPrecision(SmallVectorImpl<char> &buffer, + int &exp, unsigned FormatPrecision) { + unsigned N = buffer.size(); + if (N <= FormatPrecision) return; + + // The most significant figures are the last ones in the buffer. + unsigned FirstSignificant = N - FormatPrecision; + + // Round. + // FIXME: this probably shouldn't use 'round half up'. + + // Rounding down is just a truncation, except we also want to drop + // trailing zeros from the new result. + if (buffer[FirstSignificant - 1] < '5') { + while (FirstSignificant < N && buffer[FirstSignificant] == '0') + FirstSignificant++; + + exp += FirstSignificant; + buffer.erase(&buffer[0], &buffer[FirstSignificant]); + return; + } + + // Rounding up requires a decimal add-with-carry. If we continue + // the carry, the newly-introduced zeros will just be truncated. + for (unsigned I = FirstSignificant; I != N; ++I) { + if (buffer[I] == '9') { + FirstSignificant++; + } else { + buffer[I]++; + break; + } + } + + // If we carried through, we have exactly one digit of precision. 
+ if (FirstSignificant == N) { + exp += FirstSignificant; + buffer.clear(); + buffer.push_back('1'); + return; + } + + exp += FirstSignificant; + buffer.erase(&buffer[0], &buffer[FirstSignificant]); + } +} + +void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, + unsigned FormatMaxPadding, bool TruncateZero) const { + switch (category) { + case fcInfinity: + if (isNegative()) + return append(Str, "-Inf"); + else + return append(Str, "+Inf"); + + case fcNaN: return append(Str, "NaN"); + + case fcZero: + if (isNegative()) + Str.push_back('-'); + + if (!FormatMaxPadding) { + if (TruncateZero) + append(Str, "0.0E+0"); + else { + append(Str, "0.0"); + if (FormatPrecision > 1) + Str.append(FormatPrecision - 1, '0'); + append(Str, "e+00"); + } + } else + Str.push_back('0'); + return; + + case fcNormal: + break; + } + + if (isNegative()) + Str.push_back('-'); + + // Decompose the number into an APInt and an exponent. + int exp = exponent - ((int) semantics->precision - 1); + APInt significand(semantics->precision, + makeArrayRef(significandParts(), + partCountForBits(semantics->precision))); + + // Set FormatPrecision if zero. We want to do this before we + // truncate trailing zeros, as those are part of the precision. + if (!FormatPrecision) { + // We use enough digits so the number can be round-tripped back to an + // APFloat. The formula comes from "How to Print Floating-Point Numbers + // Accurately" by Steele and White. + // FIXME: Using a formula based purely on the precision is conservative; + // we can print fewer digits depending on the actual value being printed. + + // FormatPrecision = 2 + floor(significandBits / lg_2(10)) + FormatPrecision = 2 + semantics->precision * 59 / 196; + } + + // Ignore trailing binary zeros. + int trailingZeros = significand.countTrailingZeros(); + exp += trailingZeros; + significand.lshrInPlace(trailingZeros); + + // Change the exponent from 2^e to 10^e. + if (exp == 0) { + // Nothing to do. 
+ } else if (exp > 0) { + // Just shift left. + significand = significand.zext(semantics->precision + exp); + significand <<= exp; + exp = 0; + } else { /* exp < 0 */ + int texp = -exp; + + // We transform this using the identity: + // (N)(2^-e) == (N)(5^e)(10^-e) + // This means we have to multiply N (the significand) by 5^e. + // To avoid overflow, we have to operate on numbers large + // enough to store N * 5^e: + // log2(N * 5^e) == log2(N) + e * log2(5) + // <= semantics->precision + e * 137 / 59 + // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) + + unsigned precision = semantics->precision + (137 * texp + 136) / 59; + + // Multiply significand by 5^e. + // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) + significand = significand.zext(precision); + APInt five_to_the_i(precision, 5); + while (true) { + if (texp & 1) significand *= five_to_the_i; + + texp >>= 1; + if (!texp) break; + five_to_the_i *= five_to_the_i; + } + } + + AdjustToPrecision(significand, exp, FormatPrecision); + + SmallVector<char, 256> buffer; + + // Fill the buffer. + unsigned precision = significand.getBitWidth(); + APInt ten(precision, 10); + APInt digit(precision, 0); + + bool inTrail = true; + while (significand != 0) { + // digit <- significand % 10 + // significand <- significand / 10 + APInt::udivrem(significand, ten, significand, digit); + + unsigned d = digit.getZExtValue(); + + // Drop trailing zeros. + if (inTrail && !d) exp++; + else { + buffer.push_back((char) ('0' + d)); + inTrail = false; + } + } + + assert(!buffer.empty() && "no characters in buffer!"); + + // Drop down to FormatPrecision. + // TODO: don't do more precise calculations above than are required. + AdjustToPrecision(buffer, exp, FormatPrecision); + + unsigned NDigits = buffer.size(); + + // Check whether we should use scientific notation. 
+ bool FormatScientific; + if (!FormatMaxPadding) + FormatScientific = true; + else { + if (exp >= 0) { + // 765e3 --> 765000 + // ^^^ + // But we shouldn't make the number look more precise than it is. + FormatScientific = ((unsigned) exp > FormatMaxPadding || + NDigits + (unsigned) exp > FormatPrecision); + } else { + // Power of the most significant digit. + int MSD = exp + (int) (NDigits - 1); + if (MSD >= 0) { + // 765e-2 == 7.65 + FormatScientific = false; + } else { + // 765e-5 == 0.00765 + // ^ ^^ + FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; + } + } + } + + // Scientific formatting is pretty straightforward. + if (FormatScientific) { + exp += (NDigits - 1); + + Str.push_back(buffer[NDigits-1]); + Str.push_back('.'); + if (NDigits == 1 && TruncateZero) + Str.push_back('0'); + else + for (unsigned I = 1; I != NDigits; ++I) + Str.push_back(buffer[NDigits-1-I]); + // Fill with zeros up to FormatPrecision. + if (!TruncateZero && FormatPrecision > NDigits - 1) + Str.append(FormatPrecision - NDigits + 1, '0'); + // For !TruncateZero we use lower 'e'. + Str.push_back(TruncateZero ? 'E' : 'e'); + + Str.push_back(exp >= 0 ? '+' : '-'); + if (exp < 0) exp = -exp; + SmallVector<char, 6> expbuf; + do { + expbuf.push_back((char) ('0' + (exp % 10))); + exp /= 10; + } while (exp); + // Exponent always at least two digits if we do not truncate zeros. + if (!TruncateZero && expbuf.size() < 2) + expbuf.push_back('0'); + for (unsigned I = 0, E = expbuf.size(); I != E; ++I) + Str.push_back(expbuf[E-1-I]); + return; + } + + // Non-scientific, positive exponents. + if (exp >= 0) { + for (unsigned I = 0; I != NDigits; ++I) + Str.push_back(buffer[NDigits-1-I]); + for (unsigned I = 0; I != (unsigned) exp; ++I) + Str.push_back('0'); + return; + } + + // Non-scientific, negative exponents. + + // The number of digits to the left of the decimal point. 
+ int NWholeDigits = exp + (int) NDigits; + + unsigned I = 0; + if (NWholeDigits > 0) { + for (; I != (unsigned) NWholeDigits; ++I) + Str.push_back(buffer[NDigits-I-1]); + Str.push_back('.'); + } else { + unsigned NZeros = 1 + (unsigned) -NWholeDigits; + + Str.push_back('0'); + Str.push_back('.'); + for (unsigned Z = 1; Z != NZeros; ++Z) + Str.push_back('0'); + } + + for (; I != NDigits; ++I) + Str.push_back(buffer[NDigits-I-1]); +} + +bool IEEEFloat::getExactInverse(APFloat *inv) const { + // Special floats and denormals have no exact inverse. + if (!isFiniteNonZero()) + return false; + + // Check that the number is a power of two by making sure that only the + // integer bit is set in the significand. + if (significandLSB() != semantics->precision - 1) + return false; + + // Get the inverse. + IEEEFloat reciprocal(*semantics, 1ULL); + if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) + return false; + + // Avoid multiplication with a denormal, it is not safe on all platforms and + // may be slower than a normal division. + if (reciprocal.isDenormal()) + return false; + + assert(reciprocal.isFiniteNonZero() && + reciprocal.significandLSB() == reciprocal.semantics->precision - 1); + + if (inv) + *inv = APFloat(reciprocal, *semantics); + + return true; +} + +bool IEEEFloat::isSignaling() const { + if (!isNaN()) + return false; + + // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the + // first bit of the trailing significand being 0. + return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); +} + +/// IEEE-754R 2008 5.3.1: nextUp/nextDown. +/// +/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with +/// appropriate sign switching before/after the computation. +IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { + // If we are performing nextDown, swap sign so we have -x. 
+ if (nextDown) + changeSign(); + + // Compute nextUp(x) + opStatus result = opOK; + + // Handle each float category separately. + switch (category) { + case fcInfinity: + // nextUp(+inf) = +inf + if (!isNegative()) + break; + // nextUp(-inf) = -getLargest() + makeLargest(true); + break; + case fcNaN: + // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. + // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not + // change the payload. + if (isSignaling()) { + result = opInvalidOp; + // For consistency, propagate the sign of the sNaN to the qNaN. + makeNaN(false, isNegative(), nullptr); + } + break; + case fcZero: + // nextUp(pm 0) = +getSmallest() + makeSmallest(false); + break; + case fcNormal: + // nextUp(-getSmallest()) = -0 + if (isSmallest() && isNegative()) { + APInt::tcSet(significandParts(), 0, partCount()); + category = fcZero; + exponent = 0; + break; + } + + // nextUp(getLargest()) == INFINITY + if (isLargest() && !isNegative()) { + APInt::tcSet(significandParts(), 0, partCount()); + category = fcInfinity; + exponent = semantics->maxExponent + 1; + break; + } + + // nextUp(normal) == normal + inc. + if (isNegative()) { + // If we are negative, we need to decrement the significand. + + // We only cross a binade boundary that requires adjusting the exponent + // if: + // 1. exponent != semantics->minExponent. This implies we are not in the + // smallest binade or are dealing with denormals. + // 2. Our significand excluding the integral bit is all zeros. + bool WillCrossBinadeBoundary = + exponent != semantics->minExponent && isSignificandAllZeros(); + + // Decrement the significand. + // + // We always do this since: + // 1. If we are dealing with a non-binade decrement, by definition we + // just decrement the significand. + // 2. 
If we are dealing with a normal -> normal binade decrement, since
+      //      we have an explicit integral bit the fact that all bits but the
+      //      integral bit are zero implies that subtracting one will yield a
+      //      significand with 0 integral bit and 1 in all other spots. Thus we
+      //      must just adjust the exponent and set the integral bit to 1.
+      //   3. If we are dealing with a normal -> denormal binade decrement,
+      //      since we set the integral bit to 0 when we represent denormals, we
+      //      just decrement the significand.
+      integerPart *Parts = significandParts();
+      APInt::tcDecrement(Parts, partCount());
+
+      if (WillCrossBinadeBoundary) {
+        // Our result is a normal number. Do the following:
+        // 1. Set the integral bit to 1.
+        // 2. Decrement the exponent.
+        APInt::tcSetBit(Parts, semantics->precision - 1);
+        exponent--;
+      }
+    } else {
+      // If we are positive, we need to increment the significand.
+
+      // We only cross a binade boundary that requires adjusting the exponent if
+      // the input is not a denormal and all of said input's significand bits
+      // are set. If all of said conditions are true: clear the significand, set
+      // the integral bit to 1, and increment the exponent. If we have a
+      // denormal always increment since moving denormals and the numbers in the
+      // smallest normal binade have the same exponent in our representation.
+      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
+
+      if (WillCrossBinadeBoundary) {
+        integerPart *Parts = significandParts();
+        APInt::tcSet(Parts, 0, partCount());
+        APInt::tcSetBit(Parts, semantics->precision - 1);
+        assert(exponent != semantics->maxExponent &&
+               "We can not increment an exponent beyond the maxExponent allowed"
+               " by the given floating point semantics.");
+        exponent++;
+      } else {
+        incrementSignificand();
+      }
+    }
+    break;
+  }
+
+  // If we are performing nextDown, swap sign so we have -nextUp(-x)
+  if (nextDown)
+    changeSign();
+
+  return result;
+}
+
+/// Turn this value into an infinity of the requested sign: the category
+/// becomes fcInfinity, the exponent is set to maxExponent + 1 and the whole
+/// significand is cleared.
+void IEEEFloat::makeInf(bool Negative) {
+  category = fcInfinity;
+  sign = Negative;
+  exponent = semantics->maxExponent + 1;
+  APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/// Turn this value into a zero of the requested sign: the category becomes
+/// fcZero, the exponent is set to minExponent - 1 and the significand is
+/// cleared.
+void IEEEFloat::makeZero(bool Negative) {
+  category = fcZero;
+  sign = Negative;
+  exponent = semantics->minExponent-1;
+  APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/// Sets significand bit (precision - 2) of a value that must already be a NaN
+/// (asserted); this function treats that bit as the quiet bit.
+void IEEEFloat::makeQuiet() {
+  assert(isNaN());
+  APInt::tcSetBit(significandParts(), semantics->precision - 2);
+}
+
+/// Returns the exponent of Arg.  NaN, zero and infinity map to the IEK_NaN,
+/// IEK_Zero and IEK_Inf sentinels, and a non-denormal returns its stored
+/// exponent.  A denormal is normalized on a copy whose exponent was first
+/// raised by (precision - 1); that offset is removed from the result.
+int ilogb(const IEEEFloat &Arg) {
+  if (Arg.isNaN())
+    return IEEEFloat::IEK_NaN;
+  if (Arg.isZero())
+    return IEEEFloat::IEK_Zero;
+  if (Arg.isInfinity())
+    return IEEEFloat::IEK_Inf;
+  if (!Arg.isDenormal())
+    return Arg.exponent;
+
+  IEEEFloat Normalized(Arg);
+  int SignificandBits = Arg.getSemantics().precision - 1;
+
+  // Raise the exponent so normalize() has room to shift the significand up,
+  // then subtract the same amount from the normalized exponent.
+  Normalized.exponent += SignificandBits;
+  Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
+  return Normalized.exponent - SignificandBits;
+}
+
+/// Returns X with Exp added to its exponent and then normalized using
+/// RoundingMode.  Exp is clamped before the addition so the exponent field
+/// cannot overflow, and a NaN result is made quiet.
+IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
+  auto MaxExp = X.getSemantics().maxExponent;
+  auto MinExp = X.getSemantics().minExponent;
+
+  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
+  // overflow; clamp it to a safe range before adding, but ensure that the range
+  // is large enough that the clamp does not change the result. The range we
+  // need to support is the difference between the largest possible exponent and
+  // the normalized exponent of half the smallest denormal.
+
+  int SignificandBits = X.getSemantics().precision - 1;
+  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
+
+  // Clamp to one past the range ends to let normalize handle overflow.
+  X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
+  X.normalize(RoundingMode, lfExactlyZero);
+  if (X.isNaN())
+    X.makeQuiet();
+  return X;
+}
+
+/// Splits Val into a fraction and an exponent.  Exp receives ilogb(Val) + 1
+/// (or 0 when Val is zero) and the result is scalbn(Val, -Exp, RM).  A NaN is
+/// returned quieted and an infinity is returned unchanged; in those two cases
+/// Exp is left holding the IEK_NaN / IEK_Inf sentinel produced by ilogb.
+IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
+  Exp = ilogb(Val);
+
+  // Quiet signalling nans.
+  if (Exp == IEEEFloat::IEK_NaN) {
+    IEEEFloat Quiet(Val);
+    Quiet.makeQuiet();
+    return Quiet;
+  }
+
+  if (Exp == IEEEFloat::IEK_Inf)
+    return Val;
+
+  // 1 is added because frexp is defined to return a normalized fraction in
+  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
+  Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
+  return scalbn(Val, -Exp, RM);
+}
+
+/// Builds a value whose two halves are both default-constructed
+/// semIEEEdouble APFloats.  S must be semPPCDoubleDouble (asserted).
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
+    : Semantics(&S),
+      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
+  assert(Semantics == &semPPCDoubleDouble);
+}
+
+/// Same as above, but both halves are constructed with the `uninitialized`
+/// tag.
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
+    : Semantics(&S),
+      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
+                            APFloat(semIEEEdouble, uninitialized)}) {
+  assert(Semantics == &semPPCDoubleDouble);
+}
+
+/// Builds the first half from the integerPart I; the second half is
+/// default-constructed.
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
+    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
+                                           APFloat(semIEEEdouble)}) {
+  assert(Semantics == &semPPCDoubleDouble);
+}
+
+/// Builds the halves from raw words 0 and 1 of I, each reinterpreted as a
+/// 64-bit semIEEEdouble pattern.  Reads I.getRawData()[1], so I must hold at
+/// least two words.
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
+    : Semantics(&S),
+      Floats(new APFloat[2]{
+          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
+          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
+  assert(Semantics == &semPPCDoubleDouble);
+}
+
+/// Takes ownership of the two halves by move.  Both must already use
+/// semIEEEdouble semantics (asserted).
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
+                             APFloat &&Second)
+    : Semantics(&S),
+      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
+  assert(Semantics == &semPPCDoubleDouble);
+  assert(&Floats[0].getSemantics() == &semIEEEdouble);
+  assert(&Floats[1].getSemantics() == &semIEEEdouble);
+}
+
+/// Copy constructor: allocates a fresh pair and copies both halves when RHS
+/// has storage, otherwise leaves Floats null.
+DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
+    : Semantics(RHS.Semantics),
+      Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
+                                         APFloat(RHS.Floats[1])}
+                        : nullptr) {
+  assert(Semantics == &semPPCDoubleDouble);
+}
+
+/// Move constructor: takes over RHS's storage and marks RHS by pointing its
+/// Semantics at semBogus.
+DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
+    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
+  RHS.Semantics = &semBogus;
+  assert(Semantics == &semPPCDoubleDouble);
+}
+
+/// Copy assignment.  When the semantics match and RHS has storage the two
+/// halves are assigned in place; otherwise, unless this is self-assignment,
+/// the object is destroyed and copy-constructed again in the same storage.
+DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
+  if (Semantics == RHS.Semantics && RHS.Floats) {
+    Floats[0] = RHS.Floats[0];
+    Floats[1] = RHS.Floats[1];
+  } else if (this != &RHS) {
+    this->~DoubleAPFloat();
+    new (this) DoubleAPFloat(RHS);
+  }
+  return *this;
+}
+
+// Implement addition, subtraction, multiplication and division based on:
+// "Software for Doubled-Precision Floating-Point Computations",
+// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
+/// Adds the double-double operands (a, aa) and (c, cc), where the first
+/// element of each pair is the high part.  The high part of the sum is stored
+/// in Floats[0] and the low part in Floats[1].  Returns the OR of the statuses
+/// of the component operations, except on the paths below that reset or
+/// replace it.
+APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
+                                         const APFloat &c, const APFloat &cc,
+                                         roundingMode RM) {
+  int Status = opOK;
+  APFloat z = a;
+  Status |= z.add(c, RM);
+  if (!z.isFinite()) {
+    // a + c is neither finite nor an infinity: store it as the high part and
+    // zero the low part.
+    if (!z.isInfinity()) {
+      Floats[0] = std::move(z);
+      Floats[1].makeZero(/* Neg = */ false);
+      return (opStatus)Status;
+    }
+    // a + c is an infinity.  Discard the status so far and redo the sum
+    // starting from the low parts, adding the smaller-magnitude high part
+    // before the larger one.
+    Status = opOK;
+    auto AComparedToC = a.compareAbsoluteValue(c);
+    z = cc;
+    Status |= z.add(aa, RM);
+    if (AComparedToC == APFloat::cmpGreaterThan) {
+      // z = cc + aa + c + a;
+      Status |= z.add(c, RM);
+      Status |= z.add(a, RM);
+    } else {
+      // z = cc + aa + a + c;
+      Status |= z.add(a, RM);
+      Status |= z.add(c, RM);
+    }
+    if (!z.isFinite()) {
+      Floats[0] = std::move(z);
+      Floats[1].makeZero(/* Neg = */ false);
+      return (opStatus)Status;
+    }
+    Floats[0] = z;
+    APFloat zz = aa;
+    Status |= zz.add(cc, RM);
+    if (AComparedToC == APFloat::cmpGreaterThan) {
+      // Floats[1] = a - z + c + zz;
+      Floats[1] = a;
+      Status |= Floats[1].subtract(z, RM);
+      Status |= Floats[1].add(c, RM);
+      Status |= Floats[1].add(zz, RM);
+    } else {
+      // Floats[1] = c - z + a + zz;
+      Floats[1] = c;
+      Status |= Floats[1].subtract(z, RM);
+      Status |= Floats[1].add(a, RM);
+      Status |= Floats[1].add(zz, RM);
+    }
+  } else {
+    // q = a - z;
+    APFloat q = a;
+    Status |= q.subtract(z, RM);
+
+    // zz = q + c + (a - (q + z)) + aa + cc;
+    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
+    auto zz = q;
+    Status |= zz.add(c, RM);
+    Status |= q.add(z, RM);
+    Status |= q.subtract(a, RM);
+    q.changeSign();
+    Status |= zz.add(q, RM);
+    Status |= zz.add(aa, RM);
+    Status |= zz.add(cc, RM);
+    // A correction term of +0 means z is already the result; this path
+    // reports opOK regardless of the status accumulated so far.
+    if (zz.isZero() && !zz.isNegative()) {
+      Floats[0] = std::move(z);
+      Floats[1].makeZero(/* Neg = */ false);
+      return opOK;
+    }
+    Floats[0] = z;
+    Status |= Floats[0].add(zz, RM);
+    if (!Floats[0].isFinite()) {
+      Floats[1].makeZero(/* Neg = */ false);
+      return (opStatus)Status;
+    }
+    // Low part = (z - Floats[0]) + zz.
+    Floats[1] = std::move(z);
+    Status |= Floats[1].subtract(Floats[0], RM);
+    Status |= Floats[1].add(zz, RM);
+  }
+  return (opStatus)Status;
+}
+
+/// Computes LHS + RHS into Out.  NaN, zero and infinity operands are resolved
+/// here by copying the deciding operand; infinities of opposite sign produce
+/// a NaN and opInvalidOp.  Two fcNormal operands are forwarded to
+/// Out.addImpl().
+APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
+                                                const DoubleAPFloat &RHS,
+                                                DoubleAPFloat &Out,
+                                                roundingMode RM) {
+  if (LHS.getCategory() == fcNaN) {
+    Out = LHS;
+    return opOK;
+  }
+  if (RHS.getCategory() == fcNaN) {
+    Out = RHS;
+    return opOK;
+  }
+  if (LHS.getCategory() == fcZero) {
+    Out = RHS;
+    return opOK;
+  }
+  if (RHS.getCategory() == fcZero) {
+    Out = LHS;
+    return opOK;
+  }
+  if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
+      LHS.isNegative() != RHS.isNegative()) {
+    Out.makeNaN(false, Out.isNegative(), nullptr);
+    return opInvalidOp;
+  }
+  if (LHS.getCategory() == fcInfinity) {
+    Out = LHS;
+    return opOK;
+  }
+  if (RHS.getCategory() == fcInfinity) {
+    Out = RHS;
+    return opOK;
+  }
+  assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
+
+  // Copy the four halves first: Out may be the same object as LHS or RHS
+  // (add() passes *this for both LHS and Out).
+  APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
+      CC(RHS.Floats[1]);
+  assert(&A.getSemantics() == &semIEEEdouble);
+  assert(&AA.getSemantics() == &semIEEEdouble);
+  assert(&C.getSemantics() == &semIEEEdouble);
+  assert(&CC.getSemantics() == &semIEEEdouble);
+  assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
+  assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
+  return Out.addImpl(A, AA, C, CC, RM);
+}
+
+/// In-place addition: *this += RHS.
+APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
+                                     roundingMode RM) {
+  return addWithSpecial(*this, RHS, *this, RM);
+}
+
+/// In-place subtraction, computed as -((-*this) + RHS).
+APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
+                                          roundingMode RM) {
+  changeSign();
+  auto Ret = add(RHS, RM);
+  changeSign();
+  return Ret;
+}
+
+/// In-place multiplication: *this *= RHS.  Special categories are resolved by
+/// copying one operand (or making a NaN for zero * infinity) and return opOK;
+/// two fcNormal operands use the split product below.
+APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
+                                          APFloat::roundingMode RM) {
+  const auto &LHS = *this;
+  auto &Out = *this;
+  /* Interesting observation: For special categories, finding the lowest
+     common ancestor of the following layered graph gives the correct
+     return category:
+
+        NaN
+       /   \
+     Zero  Inf
+       \   /
+       Normal
+
+     e.g. NaN * NaN = NaN
+          Zero * Inf = NaN
+          Normal * Zero = Zero
+          Normal * Inf = Inf
+  */
+  if (LHS.getCategory() == fcNaN) {
+    Out = LHS;
+    return opOK;
+  }
+  if (RHS.getCategory() == fcNaN) {
+    Out = RHS;
+    return opOK;
+  }
+  if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
+      (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
+    Out.makeNaN(false, false, nullptr);
+    return opOK;
+  }
+  // NOTE(review): the two branches below copy the zero/infinite operand
+  // as-is, so the other operand's sign does not affect the result - confirm
+  // this is intended.
+  if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
+    Out = LHS;
+    return opOK;
+  }
+  if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
+    Out = RHS;
+    return opOK;
+  }
+  assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
+         "Special cases not handled exhaustively");
+
+  int Status = opOK;
+  // (A, B) are the high/low halves of *this, (C, D) those of RHS.
+  APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
+  // t = a * c
+  APFloat T = A;
+  Status |= T.multiply(C, RM);
+  if (!T.isFiniteNonZero()) {
+    Floats[0] = T;
+    Floats[1].makeZero(/* Neg = */ false);
+    return (opStatus)Status;
+  }
+
+  // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
+  APFloat Tau = A;
+  T.changeSign();
+  Status |= Tau.fusedMultiplyAdd(C, T, RM);
+  T.changeSign();
+  {
+    // v = a * d
+    APFloat V = A;
+    Status |= V.multiply(D, RM);
+    // w = b * c
+    APFloat W = B;
+    Status |= W.multiply(C, RM);
+    Status |= V.add(W, RM);
+    // tau += v + w
+    Status |= Tau.add(V, RM);
+  }
+  // u = t + tau
+  APFloat U = T;
+  Status |= U.add(Tau, RM);
+
+  Floats[0] = U;
+  if (!U.isFinite()) {
+    Floats[1].makeZero(/* Neg = */ false);
+  } else {
+    // Floats[1] = (t - u) + tau
+    Status |= T.subtract(U, RM);
+    Status |= T.add(Tau, RM);
+    Floats[1] = T;
+  }
+  return (opStatus)Status;
+}
+
+/// In-place division.  Like the other wrappers below, this round-trips
+/// through an APFloat with semPPCDoubleDoubleLegacy semantics built from the
+/// 128-bit pattern, performs the operation there, and rebuilds *this from the
+/// result's bit pattern.
+APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
+                                        APFloat::roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  auto Ret =
+      Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// In-place remainder, delegated to the legacy-semantics APFloat.
+APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  auto Ret =
+      Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// In-place mod, delegated to the legacy-semantics APFloat.
+APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// In-place fused multiply-add with Multiplicand and Addend, delegated to the
+/// legacy-semantics APFloat.
+APFloat::opStatus
+DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
+                                const DoubleAPFloat &Addend,
+                                APFloat::roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  auto Ret = Tmp.fusedMultiplyAdd(
+      APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
+      APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// In-place round-to-integral, delegated to the legacy-semantics APFloat.
+APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  auto Ret = Tmp.roundToIntegral(RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// Flips the sign of both halves.
+void DoubleAPFloat::changeSign() {
+  Floats[0].changeSign();
+  Floats[1].changeSign();
+}
+
+/// Compares |*this| with |RHS|.  The high halves decide unless their
+/// magnitudes are equal; then the low halves are compared, taking into
+/// account whether each low half has the opposite sign to its high half
+/// (and therefore reduces the magnitude).
+APFloat::cmpResult
+DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
+  auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
+  if (Result != cmpEqual)
+    return Result;
+  Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
+  if (Result == cmpLessThan || Result == cmpGreaterThan) {
+    // "Against" is true when the low half's sign differs from the high half's.
+    auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
+    auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
+    if (Against && !RHSAgainst)
+      return cmpLessThan;
+    if (!Against && RHSAgainst)
+      return cmpGreaterThan;
+    if (!Against && !RHSAgainst)
+      return Result;
+    // Both low halves subtract from their high halves: swap less/greater.
+    if (Against && RHSAgainst)
+      return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
+  }
+  return Result;
+}
+
+/// The category of the pair is the category of the high half.
+APFloat::fltCategory DoubleAPFloat::getCategory() const {
+  return Floats[0].getCategory();
+}
+
+/// The sign of the pair is the sign of the high half.
+bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
+
+/// Makes the high half an infinity of sign Neg and the low half +0.
+void DoubleAPFloat::makeInf(bool Neg) {
+  Floats[0].makeInf(Neg);
+  Floats[1].makeZero(/* Neg = */ false);
+}
+
+/// Makes the high half a zero of sign Neg and the low half +0.
+void DoubleAPFloat::makeZero(bool Neg) {
+  Floats[0].makeZero(Neg);
+  Floats[1].makeZero(/* Neg = */ false);
+}
+
+/// Loads both halves from fixed 64-bit semIEEEdouble patterns and negates
+/// the pair when Neg is set.
+void DoubleAPFloat::makeLargest(bool Neg) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
+  Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
+  if (Neg)
+    changeSign();
+}
+
+/// Makes the high half the smallest semIEEEdouble value of sign Neg and the
+/// low half +0.
+void DoubleAPFloat::makeSmallest(bool Neg) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  Floats[0].makeSmallest(Neg);
+  Floats[1].makeZero(/* Neg = */ false);
+}
+
+/// Loads the high half from a fixed 64-bit pattern (negated when Neg is set)
+/// and makes the low half +0.
+void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
+  if (Neg)
+    Floats[0].changeSign();
+  Floats[1].makeZero(/* Neg = */ false);
+}
+
+/// Makes the high half a NaN with the given parameters and the low half +0.
+void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
+  Floats[0].makeNaN(SNaN, Neg, fill);
+  Floats[1].makeZero(/* Neg = */ false);
+}
+
+/// Ordered comparison: the high halves decide unless they compare equal, in
+/// which case the low halves decide.
+APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
+  auto Result = Floats[0].compare(RHS.Floats[0]);
+  // |Float[0]| > |Float[1]|
+  if (Result == APFloat::cmpEqual)
+    return Floats[1].compare(RHS.Floats[1]);
+  return Result;
+}
+
+/// True when both halves are bitwise equal to the corresponding halves of RHS.
+bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
+  return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
+         Floats[1].bitwiseIsEqual(RHS.Floats[1]);
+}
+
+/// Hashes both halves, or only the Semantics pointer when Arg has no storage.
+hash_code hash_value(const DoubleAPFloat &Arg) {
+  if (Arg.Floats)
+    return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
+  return hash_combine(Arg.Semantics);
+}
+
+/// Returns a 128-bit APInt whose word 0 is the high half's bit pattern and
+/// whose word 1 is the low half's.
+APInt DoubleAPFloat::bitcastToAPInt() const {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  uint64_t Data[] = {
+      Floats[0].bitcastToAPInt().getRawData()[0],
+      Floats[1].bitcastToAPInt().getRawData()[0],
+  };
+  return APInt(128, 2, Data);
+}
+
+/// Parses S into a legacy-semantics APFloat and rebuilds *this from its bit
+/// pattern.
+APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
+                                                   roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy);
+  auto Ret = Tmp.convertFromString(S, RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// In-place next(), delegated to the legacy-semantics APFloat.
+APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  auto Ret = Tmp.next(nextDown);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// Integer conversion, delegated to a temporary legacy-semantics APFloat;
+/// *this is not modified.
+APFloat::opStatus
+DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
+                                unsigned int Width, bool IsSigned,
+                                roundingMode RM, bool *IsExact) const {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+      .convertToInteger(Input, Width, IsSigned, RM, IsExact);
+}
+
+/// Converts Input via a legacy-semantics APFloat and rebuilds *this from its
+/// bit pattern.
+APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
+                                                  bool IsSigned,
+                                                  roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy);
+  auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// Sign-extended integer conversion, delegated to a legacy-semantics APFloat.
+APFloat::opStatus
+DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
+                                              unsigned int InputSize,
+                                              bool IsSigned, roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy);
+  auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// Zero-extended integer conversion, delegated to a legacy-semantics APFloat.
+APFloat::opStatus
+DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
+                                              unsigned int InputSize,
+                                              bool IsSigned, roundingMode RM) {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy);
+  auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
+  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+  return Ret;
+}
+
+/// Hex-string formatting, delegated to a temporary legacy-semantics APFloat.
+unsigned int DoubleAPFloat::convertToHexString(char *DST,
+                                               unsigned int HexDigits,
+                                               bool UpperCase,
+                                               roundingMode RM) const {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+      .convertToHexString(DST, HexDigits, UpperCase, RM);
+}
+
+/// An fcNormal pair is reported as denormal when either half is denormal or
+/// when the high half differs from the sum of the two halves.
+bool DoubleAPFloat::isDenormal() const {
+  return getCategory() == fcNormal &&
+         (Floats[0].isDenormal() || Floats[1].isDenormal() ||
+          // (double)(Hi + Lo) == Hi defines a normal number.
+          Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
+}
+
+/// True when this fcNormal value compares equal to makeSmallest() of the same
+/// sign.
+bool DoubleAPFloat::isSmallest() const {
+  if (getCategory() != fcNormal)
+    return false;
+  DoubleAPFloat Tmp(*this);
+  Tmp.makeSmallest(this->isNegative());
+  return Tmp.compare(*this) == cmpEqual;
+}
+
+/// True when this fcNormal value compares equal to makeLargest() of the same
+/// sign.
+bool DoubleAPFloat::isLargest() const {
+  if (getCategory() != fcNormal)
+    return false;
+  DoubleAPFloat Tmp(*this);
+  Tmp.makeLargest(this->isNegative());
+  return Tmp.compare(*this) == cmpEqual;
+}
+
+/// True when both halves are integers.
+bool DoubleAPFloat::isInteger() const {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  return Floats[0].isInteger() && Floats[1].isInteger();
+}
+
+/// Decimal formatting, delegated to a temporary legacy-semantics APFloat.
+void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
+                             unsigned FormatPrecision,
+                             unsigned FormatMaxPadding,
+                             bool TruncateZero) const {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+      .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
+}
+
+/// Asks the legacy-semantics APFloat for an exact inverse.  With a null inv
+/// only the yes/no answer is returned; otherwise *inv is overwritten with the
+/// legacy result re-tagged as semPPCDoubleDouble, whatever the answer.
+bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
+  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+  if (!inv)
+    return Tmp.getExactInverse(nullptr);
+  APFloat Inv(semPPCDoubleDoubleLegacy);
+  auto Ret = Tmp.getExactInverse(&Inv);
+  *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
+  return Ret;
+}
+
+/// Applies scalbn with the same Exp to each half independently.
+DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
+  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
+                       scalbn(Arg.Floats[1], Exp, RM));
+}
+
+/// frexp on the high half determines Exp; for an fcNormal value the low half
+/// is then scaled by the same -Exp, otherwise it is copied unchanged.
+DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
+                    APFloat::roundingMode RM) {
+  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+  APFloat First = frexp(Arg.Floats[0], Exp, RM);
+  APFloat Second = Arg.Floats[1];
+  if (Arg.getCategory() == APFloat::fcNormal)
+    Second = scalbn(Second, -Exp, RM);
+  return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
+}
+
+} // End detail namespace
+
+/// Constructs the union member selected by Semantics from F.  For the
+/// IEEEFloat layout F is moved in directly; for the DoubleAPFloat layout F
+/// becomes the first half and the second half is a default semIEEEdouble
+/// APFloat.
+APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
+  if (usesLayout<IEEEFloat>(Semantics)) {
+    new (&IEEE) IEEEFloat(std::move(F));
+    return;
+  }
+  if (usesLayout<DoubleAPFloat>(Semantics)) {
+    // Read F's semantics into S before F is moved from below.
+    const fltSemantics& S = F.getSemantics();
+    new (&Double)
+        DoubleAPFloat(Semantics, APFloat(std::move(F), S),
+                      APFloat(semIEEEdouble));
+    return;
+  }
+  llvm_unreachable("Unexpected semantics");
+}
+
+/// Forwards to the active representation through the
+/// APFLOAT_DISPATCH_ON_SEMANTICS macro (defined outside this view).
+APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
+  APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
+}
+
+/// Hashes whichever union member matches Arg's semantics.
+hash_code hash_value(const APFloat &Arg) {
+  if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
+    return hash_value(Arg.U.IEEE);
+  if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
+    return hash_value(Arg.U.Double);
+  llvm_unreachable("Unexpected semantics");
+}
+
+/// Constructs a value of the given semantics from the string S using
+/// rmNearestTiesToEven.  The conversion status is discarded.
+APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
+    : APFloat(Semantics) {
+  convertFromString(S, rmNearestTiesToEven);
+}
+
+/// Converts *this to ToSemantics in place.  Identical semantics is a no-op
+/// that writes false to *losesInfo; losesInfo is dereferenced on that path,
+/// so it must not be null.  The remaining branches cover IEEE -> IEEE,
+/// IEEE -> double-double and double-double -> IEEE.
+APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
+                                   roundingMode RM, bool *losesInfo) {
+  if (&getSemantics() == &ToSemantics) {
+    *losesInfo = false;
+    return opOK;
+  }
+  if (usesLayout<IEEEFloat>(getSemantics()) &&
+      usesLayout<IEEEFloat>(ToSemantics))
+    return U.IEEE.convert(ToSemantics, RM, losesInfo);
+  if (usesLayout<IEEEFloat>(getSemantics()) &&
+      usesLayout<DoubleAPFloat>(ToSemantics)) {
+    assert(&ToSemantics == &semPPCDoubleDouble);
+    // Convert to the legacy representation, then rebuild from its bits.
+    auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
+    *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
+    return Ret;
+  }
+  if (usesLayout<DoubleAPFloat>(getSemantics()) &&
+      usesLayout<IEEEFloat>(ToSemantics)) {
+    auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
+    *this = APFloat(std::move(getIEEE()), ToSemantics);
+    return Ret;
+  }
+  llvm_unreachable("Unexpected semantics");
+}
+
+/// Returns the APFloat whose bit pattern is all ones for the given width.
+/// With isIEEE set the width selects half/single/double/x87/quad semantics;
+/// otherwise the width must be 128 and semPPCDoubleDouble is used.
+APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
+  if (isIEEE) {
+    switch (BitWidth) {
+    case 16:
+      return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
+    case 32:
+      return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
+    case 64:
+      return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
+    case 80:
+      return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
+    case 128:
+      return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
+    default:
+      llvm_unreachable("Unknown floating bit width");
+    }
+  } else {
+    assert(BitWidth == 128);
+    return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
+  }
+}
+
+/// Writes the toString() rendering of this value to OS, followed by a
+/// newline.
+void APFloat::print(raw_ostream &OS) const {
+  SmallVector<char, 16> Buffer;
+  toString(Buffer);
+  OS << Buffer << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
+#endif
+
+/// Adds this value's bit pattern to the FoldingSet profile NID.
+void APFloat::Profile(FoldingSetNodeID &NID) const {
+  NID.Add(bitcastToAPInt());
+}
+
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+   an APSInt, whose initial bit-width and signed-ness are used to determine the
+   precision of the conversion.
+ */
+APFloat::opStatus APFloat::convertToInteger(APSInt &result,
+                                            roundingMode rounding_mode,
+                                            bool *isExact) const {
+  unsigned bitWidth = result.getBitWidth();
+  SmallVector<uint64_t, 4> parts(result.getNumWords());
+  opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
+                                     rounding_mode, isExact);
+  // Keeps the original signed-ness.
+  result = APInt(bitWidth, parts);
+  return status;
+}
+
+} // End llvm namespace
+
+#undef APFLOAT_DISPATCH_ON_SEMANTICS
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
new file mode 100644
index 0000000000000..758fe8b4f866a
--- /dev/null
+++ b/llvm/lib/Support/APInt.cpp
@@ -0,0 +1,3041 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/bit.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+#define DEBUG_TYPE "apint"
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+  // Plain array new followed by memset; the caller owns the returned array
+  // and must release it with delete[].
+  uint64_t *result = new uint64_t[numWords];
+  memset(result, 0, numWords * sizeof(uint64_t));
+  return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure. The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) {
+  // The caller owns the array and releases it with delete[].
+  uint64_t *words = new uint64_t[numWords];
+  return words;
+}
+
+/// Maps the character \p cdigit to its numeric value in base \p radix.
+/// Letters (either case) are accepted only for radix 16 and 36; every other
+/// radix accepts '0'..'9' digits below the radix.  Returns -1U when the
+/// character is not a valid digit.
+inline static unsigned getDigit(char cdigit, uint8_t radix) {
+  // Unsigned subtraction wraps for characters below the base character, so a
+  // single upper-bound comparison also rejects them.
+  const unsigned decimal = cdigit - '0';
+
+  if (radix == 16 || radix == 36) {
+    if (decimal <= 9)
+      return decimal;
+
+    // Largest letter offset allowed: 5 for hex ('F'), 25 for base 36 ('Z').
+    const unsigned maxLetter = radix - 11U;
+
+    const unsigned upper = cdigit - 'A';
+    if (upper <= maxLetter)
+      return upper + 10;
+
+    const unsigned lower = cdigit - 'a';
+    if (lower <= maxLetter)
+      return lower + 10;
+
+    // Not a digit and not a letter in range: fall through to the decimal
+    // check below, which fails because decimal > 9 here.
+    radix = 10;
+  }
+
+  if (decimal < radix)
+    return decimal;
+
+  return -1U;
+}
+
+
+/// Multi-word initialization from a single 64-bit value.  Word 0 receives
+/// \p val; when \p isSigned is set and \p val is negative as an int64_t, all
+/// higher words are filled with ones (sign extension).
+void APInt::initSlowCase(uint64_t val, bool isSigned) {
+  const unsigned wordCount = getNumWords();
+  U.pVal = getClearedMemory(wordCount);
+  U.pVal[0] = val;
+
+  const bool signExtend = isSigned && int64_t(val) < 0;
+  if (signExtend) {
+    for (unsigned idx = 1; idx < wordCount; ++idx)
+      U.pVal[idx] = WORDTYPE_MAX;
+  }
+  clearUnusedBits();
+}
+
+/// Multi-word copy initialization: allocates fresh storage and copies every
+/// word of \p that.
+void APInt::initSlowCase(const APInt& that) {
+  const unsigned wordCount = getNumWords();
+  U.pVal = getMemory(wordCount);
+  memcpy(U.pVal, that.U.pVal, wordCount * APINT_WORD_SIZE);
+}
+
+/// Initializes the value from the words in \p bigVal.  A single-word APInt
+/// takes bigVal[0]; otherwise zeroed storage is allocated and as many words
+/// as fit are copied.  Unused high bits are cleared afterwards.
+void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
+  assert(BitWidth && "Bitwidth too small");
+  assert(bigVal.data() && "Null pointer detected!");
+  if (isSingleWord()) {
+    U.VAL = bigVal[0];
+  } else {
+    const unsigned dstWords = getNumWords();
+    // Zeroed storage, so words beyond the copied prefix stay 0.
+    U.pVal = getClearedMemory(dstWords);
+    // Never copy more words than the destination holds.
+    const unsigned copyWords = std::min<unsigned>(bigVal.size(), dstWords);
+    memcpy(U.pVal, bigVal.data(), copyWords * APINT_WORD_SIZE);
+  }
+  clearUnusedBits();
+}
+
+/// Constructs a \p numBits wide value from an array of words.
+APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
+  : BitWidth(numBits) {
+  initFromArray(bigVal);
+}
+
+/// Constructs a \p numBits wide value from \p numWords words at \p bigVal.
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+  : BitWidth(numBits) {
+  ArrayRef<uint64_t> words = makeArrayRef(bigVal, numWords);
+  initFromArray(words);
+}
+
+/// Constructs a \p numbits wide value by parsing \p Str in base \p radix.
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
+  : BitWidth(numbits) {
+  assert(BitWidth && "Bitwidth too small");
+  fromString(numbits, Str, radix);
+}
+
+/// Changes the bit width to \p NewBitWidth, freeing and/or allocating word
+/// storage when the number of words changes.  Newly allocated storage is not
+/// initialized.
+void APInt::reallocate(unsigned NewBitWidth) {
+  // Same word count: the existing storage (inline or heap) can be reused.
+  const bool sameWordCount = getNumWords() == getNumWords(NewBitWidth);
+  if (sameWordCount) {
+    BitWidth = NewBitWidth;
+    return;
+  }
+
+  // Release the old heap storage while BitWidth still describes it.
+  if (!isSingleWord())
+    delete [] U.pVal;
+
+  BitWidth = NewBitWidth;
+
+  // isSingleWord() now reflects the new width.
+  if (!isSingleWord())
+    U.pVal = getMemory(getNumWords());
+}
+
+/// Slow-path assignment from \p RHS: resizes this value to RHS's width and
+/// copies its contents.  Self-assignment is a no-op.
+void APInt::AssignSlowCase(const APInt& RHS) {
+  if (this == &RHS)
+    return;
+
+  reallocate(RHS.getBitWidth());
+
+  if (!isSingleWord()) {
+    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
+    return;
+  }
+  U.VAL = RHS.U.VAL;
+}
+
+/// Adds the bit width followed by every word of this APInt to the
+/// FoldingSet profile \p ID.
+void APInt::Profile(FoldingSetNodeID& ID) const {
+  ID.AddInteger(BitWidth);
+
+  if (!isSingleWord()) {
+    const unsigned wordCount = getNumWords();
+    for (unsigned idx = 0; idx < wordCount; ++idx)
+      ID.AddInteger(U.pVal[idx]);
+    return;
+  }
+
+  ID.AddInteger(U.VAL);
+}
+
+/// Pre-increment: adds one to this APInt and clears the unused high bits.
+APInt& APInt::operator++() {
+  if (!isSingleWord())
+    tcIncrement(U.pVal, getNumWords());
+  else
+    ++U.VAL;
+  return clearUnusedBits();
+}
+
+/// Pre-decrement: subtracts one from this APInt and clears the unused high
+/// bits.
+APInt& APInt::operator--() {
+  if (!isSingleWord())
+    tcDecrement(U.pVal, getNumWords());
+  else
+    --U.VAL;
+  return clearUnusedBits();
+}
+
+/// Adds the RHS APInt to this APInt.
+/// @returns this, after addition of RHS.
+/// Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  // Multi-word values go through tcAdd with an initial carry of 0.
+  if (!isSingleWord())
+    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
+  else
+    U.VAL += RHS.U.VAL;
+  return clearUnusedBits();
+}
+
+/// Adds the 64-bit value \p RHS to this APInt.
+/// @returns this, after addition of RHS.
+APInt& APInt::operator+=(uint64_t RHS) {
+  if (!isSingleWord())
+    tcAddPart(U.pVal, RHS, getNumWords());
+  else
+    U.VAL += RHS;
+  return clearUnusedBits();
+}
+
+/// Subtraction assignment operator: subtracts the RHS APInt from this APInt.
+/// Both operands must have the same bit width.
+/// @returns this, after subtraction
+APInt& APInt::operator-=(const APInt& RHS) {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  // Multi-word values go through tcSubtract with an initial borrow of 0.
+  if (!isSingleWord())
+    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
+  else
+    U.VAL -= RHS.U.VAL;
+  return clearUnusedBits();
+}
+
+/// Subtracts the 64-bit value \p RHS from this APInt.
+/// @returns this, after subtraction
+APInt& APInt::operator-=(uint64_t RHS) {
+  if (!isSingleWord())
+    tcSubtractPart(U.pVal, RHS, getNumWords());
+  else
+    U.VAL -= RHS;
+  return clearUnusedBits();
+}
+
+/// Returns the product of this APInt and \p RHS, truncated to the common bit
+/// width.  Both operands must have the same bit width.
+APInt APInt::operator*(const APInt& RHS) const {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  if (isSingleWord())
+    return APInt(BitWidth, U.VAL * RHS.U.VAL);
+
+  // The product is written into freshly allocated, uninitialized storage
+  // that the result APInt is constructed around.
+  const unsigned wordCount = getNumWords();
+  APInt Product(getMemory(wordCount), getBitWidth());
+  tcMultiply(Product.U.pVal, U.pVal, RHS.U.pVal, wordCount);
+
+  Product.clearUnusedBits();
+  return Product;
+}
+
+/// Multi-word bitwise AND of \p RHS into this APInt.
+void APInt::AndAssignSlowCase(const APInt& RHS) {
+  const unsigned wordCount = getNumWords();
+  tcAnd(U.pVal, RHS.U.pVal, wordCount);
+}
+
+/// Multi-word bitwise OR of \p RHS into this APInt.
+void APInt::OrAssignSlowCase(const APInt& RHS) {
+  const unsigned wordCount = getNumWords();
+  tcOr(U.pVal, RHS.U.pVal, wordCount);
+}
+
+/// Multi-word bitwise XOR of \p RHS into this APInt.
+void APInt::XorAssignSlowCase(const APInt& RHS) {
+  const unsigned wordCount = getNumWords();
+  tcXor(U.pVal, RHS.U.pVal, wordCount);
+}
+
+/// Multiplication assignment, implemented on top of operator*.
+/// @returns this, after multiplication by RHS.
+APInt& APInt::operator*=(const APInt& RHS) {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+  *this = (*this) * RHS;
+  return *this;
+}
+
+/// Multiplies this APInt by the 64-bit value \p RHS in place.
+/// @returns this, after multiplication by RHS.
+APInt& APInt::operator*=(uint64_t RHS) {
+  if (!isSingleWord()) {
+    // Source and destination are the same buffer: multiply in place with no
+    // incoming carry and no accumulation.
+    const unsigned wordCount = getNumWords();
+    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, wordCount, wordCount, false);
+  } else {
+    U.VAL *= RHS;
+  }
+  return clearUnusedBits();
+}
+
+/// Multi-word equality: compares every word of this APInt with the
+/// corresponding word of RHS.
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
+}
+
+/// Unsigned three-way comparison.  Returns -1, 0 or 1 for a single-word
+/// value; multi-word values are compared with tcCompare.  Both operands must
+/// have the same bit width.
+int APInt::compare(const APInt& RHS) const {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+  if (isSingleWord())
+    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
+
+  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+/// Signed three-way comparison.  Single-word values are sign-extended from
+/// BitWidth to 64 bits and compared; multi-word values are ordered by sign
+/// first and by tcCompare when the signs agree.
+int APInt::compareSigned(const APInt& RHS) const {
+  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+  if (isSingleWord()) {
+    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
+    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
+    return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
+  }
+
+  bool lhsNeg = isNegative();
+  bool rhsNeg = RHS.isNegative();
+
+  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
+  if (lhsNeg != rhsNeg)
+    return lhsNeg ? -1 : 1;
+
+  // Otherwise we can just use an unsigned comparison, because even negative
+  // numbers compare correctly this way if both have the same signed-ness.
+  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
+}
+
+/// Multi-word path for setting the bits in [loBit, hiBit): hiBit itself is
+/// excluded, because the high mask only covers the bits below it.
+void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
+  unsigned loWord = whichWord(loBit);
+  unsigned hiWord = whichWord(hiBit);
+
+  // Create an initial mask for the low word with zeros below loBit.
+  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
+
+  // If hiBit is not aligned, we need a high mask.
+  unsigned hiShiftAmt = whichBit(hiBit);
+  if (hiShiftAmt != 0) {
+    // Create a high mask with zeros above hiBit.
+    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
+    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
+    // set the bits in hiWord.
+    if (hiWord == loWord)
+      loMask &= hiMask;
+    else
+      U.pVal[hiWord] |= hiMask;
+  }
+  // Apply the mask to the low word.
+  U.pVal[loWord] |= loMask;
+
+  // Fill any words between loWord and hiWord with all ones.
+  for (unsigned word = loWord + 1; word < hiWord; ++word)
+    U.pVal[word] = WORDTYPE_MAX;
+}
+
+/// Toggle every bit to its opposite value.
+void APInt::flipAllBitsSlowCase() {
+  tcComplement(U.pVal, getNumWords());
+  clearUnusedBits();
+}
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// Toggles a given bit to its opposite value.
+void APInt::flipBit(unsigned bitPosition) {
+  assert(bitPosition < BitWidth && "Out of the bit-width range!");
+  if ((*this)[bitPosition]) clearBit(bitPosition);
+  else setBit(bitPosition);
+}
+
+/// Overwrites the subBits.getBitWidth() bits starting at bitPosition with
+/// the value of subBits.  The inserted range must lie inside this APInt
+/// (asserted).  Fast paths handle a full-width copy, a single-word
+/// destination, an insertion contained in one word and an insertion that
+/// starts on a word boundary; everything else is copied bit by bit.
+void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
+  unsigned subBitWidth = subBits.getBitWidth();
+  assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
+         "Illegal bit insertion");
+
+  // Insertion is a direct copy.
+  if (subBitWidth == BitWidth) {
+    *this = subBits;
+    return;
+  }
+
+  // Single word result can be done as a direct bitmask.
+  if (isSingleWord()) {
+    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+    U.VAL &= ~(mask << bitPosition);
+    U.VAL |= (subBits.U.VAL << bitPosition);
+    return;
+  }
+
+  unsigned loBit = whichBit(bitPosition);
+  unsigned loWord = whichWord(bitPosition);
+  // Word holding the last inserted bit.
+  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
+
+  // Insertion within a single word can be done as a direct bitmask.
+  if (loWord == hi1Word) {
+    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+    U.pVal[loWord] &= ~(mask << loBit);
+    U.pVal[loWord] |= (subBits.U.VAL << loBit);
+    return;
+  }
+
+  // Insert on word boundaries.
+  if (loBit == 0) {
+    // Direct copy whole words.
+    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
+    memcpy(U.pVal + loWord, subBits.getRawData(),
+           numWholeSubWords * APINT_WORD_SIZE);
+
+    // Mask+insert remaining bits.
+    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
+    if (remainingBits != 0) {
+      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
+      U.pVal[hi1Word] &= ~mask;
+      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
+    }
+    return;
+  }
+
+  // General case - set/clear individual bits in dst based on src.
+  // TODO - there is scope for optimization here, but at the moment this code
+  // path is barely used so prefer readability over performance.
+  for (unsigned i = 0; i != subBitWidth; ++i) {
+    if (subBits[i])
+      setBit(bitPosition + i);
+    else
+      clearBit(bitPosition + i);
+  }
+}
+
+/// Overwrites numBits bits starting at bitPosition with the low numBits bits
+/// of subBits.  The range may straddle at most two words.
+/// NOTE(review): unlike the APInt overload there are no range asserts here;
+/// callers are presumably expected to pass 0 < numBits <= 64 and an in-range
+/// position - confirm.
+void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
+  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
+  subBits &= maskBits;
+  if (isSingleWord()) {
+    U.VAL &= ~(maskBits << bitPosition);
+    U.VAL |= subBits << bitPosition;
+    return;
+  }
+
+  unsigned loBit = whichBit(bitPosition);
+  unsigned loWord = whichWord(bitPosition);
+  unsigned hiWord = whichWord(bitPosition + numBits - 1);
+  if (loWord == hiWord) {
+    U.pVal[loWord] &= ~(maskBits << loBit);
+    U.pVal[loWord] |= subBits << loBit;
+    return;
+  }
+
+  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
+  unsigned wordBits = 8 * sizeof(WordType);
+  // Low word receives the low part of the field ...
+  U.pVal[loWord] &= ~(maskBits << loBit);
+  U.pVal[loWord] |= subBits << loBit;
+
+  // ... and the high word receives whatever was shifted out of the low word.
+  U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
+  U.pVal[hiWord] |= subBits >> (wordBits - loBit);
+}
+
+/// Returns a numBits wide APInt holding the bits of this value starting at
+/// bitPosition.  The requested range must be non-empty and lie inside this
+/// APInt (asserted).
+APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
+  assert(numBits > 0 && "Can't extract zero bits");
+  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
+         "Illegal bit extraction");
+
+  if (isSingleWord())
+    return APInt(numBits, U.VAL >> bitPosition);
+
+  unsigned loBit = whichBit(bitPosition);
+  unsigned loWord = whichWord(bitPosition);
+  unsigned hiWord = whichWord(bitPosition + numBits - 1);
+
+  // Single word result extracting bits from a single word source.
+  if (loWord == hiWord)
+    return APInt(numBits, U.pVal[loWord] >> loBit);
+
+  // Extracting bits that start on a source word boundary can be done
+  // as a fast memory copy.
+  if (loBit == 0)
+    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
+
+  // General case - shift + copy source words directly into place.
+  APInt Result(numBits, 0);
+  unsigned NumSrcWords = getNumWords();
+  unsigned NumDstWords = Result.getNumWords();
+
+  uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
+  for (unsigned word = 0; word < NumDstWords; ++word) {
+    // Each destination word combines the top of one source word with the
+    // bottom of the next; past the end of the source the next word is 0.
+    uint64_t w0 = U.pVal[loWord + word];
+    uint64_t w1 =
+        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
+    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
+  }
+
+  return Result.clearUnusedBits();
+}
+
+/// Returns up to 64 bits of this value, starting at bitPosition, as a
+/// zero-extended uint64_t.  The range must be non-empty, at most 64 bits
+/// wide and inside this APInt (asserted).
+uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
+                                       unsigned bitPosition) const {
+  assert(numBits > 0 && "Can't extract zero bits");
+  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
+         "Illegal bit extraction");
+  assert(numBits <= 64 && "Illegal bit extraction");
+
+  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
+  if (isSingleWord())
+    return (U.VAL >> bitPosition) & maskBits;
+
+  unsigned loBit = whichBit(bitPosition);
+  unsigned loWord = whichWord(bitPosition);
+  unsigned hiWord = whichWord(bitPosition + numBits - 1);
+  if (loWord == hiWord)
+    return (U.pVal[loWord] >> loBit) & maskBits;
+
+  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
+  unsigned wordBits = 8 * sizeof(WordType);
+  // The field straddles two words: stitch the halves together, then mask.
+  uint64_t retBits = U.pVal[loWord] >> loBit;
+  retBits |= U.pVal[hiWord] << (wordBits - loBit);
+  retBits &= maskBits;
+  return retBits;
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+  assert(!str.empty() && "Invalid string length");
+  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+          radix == 36) &&
+         "Radix should be 2, 8, 10, 16, or 
36!"); + + size_t slen = str.size(); + + // Each computation below needs to know if it's negative. + StringRef::iterator p = str.begin(); + unsigned isNegative = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String is only a sign, needs a value."); + } + + // For radixes of power-of-two values, the bits required is accurately and + // easily computed + if (radix == 2) + return slen + isNegative; + if (radix == 8) + return slen * 3 + isNegative; + if (radix == 16) + return slen * 4 + isNegative; + + // FIXME: base 36 + + // This is grossly inefficient but accurate. We could probably do something + // with a computation of roughly slen*64/20 and then adjust by the value of + // the first few digits. But, I'm not sure how accurate that could be. + + // Compute a sufficient number of bits that is always large enough but might + // be too large. This avoids the assertion in the constructor. This + // calculation doesn't work appropriately for the numbers 0-9, so just use 4 + // bits in that case. + unsigned sufficient + = radix == 10? (slen == 1 ? 4 : slen * 64/18) + : (slen == 1 ? 7 : slen * 16/3); + + // Convert to the actual binary value. + APInt tmp(sufficient, StringRef(p, slen), radix); + + // Compute how many bits are required. If the log is infinite, assume we need + // just bit. If the log is exact and value is negative, then the value is + // MinSignedValue with (log + 1) bits. 
+ unsigned log = tmp.logBase2(); + if (log == (unsigned)-1) { + return isNegative + 1; + } else if (isNegative && tmp.isPowerOf2()) { + return isNegative + log; + } else { + return isNegative + log + 1; + } +} + +hash_code llvm::hash_value(const APInt &Arg) { + if (Arg.isSingleWord()) + return hash_combine(Arg.U.VAL); + + return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); +} + +bool APInt::isSplat(unsigned SplatSizeInBits) const { + assert(getBitWidth() % SplatSizeInBits == 0 && + "SplatSizeInBits must divide width!"); + // We can check that all parts of an integer are equal by making use of a + // little trick: rotate and check if it's still the same value. + return *this == rotl(SplatSizeInBits); +} + +/// This function returns the high "numBits" bits of this APInt. +APInt APInt::getHiBits(unsigned numBits) const { + return this->lshr(BitWidth - numBits); +} + +/// This function returns the low "numBits" bits of this APInt. +APInt APInt::getLoBits(unsigned numBits) const { + APInt Result(getLowBitsSet(BitWidth, numBits)); + Result &= *this; + return Result; +} + +/// Return a value containing V broadcasted over NewLen bits. +APInt APInt::getSplat(unsigned NewLen, const APInt &V) { + assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); + + APInt Val = V.zextOrSelf(NewLen); + for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) + Val |= Val << I; + + return Val; +} + +unsigned APInt::countLeadingZerosSlowCase() const { + unsigned Count = 0; + for (int i = getNumWords()-1; i >= 0; --i) { + uint64_t V = U.pVal[i]; + if (V == 0) + Count += APINT_BITS_PER_WORD; + else { + Count += llvm::countLeadingZeros(V); + break; + } + } + // Adjust for unused bits in the most significant word (they are zero). + unsigned Mod = BitWidth % APINT_BITS_PER_WORD; + Count -= Mod > 0 ? 
APINT_BITS_PER_WORD - Mod : 0; + return Count; +} + +unsigned APInt::countLeadingOnesSlowCase() const { + unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; + unsigned shift; + if (!highWordBits) { + highWordBits = APINT_BITS_PER_WORD; + shift = 0; + } else { + shift = APINT_BITS_PER_WORD - highWordBits; + } + int i = getNumWords() - 1; + unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); + if (Count == highWordBits) { + for (i--; i >= 0; --i) { + if (U.pVal[i] == WORDTYPE_MAX) + Count += APINT_BITS_PER_WORD; + else { + Count += llvm::countLeadingOnes(U.pVal[i]); + break; + } + } + } + return Count; +} + +unsigned APInt::countTrailingZerosSlowCase() const { + unsigned Count = 0; + unsigned i = 0; + for (; i < getNumWords() && U.pVal[i] == 0; ++i) + Count += APINT_BITS_PER_WORD; + if (i < getNumWords()) + Count += llvm::countTrailingZeros(U.pVal[i]); + return std::min(Count, BitWidth); +} + +unsigned APInt::countTrailingOnesSlowCase() const { + unsigned Count = 0; + unsigned i = 0; + for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) + Count += APINT_BITS_PER_WORD; + if (i < getNumWords()) + Count += llvm::countTrailingOnes(U.pVal[i]); + assert(Count <= BitWidth); + return Count; +} + +unsigned APInt::countPopulationSlowCase() const { + unsigned Count = 0; + for (unsigned i = 0; i < getNumWords(); ++i) + Count += llvm::countPopulation(U.pVal[i]); + return Count; +} + +bool APInt::intersectsSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((U.pVal[i] & RHS.U.pVal[i]) != 0) + return true; + + return false; +} + +bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) + return false; + + return true; +} + +APInt APInt::byteSwap() const { + assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); + if (BitWidth == 16) + return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); + if (BitWidth == 32) 
+ return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); + if (BitWidth == 48) { + unsigned Tmp1 = unsigned(U.VAL >> 16); + Tmp1 = ByteSwap_32(Tmp1); + uint16_t Tmp2 = uint16_t(U.VAL); + Tmp2 = ByteSwap_16(Tmp2); + return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); + } + if (BitWidth == 64) + return APInt(BitWidth, ByteSwap_64(U.VAL)); + + APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); + for (unsigned I = 0, N = getNumWords(); I != N; ++I) + Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); + if (Result.BitWidth != BitWidth) { + Result.lshrInPlace(Result.BitWidth - BitWidth); + Result.BitWidth = BitWidth; + } + return Result; +} + +APInt APInt::reverseBits() const { + switch (BitWidth) { + case 64: + return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); + case 32: + return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); + case 16: + return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); + case 8: + return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); + default: + break; + } + + APInt Val(*this); + APInt Reversed(BitWidth, 0); + unsigned S = BitWidth; + + for (; Val != 0; Val.lshrInPlace(1)) { + Reversed <<= 1; + Reversed |= Val[0]; + --S; + } + + Reversed <<= S; + return Reversed; +} + +APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { + // Fast-path a common case. + if (A == B) return A; + + // Corner cases: if either operand is zero, the other is the gcd. + if (!A) return B; + if (!B) return A; + + // Count common powers of 2 and remove all other powers of 2. 
+ unsigned Pow2; + { + unsigned Pow2_A = A.countTrailingZeros(); + unsigned Pow2_B = B.countTrailingZeros(); + if (Pow2_A > Pow2_B) { + A.lshrInPlace(Pow2_A - Pow2_B); + Pow2 = Pow2_B; + } else if (Pow2_B > Pow2_A) { + B.lshrInPlace(Pow2_B - Pow2_A); + Pow2 = Pow2_A; + } else { + Pow2 = Pow2_A; + } + } + + // Both operands are odd multiples of 2^Pow_2: + // + // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) + // + // This is a modified version of Stein's algorithm, taking advantage of + // efficient countTrailingZeros(). + while (A != B) { + if (A.ugt(B)) { + A -= B; + A.lshrInPlace(A.countTrailingZeros() - Pow2); + } else { + B -= A; + B.lshrInPlace(B.countTrailingZeros() - Pow2); + } + } + + return A; +} + +APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { + uint64_t I = bit_cast<uint64_t>(Double); + + // Get the sign bit from the highest order bit + bool isNeg = I >> 63; + + // Get the 11-bit exponent and adjust for the 1023 bit bias + int64_t exp = ((I >> 52) & 0x7ff) - 1023; + + // If the exponent is negative, the value is < 0 so just return 0. + if (exp < 0) + return APInt(width, 0u); + + // Extract the mantissa by clearing the top 12 bits (sign + exponent). + uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; + + // If the exponent doesn't shift all bits out of the mantissa + if (exp < 52) + return isNeg ? -APInt(width, mantissa >> (52 - exp)) : + APInt(width, mantissa >> (52 - exp)); + + // If the client didn't provide enough bits for us to shift the mantissa into + // then the result is undefined, just return 0 + if (width <= exp - 52) + return APInt(width, 0); + + // Otherwise, we have to shift the mantissa bits up to the right location + APInt Tmp(width, mantissa); + Tmp <<= (unsigned)exp - 52; + return isNeg ? -Tmp : Tmp; +} + +/// This function converts this APInt to a double. 
+/// The layout for double is as following (IEEE Standard 754): +/// -------------------------------------- +/// | Sign Exponent Fraction Bias | +/// |-------------------------------------- | +/// | 1[63] 11[62-52] 52[51-00] 1023 | +/// -------------------------------------- +double APInt::roundToDouble(bool isSigned) const { + + // Handle the simple case where the value is contained in one uint64_t. + // It is wrong to optimize getWord(0) to VAL; there might be more than one word. + if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { + if (isSigned) { + int64_t sext = SignExtend64(getWord(0), BitWidth); + return double(sext); + } else + return double(getWord(0)); + } + + // Determine if the value is negative. + bool isNeg = isSigned ? (*this)[BitWidth-1] : false; + + // Construct the absolute value if we're negative. + APInt Tmp(isNeg ? -(*this) : (*this)); + + // Figure out how many bits we're using. + unsigned n = Tmp.getActiveBits(); + + // The exponent (without bias normalization) is just the number of bits + // we are using. Note that the sign bit is gone since we constructed the + // absolute value. + uint64_t exp = n; + + // Return infinity for exponent overflow + if (exp > 1023) { + if (!isSigned || !isNeg) + return std::numeric_limits<double>::infinity(); + else + return -std::numeric_limits<double>::infinity(); + } + exp += 1023; // Increment for 1023 bias + + // Number of bits in mantissa is 52. To obtain the mantissa value, we must + // extract the high 52 bits from the correct words in pVal. + uint64_t mantissa; + unsigned hiWord = whichWord(n-1); + if (hiWord == 0) { + mantissa = Tmp.U.pVal[0]; + if (n > 52) + mantissa >>= n - 52; // shift down, we want the top 52 bits. 
+ } else { + assert(hiWord > 0 && "huh?"); + uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); + uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); + mantissa = hibits | lobits; + } + + // The leading bit of mantissa is implicit, so get rid of it. + uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; + uint64_t I = sign | (exp << 52) | mantissa; + return bit_cast<double>(I); +} + +// Truncate to new width. +APInt APInt::trunc(unsigned width) const { + assert(width < BitWidth && "Invalid APInt Truncate request"); + assert(width && "Can't truncate to 0 bits"); + + if (width <= APINT_BITS_PER_WORD) + return APInt(width, getRawData()[0]); + + APInt Result(getMemory(getNumWords(width)), width); + + // Copy full words. + unsigned i; + for (i = 0; i != width / APINT_BITS_PER_WORD; i++) + Result.U.pVal[i] = U.pVal[i]; + + // Truncate and copy any partial word. + unsigned bits = (0 - width) % APINT_BITS_PER_WORD; + if (bits != 0) + Result.U.pVal[i] = U.pVal[i] << bits >> bits; + + return Result; +} + +// Sign extend to a new width. +APInt APInt::sext(unsigned Width) const { + assert(Width > BitWidth && "Invalid APInt SignExtend request"); + + if (Width <= APINT_BITS_PER_WORD) + return APInt(Width, SignExtend64(U.VAL, BitWidth)); + + APInt Result(getMemory(getNumWords(Width)), Width); + + // Copy words. + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); + + // Sign extend the last word since there may be unused bits in the input. + Result.U.pVal[getNumWords() - 1] = + SignExtend64(Result.U.pVal[getNumWords() - 1], + ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + + // Fill with sign bits. + std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); + Result.clearUnusedBits(); + return Result; +} + +// Zero extend to a new width. 
+APInt APInt::zext(unsigned width) const { + assert(width > BitWidth && "Invalid APInt ZeroExtend request"); + + if (width <= APINT_BITS_PER_WORD) + return APInt(width, U.VAL); + + APInt Result(getMemory(getNumWords(width)), width); + + // Copy words. + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); + + // Zero remaining words. + std::memset(Result.U.pVal + getNumWords(), 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); + + return Result; +} + +APInt APInt::zextOrTrunc(unsigned width) const { + if (BitWidth < width) + return zext(width); + if (BitWidth > width) + return trunc(width); + return *this; +} + +APInt APInt::sextOrTrunc(unsigned width) const { + if (BitWidth < width) + return sext(width); + if (BitWidth > width) + return trunc(width); + return *this; +} + +APInt APInt::zextOrSelf(unsigned width) const { + if (BitWidth < width) + return zext(width); + return *this; +} + +APInt APInt::sextOrSelf(unsigned width) const { + if (BitWidth < width) + return sext(width); + return *this; +} + +/// Arithmetic right-shift this APInt by shiftAmt. +/// Arithmetic right-shift function. +void APInt::ashrInPlace(const APInt &shiftAmt) { + ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +/// Arithmetic right-shift this APInt by shiftAmt. +/// Arithmetic right-shift function. +void APInt::ashrSlowCase(unsigned ShiftAmt) { + // Don't bother performing a no-op shift. + if (!ShiftAmt) + return; + + // Save the original sign bit for later. + bool Negative = isNegative(); + + // WordShift is the inter-part shift; BitShift is intra-part shift. + unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; + unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; + + unsigned WordsToMove = getNumWords() - WordShift; + if (WordsToMove != 0) { + // Sign extend the last word to fill in the unused bits. 
+ U.pVal[getNumWords() - 1] = SignExtend64( + U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + // Move the words containing significant bits. + for (unsigned i = 0; i != WordsToMove - 1; ++i) + U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | + (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); + + // Handle the last word which has no high bits to copy. + U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; + // Sign extend one more time. + U.pVal[WordsToMove - 1] = + SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); + } + } + + // Fill in the remainder based on the original sign. + std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, + WordShift * APINT_WORD_SIZE); + clearUnusedBits(); +} + +/// Logical right-shift this APInt by shiftAmt. +/// Logical right-shift function. +void APInt::lshrInPlace(const APInt &shiftAmt) { + lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +/// Logical right-shift this APInt by shiftAmt. +/// Logical right-shift function. +void APInt::lshrSlowCase(unsigned ShiftAmt) { + tcShiftRight(U.pVal, getNumWords(), ShiftAmt); +} + +/// Left-shift this APInt by shiftAmt. +/// Left-shift function. +APInt &APInt::operator<<=(const APInt &shiftAmt) { + // It's undefined behavior in C to shift by BitWidth or greater. + *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); + return *this; +} + +void APInt::shlSlowCase(unsigned ShiftAmt) { + tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); + clearUnusedBits(); +} + +// Calculate the rotate amount modulo the bit width. 
+static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { + unsigned rotBitWidth = rotateAmt.getBitWidth(); + APInt rot = rotateAmt; + if (rotBitWidth < BitWidth) { + // Extend the rotate APInt, so that the urem doesn't divide by 0. + // e.g. APInt(1, 32) would give APInt(1, 0). + rot = rotateAmt.zext(BitWidth); + } + rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); + return rot.getLimitedValue(BitWidth); +} + +APInt APInt::rotl(const APInt &rotateAmt) const { + return rotl(rotateModulo(BitWidth, rotateAmt)); +} + +APInt APInt::rotl(unsigned rotateAmt) const { + rotateAmt %= BitWidth; + if (rotateAmt == 0) + return *this; + return shl(rotateAmt) | lshr(BitWidth - rotateAmt); +} + +APInt APInt::rotr(const APInt &rotateAmt) const { + return rotr(rotateModulo(BitWidth, rotateAmt)); +} + +APInt APInt::rotr(unsigned rotateAmt) const { + rotateAmt %= BitWidth; + if (rotateAmt == 0) + return *this; + return lshr(rotateAmt) | shl(BitWidth - rotateAmt); +} + +// Square Root - this method computes and returns the square root of "this". +// Three mechanisms are used for computation. For small values (<= 5 bits), +// a table lookup is done. This gets some performance for common cases. For +// values using less than 52 bits, the value is converted to double and then +// the libc sqrt function is called. The result is rounded and then converted +// back to a uint64_t which is then used to construct the result. Finally, +// the Babylonian method for computing square roots is used. +APInt APInt::sqrt() const { + + // Determine the magnitude of the value. + unsigned magnitude = getActiveBits(); + + // Use a fast table for some small values. This also gets rid of some + // rounding errors in libc sqrt for small values. 
+ if (magnitude <= 5) { + static const uint8_t results[32] = { + /* 0 */ 0, + /* 1- 2 */ 1, 1, + /* 3- 6 */ 2, 2, 2, 2, + /* 7-12 */ 3, 3, 3, 3, 3, 3, + /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, + /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + /* 31 */ 6 + }; + return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); + } + + // If the magnitude of the value fits in less than 52 bits (the precision of + // an IEEE double precision floating point value), then we can use the + // libc sqrt function which will probably use a hardware sqrt computation. + // This should be faster than the algorithm below. + if (magnitude < 52) { + return APInt(BitWidth, + uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL + : U.pVal[0]))))); + } + + // Okay, all the short cuts are exhausted. We must compute it. The following + // is a classical Babylonian method for computing the square root. This code + // was adapted to APInt from a wikipedia article on such computations. + // See http://www.wikipedia.org/ and go to the page named + // Calculate_an_integer_square_root. + unsigned nbits = BitWidth, i = 4; + APInt testy(BitWidth, 16); + APInt x_old(BitWidth, 1); + APInt x_new(BitWidth, 0); + APInt two(BitWidth, 2); + + // Select a good starting value using binary logarithms. + for (;; i += 2, testy = testy.shl(2)) + if (i >= nbits || this->ule(testy)) { + x_old = x_old.shl(i / 2); + break; + } + + // Use the Babylonian method to arrive at the integer square root: + for (;;) { + x_new = (this->udiv(x_old) + x_old).udiv(two); + if (x_old.ule(x_new)) + break; + x_old = x_new; + } + + // Make sure we return the closest approximation + // NOTE: The rounding calculation below is correct. It will produce an + // off-by-one discrepancy with results from pari/gp. That discrepancy has been + // determined to be a rounding issue with pari/gp as it begins to use a + // floating point representation after 192 bits. 
There are no discrepancies + // between this algorithm and pari/gp for bit widths < 192 bits. + APInt square(x_old * x_old); + APInt nextSquare((x_old + 1) * (x_old +1)); + if (this->ult(square)) + return x_old; + assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); + APInt midpoint((nextSquare - square).udiv(two)); + APInt offset(*this - square); + if (offset.ult(midpoint)) + return x_old; + return x_old + 1; +} + +/// Computes the multiplicative inverse of this APInt for a given modulo. The +/// iterative extended Euclidean algorithm is used to solve for this value, +/// however we simplify it to speed up calculating only the inverse, and take +/// advantage of div+rem calculations. We also use some tricks to avoid copying +/// (potentially large) APInts around. +/// WARNING: a value of '0' may be returned, +/// signifying that no multiplicative inverse exists! +APInt APInt::multiplicativeInverse(const APInt& modulo) const { + assert(ult(modulo) && "This APInt must be smaller than the modulo"); + + // Using the properties listed at the following web page (accessed 06/21/08): + // http://www.numbertheory.org/php/euclid.html + // (especially the properties numbered 3, 4 and 9) it can be proved that + // BitWidth bits suffice for all the computations in the algorithm implemented + // below. More precisely, this number of bits suffice if the multiplicative + // inverse exists, but may not suffice for the general extended Euclidean + // algorithm. + + APInt r[2] = { modulo, *this }; + APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; + APInt q(BitWidth, 0); + + unsigned i; + for (i = 0; r[i^1] != 0; i ^= 1) { + // An overview of the math without the confusing bit-flipping: + // q = r[i-2] / r[i-1] + // r[i] = r[i-2] % r[i-1] + // t[i] = t[i-2] - t[i-1] * q + udivrem(r[i], r[i^1], q, r[i]); + t[i] -= t[i^1] * q; + } + + // If this APInt and the modulo are not coprime, there is no multiplicative + // inverse, so return 0. 
We check this by looking at the next-to-last + // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean + // algorithm. + if (r[i] != 1) + return APInt(BitWidth, 0); + + // The next-to-last t is the multiplicative inverse. However, we are + // interested in a positive inverse. Calculate a positive one from a negative + // one if necessary. A simple addition of the modulo suffices because + // abs(t[i]) is known to be less than *this/2 (see the link above). + if (t[i].isNegative()) + t[i] += modulo; + + return std::move(t[i]); +} + +/// Calculate the magic numbers required to implement a signed integer division +/// by a constant as a sequence of multiplies, adds and shifts. Requires that +/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. +/// Warren, Jr., chapter 10. +APInt::ms APInt::magic() const { + const APInt& d = *this; + unsigned p; + APInt ad, anc, delta, q1, r1, q2, r2, t; + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + struct ms mag; + + ad = d.abs(); + t = signedMin + (d.lshr(d.getBitWidth() - 1)); + anc = t - 1 - t.urem(ad); // absolute value of nc + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) + r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) + q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) + r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) + do { + p = p + 1; + q1 = q1<<1; // update q1 = 2p/abs(nc) + r1 = r1<<1; // update r1 = rem(2p/abs(nc)) + if (r1.uge(anc)) { // must be unsigned comparison + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = q2<<1; // update q2 = 2p/abs(d) + r2 = r2<<1; // update r2 = rem(2p/abs(d)) + if (r2.uge(ad)) { // must be unsigned comparison + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1.ult(delta) || (q1 == delta && r1 == 0)); + + mag.m = q2 + 1; + if (d.isNegative()) mag.m = -mag.m; // resulting magic number + mag.s = p - d.getBitWidth(); // resulting shift + return 
mag; +} + +/// Calculate the magic numbers required to implement an unsigned integer +/// division by a constant as a sequence of multiplies, adds and shifts. +/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry +/// S. Warren, Jr., chapter 10. +/// LeadingZeros can be used to simplify the calculation if the upper bits +/// of the divided value are known zero. +APInt::mu APInt::magicu(unsigned LeadingZeros) const { + const APInt& d = *this; + unsigned p; + APInt nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); + + nc = allOnes - (allOnes - d).urem(d); + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc + r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) + q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d + r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) + do { + p = p + 1; + if (r1.uge(nc - r1)) { + q1 = q1 + q1 + 1; // update q1 + r1 = r1 + r1 - nc; // update r1 + } + else { + q1 = q1+q1; // update q1 + r1 = r1+r1; // update r1 + } + if ((r2 + 1).uge(d - r2)) { + if (q2.uge(signedMax)) magu.a = 1; + q2 = q2+q2 + 1; // update q2 + r2 = r2+r2 + 1 - d; // update r2 + } + else { + if (q2.uge(signedMin)) magu.a = 1; + q2 = q2+q2; // update q2 + r2 = r2+r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < d.getBitWidth()*2 && + (q1.ult(delta) || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - d.getBitWidth(); // resulting shift + return magu; +} + +/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) +/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The +/// variables here have the same names as in the algorithm. 
Comments explain +/// the algorithm and any deviation from it. +static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, + unsigned m, unsigned n) { + assert(u && "Must provide dividend"); + assert(v && "Must provide divisor"); + assert(q && "Must provide quotient"); + assert(u != v && u != q && v != q && "Must use different memory"); + assert(n>1 && "n must be > 1"); + + // b denotes the base of the number system. In our case b is 2^32. + const uint64_t b = uint64_t(1) << 32; + +// The DEBUG macros here tend to be spam in the debug output if you're not +// debugging this code. Disable them unless KNUTH_DEBUG is defined. +#ifdef KNUTH_DEBUG +#define DEBUG_KNUTH(X) LLVM_DEBUG(X) +#else +#define DEBUG_KNUTH(X) do {} while(false) +#endif + + DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); + DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); + DEBUG_KNUTH(dbgs() << " by"); + DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); + DEBUG_KNUTH(dbgs() << '\n'); + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of + // 2 allows us to shift instead of multiply and it is easy to determine the + // shift amount from the leading zeros. We are basically normalizing the u + // and v so that its high bits are shifted to the top of v's range without + // overflow. Note that this can require an extra word in u so that u must + // be of length m+n+1. 
+ unsigned shift = countLeadingZeros(v[n-1]); + uint32_t v_carry = 0; + uint32_t u_carry = 0; + if (shift) { + for (unsigned i = 0; i < m+n; ++i) { + uint32_t u_tmp = u[i] >> (32 - shift); + u[i] = (u[i] << shift) | u_carry; + u_carry = u_tmp; + } + for (unsigned i = 0; i < n; ++i) { + uint32_t v_tmp = v[i] >> (32 - shift); + v[i] = (v[i] << shift) | v_carry; + v_carry = v_tmp; + } + } + u[m+n] = u_carry; + + DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); + DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); + DEBUG_KNUTH(dbgs() << " by"); + DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); + DEBUG_KNUTH(dbgs() << '\n'); + + // D2. [Initialize j.] Set j to m. This is the loop counter over the places. + int j = m; + do { + DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); + // D3. [Calculate q'.]. + // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') + // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') + // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease + // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test + // on v[n-2] determines at high speed most of the cases in which the trial + // value qp is one too large, and it eliminates all cases where qp is two + // too large. + uint64_t dividend = Make_64(u[j+n], u[j+n-1]); + DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); + uint64_t qp = dividend / v[n-1]; + uint64_t rp = dividend % v[n-1]; + if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { + qp--; + rp += v[n-1]; + if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) + qp--; + } + DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + + // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with + // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation + // consists of a simple multiplication by a one-place number, combined with + // a subtraction. 
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. + int64_t borrow = 0; + for (unsigned i = 0; i < n; ++i) { + uint64_t p = uint64_t(qp) * uint64_t(v[i]); + int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); + u[j+i] = Lo_32(subres); + borrow = Hi_32(p) - Hi_32(subres); + DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] + << ", borrow = " << borrow << '\n'); + } + bool isNeg = u[j+n] < borrow; + u[j+n] -= Lo_32(borrow); + + DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); + DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); + DEBUG_KNUTH(dbgs() << '\n'); + + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // negative, go to step D6; otherwise go on to step D7. + q[j] = Lo_32(qp); + if (isNeg) { + // D6. [Add back]. The probability that this step is necessary is very + // small, on the order of only 2/b. Make sure that test data accounts for + // this possibility. Decrease q[j] by 1 + q[j]--; + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored + // since it cancels with the borrow that occurred in D4. + bool carry = false; + for (unsigned i = 0; i < n; i++) { + uint32_t limit = std::min(u[j+i],v[i]); + u[j+i] += v[i] + carry; + carry = u[j+i] < limit || (carry && u[j+i] == limit); + } + u[j+n] += carry; + } + DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); + DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); + DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); + + // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 
+ } while (--j >= 0); + + DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); + DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); + DEBUG_KNUTH(dbgs() << '\n'); + + // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired + // remainder may be obtained by dividing u[...] by d. If r is non-null we + // compute the remainder (urem uses this). + if (r) { + // The value d is expressed by the "shift" value above since we avoided + // multiplication by d by using a shift left. So, all we have to do is + // shift right here. + if (shift) { + uint32_t carry = 0; + DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); + for (int i = n-1; i >= 0; i--) { + r[i] = (u[i] >> shift) | carry; + carry = u[i] << (32 - shift); + DEBUG_KNUTH(dbgs() << " " << r[i]); + } + } else { + for (int i = n-1; i >= 0; i--) { + r[i] = u[i]; + DEBUG_KNUTH(dbgs() << " " << r[i]); + } + } + DEBUG_KNUTH(dbgs() << '\n'); + } + DEBUG_KNUTH(dbgs() << '\n'); +} + +void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, + unsigned rhsWords, WordType *Quotient, WordType *Remainder) { + assert(lhsWords >= rhsWords && "Fractional result"); + + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. This is a necessity of both the "short division" algorithm + // and the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't + // work on large-endian machines. + unsigned n = rhsWords * 2; + unsigned m = (lhsWords * 2) - n; + + // Allocate space for the temporary values we need either on the stack, if + // it will fit, or on the heap if it won't. 
+ uint32_t SPACE[128]; + uint32_t *U = nullptr; + uint32_t *V = nullptr; + uint32_t *Q = nullptr; + uint32_t *R = nullptr; + if ((Remainder?4:3)*n+2*m+1 <= 128) { + U = &SPACE[0]; + V = &SPACE[m+n+1]; + Q = &SPACE[(m+n+1) + n]; + if (Remainder) + R = &SPACE[(m+n+1) + n + (m+n)]; + } else { + U = new uint32_t[m + n + 1]; + V = new uint32_t[n]; + Q = new uint32_t[m+n]; + if (Remainder) + R = new uint32_t[n]; + } + + // Initialize the dividend + memset(U, 0, (m+n+1)*sizeof(uint32_t)); + for (unsigned i = 0; i < lhsWords; ++i) { + uint64_t tmp = LHS[i]; + U[i * 2] = Lo_32(tmp); + U[i * 2 + 1] = Hi_32(tmp); + } + U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. + + // Initialize the divisor + memset(V, 0, (n)*sizeof(uint32_t)); + for (unsigned i = 0; i < rhsWords; ++i) { + uint64_t tmp = RHS[i]; + V[i * 2] = Lo_32(tmp); + V[i * 2 + 1] = Hi_32(tmp); + } + + // initialize the quotient and remainder + memset(Q, 0, (m+n) * sizeof(uint32_t)); + if (Remainder) + memset(R, 0, n * sizeof(uint32_t)); + + // Now, adjust m and n for the Knuth division. n is the number of words in + // the divisor. m is the number of words by which the dividend exceeds the + // divisor (i.e. m+n is the length of the dividend). These sizes must not + // contain any zero words or the Knuth algorithm fails. + for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) + m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10. 
+ assert(n != 0 && "Divide by zero?"); + if (n == 1) { + uint32_t divisor = V[0]; + uint32_t remainder = 0; + for (int i = m; i >= 0; i--) { + uint64_t partial_dividend = Make_64(remainder, U[i]); + if (partial_dividend == 0) { + Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + Q[i] = 0; + remainder = Lo_32(partial_dividend); + } else if (partial_dividend == divisor) { + Q[i] = 1; + remainder = 0; + } else { + Q[i] = Lo_32(partial_dividend / divisor); + remainder = Lo_32(partial_dividend - (Q[i] * divisor)); + } + } + if (R) + R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(U, V, Q, R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + for (unsigned i = 0; i < lhsWords; ++i) + Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); + } + + // If the caller wants the remainder + if (Remainder) { + for (unsigned i = 0; i < rhsWords; ++i) + Remainder[i] = Make_64(R[i*2+1], R[i*2]); + } + + // Clean up the memory we allocated. 
+ if (U != &SPACE[0]) { + delete [] U; + delete [] V; + delete [] Q; + delete [] R; + } +} + +APInt APInt::udiv(const APInt &RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + + // First, deal with the easy case + if (isSingleWord()) { + assert(RHS.U.VAL != 0 && "Divide by zero?"); + return APInt(BitWidth, U.VAL / RHS.U.VAL); + } + + // Get some facts about the LHS and RHS number of bits and words + unsigned lhsWords = getNumWords(getActiveBits()); + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = getNumWords(rhsBits); + assert(rhsWords && "Divided by zero???"); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return APInt(BitWidth, 0); + if (rhsBits == 1) + // X / 1 ===> X + return *this; + if (lhsWords < rhsWords || this->ult(RHS)) + // X / Y ===> 0, iff X < Y + return APInt(BitWidth, 0); + if (*this == RHS) + // X / X ===> 1 + return APInt(BitWidth, 1); + if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. + // All high words are zero, just use native divide + return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Quotient(BitWidth, 0); // to hold result. + divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); + return Quotient; +} + +APInt APInt::udiv(uint64_t RHS) const { + assert(RHS != 0 && "Divide by zero?"); + + // First, deal with the easy case + if (isSingleWord()) + return APInt(BitWidth, U.VAL / RHS); + + // Get some facts about the LHS words. + unsigned lhsWords = getNumWords(getActiveBits()); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return APInt(BitWidth, 0); + if (RHS == 1) + // X / 1 ===> X + return *this; + if (this->ult(RHS)) + // X / Y ===> 0, iff X < Y + return APInt(BitWidth, 0); + if (*this == RHS) + // X / X ===> 1 + return APInt(BitWidth, 1); + if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 
+ // All high words are zero, just use native divide + return APInt(BitWidth, this->U.pVal[0] / RHS); + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Quotient(BitWidth, 0); // to hold result. + divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); + return Quotient; +} + +APInt APInt::sdiv(const APInt &RHS) const { + if (isNegative()) { + if (RHS.isNegative()) + return (-(*this)).udiv(-RHS); + return -((-(*this)).udiv(RHS)); + } + if (RHS.isNegative()) + return -(this->udiv(-RHS)); + return this->udiv(RHS); +} + +APInt APInt::sdiv(int64_t RHS) const { + if (isNegative()) { + if (RHS < 0) + return (-(*this)).udiv(-RHS); + return -((-(*this)).udiv(RHS)); + } + if (RHS < 0) + return -(this->udiv(-RHS)); + return this->udiv(RHS); +} + +APInt APInt::urem(const APInt &RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + assert(RHS.U.VAL != 0 && "Remainder by zero?"); + return APInt(BitWidth, U.VAL % RHS.U.VAL); + } + + // Get some facts about the LHS + unsigned lhsWords = getNumWords(getActiveBits()); + + // Get some facts about the RHS + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = getNumWords(rhsBits); + assert(rhsWords && "Performing remainder operation by zero ???"); + + // Check the degenerate cases + if (lhsWords == 0) + // 0 % Y ===> 0 + return APInt(BitWidth, 0); + if (rhsBits == 1) + // X % 1 ===> 0 + return APInt(BitWidth, 0); + if (lhsWords < rhsWords || this->ult(RHS)) + // X % Y ===> X, iff X < Y + return *this; + if (*this == RHS) + // X % X == 0; + return APInt(BitWidth, 0); + if (lhsWords == 1) + // All high words are zero, just use native remainder + return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. 
+ APInt Remainder(BitWidth, 0); + divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); + return Remainder; +} + +uint64_t APInt::urem(uint64_t RHS) const { + assert(RHS != 0 && "Remainder by zero?"); + + if (isSingleWord()) + return U.VAL % RHS; + + // Get some facts about the LHS + unsigned lhsWords = getNumWords(getActiveBits()); + + // Check the degenerate cases + if (lhsWords == 0) + // 0 % Y ===> 0 + return 0; + if (RHS == 1) + // X % 1 ===> 0 + return 0; + if (this->ult(RHS)) + // X % Y ===> X, iff X < Y + return getZExtValue(); + if (*this == RHS) + // X % X == 0; + return 0; + if (lhsWords == 1) + // All high words are zero, just use native remainder + return U.pVal[0] % RHS; + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + uint64_t Remainder; + divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); + return Remainder; +} + +APInt APInt::srem(const APInt &RHS) const { + if (isNegative()) { + if (RHS.isNegative()) + return -((-(*this)).urem(-RHS)); + return -((-(*this)).urem(RHS)); + } + if (RHS.isNegative()) + return this->urem(-RHS); + return this->urem(RHS); +} + +int64_t APInt::srem(int64_t RHS) const { + if (isNegative()) { + if (RHS < 0) + return -((-(*this)).urem(-RHS)); + return -((-(*this)).urem(RHS)); + } + if (RHS < 0) + return this->urem(-RHS); + return this->urem(RHS); +} + +void APInt::udivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); + unsigned BitWidth = LHS.BitWidth; + + // First, deal with the easy case + if (LHS.isSingleWord()) { + assert(RHS.U.VAL != 0 && "Divide by zero?"); + uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; + uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; + Quotient = APInt(BitWidth, QuotVal); + Remainder = APInt(BitWidth, RemVal); + return; + } + + // Get some size facts about the dividend and divisor + unsigned lhsWords = getNumWords(LHS.getActiveBits()); + unsigned rhsBits 
= RHS.getActiveBits(); + unsigned rhsWords = getNumWords(rhsBits); + assert(rhsWords && "Performing divrem operation by zero ???"); + + // Check the degenerate cases + if (lhsWords == 0) { + Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 + Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 + return; + } + + if (rhsBits == 1) { + Quotient = LHS; // X / 1 ===> X + Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 + } + + if (lhsWords < rhsWords || LHS.ult(RHS)) { + Remainder = LHS; // X % Y ===> X, iff X < Y + Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y + return; + } + + if (LHS == RHS) { + Quotient = APInt(BitWidth, 1); // X / X ===> 1 + Remainder = APInt(BitWidth, 0); // X % X ===> 0; + return; + } + + // Make sure there is enough space to hold the results. + // NOTE: This assumes that reallocate won't affect any bits if it doesn't + // change the size. This is necessary if Quotient or Remainder is aliased + // with LHS or RHS. + Quotient.reallocate(BitWidth); + Remainder.reallocate(BitWidth); + + if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. + // There is only one word to consider so use the native versions. + uint64_t lhsValue = LHS.U.pVal[0]; + uint64_t rhsValue = RHS.U.pVal[0]; + Quotient = lhsValue / rhsValue; + Remainder = lhsValue % rhsValue; + return; + } + + // Okay, lets do it the long way + divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, + Remainder.U.pVal); + // Clear the rest of the Quotient and Remainder. 
+ std::memset(Quotient.U.pVal + lhsWords, 0, + (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); + std::memset(Remainder.U.pVal + rhsWords, 0, + (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); +} + +void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, + uint64_t &Remainder) { + assert(RHS != 0 && "Divide by zero?"); + unsigned BitWidth = LHS.BitWidth; + + // First, deal with the easy case + if (LHS.isSingleWord()) { + uint64_t QuotVal = LHS.U.VAL / RHS; + Remainder = LHS.U.VAL % RHS; + Quotient = APInt(BitWidth, QuotVal); + return; + } + + // Get some size facts about the dividend and divisor + unsigned lhsWords = getNumWords(LHS.getActiveBits()); + + // Check the degenerate cases + if (lhsWords == 0) { + Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 + Remainder = 0; // 0 % Y ===> 0 + return; + } + + if (RHS == 1) { + Quotient = LHS; // X / 1 ===> X + Remainder = 0; // X % 1 ===> 0 + return; + } + + if (LHS.ult(RHS)) { + Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y + Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y + return; + } + + if (LHS == RHS) { + Quotient = APInt(BitWidth, 1); // X / X ===> 1 + Remainder = 0; // X % X ===> 0; + return; + } + + // Make sure there is enough space to hold the results. + // NOTE: This assumes that reallocate won't affect any bits if it doesn't + // change the size. This is necessary if Quotient is aliased with LHS. + Quotient.reallocate(BitWidth); + + if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. + // There is only one word to consider so use the native versions. + uint64_t lhsValue = LHS.U.pVal[0]; + Quotient = lhsValue / RHS; + Remainder = lhsValue % RHS; + return; + } + + // Okay, lets do it the long way + divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); + // Clear the rest of the Quotient. 
+ std::memset(Quotient.U.pVal + lhsWords, 0, + (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); +} + +void APInt::sdivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + if (LHS.isNegative()) { + if (RHS.isNegative()) + APInt::udivrem(-LHS, -RHS, Quotient, Remainder); + else { + APInt::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient.negate(); + } + Remainder.negate(); + } else if (RHS.isNegative()) { + APInt::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient.negate(); + } else { + APInt::udivrem(LHS, RHS, Quotient, Remainder); + } +} + +void APInt::sdivrem(const APInt &LHS, int64_t RHS, + APInt &Quotient, int64_t &Remainder) { + uint64_t R = Remainder; + if (LHS.isNegative()) { + if (RHS < 0) + APInt::udivrem(-LHS, -RHS, Quotient, R); + else { + APInt::udivrem(-LHS, RHS, Quotient, R); + Quotient.negate(); + } + R = -R; + } else if (RHS < 0) { + APInt::udivrem(LHS, -RHS, Quotient, R); + Quotient.negate(); + } else { + APInt::udivrem(LHS, RHS, Quotient, R); + } + Remainder = R; +} + +APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this+RHS; + Overflow = isNonNegative() == RHS.isNonNegative() && + Res.isNonNegative() != isNonNegative(); + return Res; +} + +APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this+RHS; + Overflow = Res.ult(RHS); + return Res; +} + +APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this - RHS; + Overflow = isNonNegative() != RHS.isNonNegative() && + Res.isNonNegative() != isNonNegative(); + return Res; +} + +APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this-RHS; + Overflow = Res.ugt(*this); + return Res; +} + +APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { + // MININT/-1 --> overflow. 
+ Overflow = isMinSignedValue() && RHS.isAllOnesValue(); + return sdiv(RHS); +} + +APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this * RHS; + + if (*this != 0 && RHS != 0) + Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; + else + Overflow = false; + return Res; +} + +APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { + if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { + Overflow = true; + return *this * RHS; + } + + APInt Res = lshr(1) * RHS; + Overflow = Res.isNegative(); + Res <<= 1; + if ((*this)[0]) { + Res += RHS; + if (Res.ult(RHS)) + Overflow = true; + } + return Res; +} + +APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { + Overflow = ShAmt.uge(getBitWidth()); + if (Overflow) + return APInt(BitWidth, 0); + + if (isNonNegative()) // Don't allow sign change. + Overflow = ShAmt.uge(countLeadingZeros()); + else + Overflow = ShAmt.uge(countLeadingOnes()); + + return *this << ShAmt; +} + +APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { + Overflow = ShAmt.uge(getBitWidth()); + if (Overflow) + return APInt(BitWidth, 0); + + Overflow = ShAmt.ugt(countLeadingZeros()); + + return *this << ShAmt; +} + +APInt APInt::sadd_sat(const APInt &RHS) const { + bool Overflow; + APInt Res = sadd_ov(RHS, Overflow); + if (!Overflow) + return Res; + + return isNegative() ? APInt::getSignedMinValue(BitWidth) + : APInt::getSignedMaxValue(BitWidth); +} + +APInt APInt::uadd_sat(const APInt &RHS) const { + bool Overflow; + APInt Res = uadd_ov(RHS, Overflow); + if (!Overflow) + return Res; + + return APInt::getMaxValue(BitWidth); +} + +APInt APInt::ssub_sat(const APInt &RHS) const { + bool Overflow; + APInt Res = ssub_ov(RHS, Overflow); + if (!Overflow) + return Res; + + return isNegative() ? 
APInt::getSignedMinValue(BitWidth) + : APInt::getSignedMaxValue(BitWidth); +} + +APInt APInt::usub_sat(const APInt &RHS) const { + bool Overflow; + APInt Res = usub_ov(RHS, Overflow); + if (!Overflow) + return Res; + + return APInt(BitWidth, 0); +} + + +void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { + // Check our assumptions here + assert(!str.empty() && "Invalid string length"); + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || + radix == 36) && + "Radix should be 2, 8, 10, 16, or 36!"); + + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + bool isNeg = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String is only a sign, needs a value."); + } + assert((slen <= numbits || radix != 2) && "Insufficient bit width"); + assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); + assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); + assert((((slen-1)*64)/22 <= numbits || radix != 10) && + "Insufficient bit width"); + + // Allocate memory if needed + if (isSingleWord()) + U.VAL = 0; + else + U.pVal = getClearedMemory(getNumWords()); + + // Figure out if we can shift instead of multiply + unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 
1 : 0); + + // Enter digit traversal loop + for (StringRef::iterator e = str.end(); p != e; ++p) { + unsigned digit = getDigit(*p, radix); + assert(digit < radix && "Invalid character in digit string"); + + // Shift or multiply the value by the radix + if (slen > 1) { + if (shift) + *this <<= shift; + else + *this *= radix; + } + + // Add in the digit we just interpreted + *this += digit; + } + // If its negative, put it in two's complement form + if (isNeg) + this->negate(); +} + +void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, + bool Signed, bool formatAsCLiteral) const { + assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || + Radix == 36) && + "Radix should be 2, 8, 10, 16, or 36!"); + + const char *Prefix = ""; + if (formatAsCLiteral) { + switch (Radix) { + case 2: + // Binary literals are a non-standard extension added in gcc 4.3: + // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html + Prefix = "0b"; + break; + case 8: + Prefix = "0"; + break; + case 10: + break; // No prefix + case 16: + Prefix = "0x"; + break; + default: + llvm_unreachable("Invalid radix!"); + } + } + + // First, check for a zero value and just short circuit the logic below. 
+ if (*this == 0) { + while (*Prefix) { + Str.push_back(*Prefix); + ++Prefix; + }; + Str.push_back('0'); + return; + } + + static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + if (isSingleWord()) { + char Buffer[65]; + char *BufPtr = std::end(Buffer); + + uint64_t N; + if (!Signed) { + N = getZExtValue(); + } else { + int64_t I = getSExtValue(); + if (I >= 0) { + N = I; + } else { + Str.push_back('-'); + N = -(uint64_t)I; + } + } + + while (*Prefix) { + Str.push_back(*Prefix); + ++Prefix; + }; + + while (N) { + *--BufPtr = Digits[N % Radix]; + N /= Radix; + } + Str.append(BufPtr, std::end(Buffer)); + return; + } + + APInt Tmp(*this); + + if (Signed && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. + Tmp.negate(); + Str.push_back('-'); + } + + while (*Prefix) { + Str.push_back(*Prefix); + ++Prefix; + }; + + // We insert the digits backward, then reverse them to get the right order. + unsigned StartDig = Str.size(); + + // For the 2, 8 and 16 bit cases, we can just shift instead of divide + // because the number of bits per digit (1, 3 and 4 respectively) divides + // equally. We just shift until the value is zero. + if (Radix == 2 || Radix == 8 || Radix == 16) { + // Just shift tmp right for each digit width until it becomes zero + unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); + unsigned MaskAmt = Radix - 1; + + while (Tmp.getBoolValue()) { + unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; + Str.push_back(Digits[Digit]); + Tmp.lshrInPlace(ShiftAmt); + } + } else { + while (Tmp.getBoolValue()) { + uint64_t Digit; + udivrem(Tmp, Radix, Tmp, Digit); + assert(Digit < Radix && "divide failed"); + Str.push_back(Digits[Digit]); + } + } + + // Reverse the digits before returning. + std::reverse(Str.begin()+StartDig, Str.end()); +} + +/// Returns the APInt as a std::string. 
Note that this is an inefficient method. +/// It is better to pass in a SmallVector/SmallString to the methods above. +std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { + SmallString<40> S; + toString(S, Radix, Signed, /* formatAsCLiteral = */false); + return S.str(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void APInt::dump() const { + SmallString<40> S, U; + this->toStringUnsigned(U); + this->toStringSigned(S); + dbgs() << "APInt(" << BitWidth << "b, " + << U << "u " << S << "s)\n"; +} +#endif + +void APInt::print(raw_ostream &OS, bool isSigned) const { + SmallString<40> S; + this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); + OS << S; +} + +// This implements a variety of operations on a representation of +// arbitrary precision, two's-complement, bignum integer values. + +// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe +// and unrestricting assumption. +static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, + "Part width must be divisible by 2!"); + +/* Some handy functions local to this file. */ + +/* Returns the integer part with the least significant BITS set. + BITS cannot be zero. */ +static inline APInt::WordType lowBitMask(unsigned bits) { + assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); + + return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); +} + +/* Returns the value of the lower half of PART. */ +static inline APInt::WordType lowHalf(APInt::WordType part) { + return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); +} + +/* Returns the value of the upper half of PART. */ +static inline APInt::WordType highHalf(APInt::WordType part) { + return part >> (APInt::APINT_BITS_PER_WORD / 2); +} + +/* Returns the bit number of the most significant set bit of a part. + If the input number has no bits set -1U is returned. 
*/ +static unsigned partMSB(APInt::WordType value) { + return findLastSet(value, ZB_Max); +} + +/* Returns the bit number of the least significant set bit of a + part. If the input number has no bits set -1U is returned. */ +static unsigned partLSB(APInt::WordType value) { + return findFirstSet(value, ZB_Max); +} + +/* Sets the least significant part of a bignum to the input value, and + zeroes out higher parts. */ +void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { + assert(parts > 0); + + dst[0] = part; + for (unsigned i = 1; i < parts; i++) + dst[i] = 0; +} + +/* Assign one bignum to another. */ +void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { + for (unsigned i = 0; i < parts; i++) + dst[i] = src[i]; +} + +/* Returns true if a bignum is zero, false otherwise. */ +bool APInt::tcIsZero(const WordType *src, unsigned parts) { + for (unsigned i = 0; i < parts; i++) + if (src[i]) + return false; + + return true; +} + +/* Extract the given bit of a bignum; returns 0 or 1. */ +int APInt::tcExtractBit(const WordType *parts, unsigned bit) { + return (parts[whichWord(bit)] & maskBit(bit)) != 0; +} + +/* Set the given bit of a bignum. */ +void APInt::tcSetBit(WordType *parts, unsigned bit) { + parts[whichWord(bit)] |= maskBit(bit); +} + +/* Clears the given bit of a bignum. */ +void APInt::tcClearBit(WordType *parts, unsigned bit) { + parts[whichWord(bit)] &= ~maskBit(bit); +} + +/* Returns the bit number of the least significant set bit of a + number. If the input number has no bits set -1U is returned. */ +unsigned APInt::tcLSB(const WordType *parts, unsigned n) { + for (unsigned i = 0; i < n; i++) { + if (parts[i] != 0) { + unsigned lsb = partLSB(parts[i]); + + return lsb + i * APINT_BITS_PER_WORD; + } + } + + return -1U; +} + +/* Returns the bit number of the most significant set bit of a number. + If the input number has no bits set -1U is returned. 
*/ +unsigned APInt::tcMSB(const WordType *parts, unsigned n) { + do { + --n; + + if (parts[n] != 0) { + unsigned msb = partMSB(parts[n]); + + return msb + n * APINT_BITS_PER_WORD; + } + } while (n); + + return -1U; +} + +/* Copy the bit vector of width srcBITS from SRC, starting at bit + srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes + the least significant bit of DST. All high bits above srcBITS in + DST are zero-filled. */ +void +APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, + unsigned srcBits, unsigned srcLSB) { + unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; + assert(dstParts <= dstCount); + + unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; + tcAssign (dst, src + firstSrcPart, dstParts); + + unsigned shift = srcLSB % APINT_BITS_PER_WORD; + tcShiftRight (dst, dstParts, shift); + + /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC + in DST. If this is less that srcBits, append the rest, else + clear the high bits. */ + unsigned n = dstParts * APINT_BITS_PER_WORD - shift; + if (n < srcBits) { + WordType mask = lowBitMask (srcBits - n); + dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) + << n % APINT_BITS_PER_WORD); + } else if (n > srcBits) { + if (srcBits % APINT_BITS_PER_WORD) + dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); + } + + /* Clear high parts. */ + while (dstParts < dstCount) + dst[dstParts++] = 0; +} + +/* DST += RHS + C where C is zero or one. Returns the carry flag. */ +APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, + WordType c, unsigned parts) { + assert(c <= 1); + + for (unsigned i = 0; i < parts; i++) { + WordType l = dst[i]; + if (c) { + dst[i] += rhs[i] + 1; + c = (dst[i] <= l); + } else { + dst[i] += rhs[i]; + c = (dst[i] < l); + } + } + + return c; +} + +/// This function adds a single "word" integer, src, to the multiple +/// "word" integer array, dst[]. 
dst[] is modified to reflect the addition and +/// 1 is returned if there is a carry out, otherwise 0 is returned. +/// @returns the carry of the addition. +APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, + unsigned parts) { + for (unsigned i = 0; i < parts; ++i) { + dst[i] += src; + if (dst[i] >= src) + return 0; // No need to carry so exit early. + src = 1; // Carry one to next digit. + } + + return 1; +} + +/* DST -= RHS + C where C is zero or one. Returns the carry flag. */ +APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, + WordType c, unsigned parts) { + assert(c <= 1); + + for (unsigned i = 0; i < parts; i++) { + WordType l = dst[i]; + if (c) { + dst[i] -= rhs[i] + 1; + c = (dst[i] >= l); + } else { + dst[i] -= rhs[i]; + c = (dst[i] > l); + } + } + + return c; +} + +/// This function subtracts a single "word" (64-bit word), src, from +/// the multi-word integer array, dst[], propagating the borrowed 1 value until +/// no further borrowing is needed or it runs out of "words" in dst. The result +/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not +/// exhausted. In other words, if src > dst then this function returns 1, +/// otherwise 0. +/// @returns the borrow out of the subtraction +APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, + unsigned parts) { + for (unsigned i = 0; i < parts; ++i) { + WordType Dst = dst[i]; + dst[i] -= src; + if (src <= Dst) + return 0; // No need to borrow so exit early. + src = 1; // We have to "borrow 1" from next "word" + } + + return 1; +} + +/* Negate a bignum in-place. */ +void APInt::tcNegate(WordType *dst, unsigned parts) { + tcComplement(dst, parts); + tcIncrement(dst, parts); +} + +/* DST += SRC * MULTIPLIER + CARRY if add is true + DST = SRC * MULTIPLIER + CARRY if add is false + + Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC + they must start at the same point, i.e. DST == SRC. 
+ + If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is + returned. Otherwise DST is filled with the least significant + DSTPARTS parts of the result, and if all of the omitted higher + parts were zero return zero, otherwise overflow occurred and + return one. */ +int APInt::tcMultiplyPart(WordType *dst, const WordType *src, + WordType multiplier, WordType carry, + unsigned srcParts, unsigned dstParts, + bool add) { + /* Otherwise our writes of DST kill our later reads of SRC. */ + assert(dst <= src || dst >= src + srcParts); + assert(dstParts <= srcParts + 1); + + /* N loops; minimum of dstParts and srcParts. */ + unsigned n = std::min(dstParts, srcParts); + + for (unsigned i = 0; i < n; i++) { + WordType low, mid, high, srcPart; + + /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. + + This cannot overflow, because + + (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) + + which is less than n^2. */ + + srcPart = src[i]; + + if (multiplier == 0 || srcPart == 0) { + low = carry; + high = 0; + } else { + low = lowHalf(srcPart) * lowHalf(multiplier); + high = highHalf(srcPart) * highHalf(multiplier); + + mid = lowHalf(srcPart) * highHalf(multiplier); + high += highHalf(mid); + mid <<= APINT_BITS_PER_WORD / 2; + if (low + mid < low) + high++; + low += mid; + + mid = highHalf(srcPart) * lowHalf(multiplier); + high += highHalf(mid); + mid <<= APINT_BITS_PER_WORD / 2; + if (low + mid < low) + high++; + low += mid; + + /* Now add carry. */ + if (low + carry < low) + high++; + low += carry; + } + + if (add) { + /* And now DST[i], and store the new low part there. */ + if (low + dst[i] < low) + high++; + dst[i] += low; + } else + dst[i] = low; + + carry = high; + } + + if (srcParts < dstParts) { + /* Full multiplication, there is no overflow. */ + assert(srcParts + 1 == dstParts); + dst[srcParts] = carry; + return 0; + } + + /* We overflowed if there is carry. 
*/ + if (carry) + return 1; + + /* We would overflow if any significant unwritten parts would be + non-zero. This is true if any remaining src parts are non-zero + and the multiplier is non-zero. */ + if (multiplier) + for (unsigned i = dstParts; i < srcParts; i++) + if (src[i]) + return 1; + + /* We fitted in the narrow destination. */ + return 0; +} + +/* DST = LHS * RHS, where DST has the same width as the operands and + is filled with the least significant parts of the result. Returns + one if overflow occurred, otherwise zero. DST must be disjoint + from both operands. */ +int APInt::tcMultiply(WordType *dst, const WordType *lhs, + const WordType *rhs, unsigned parts) { + assert(dst != lhs && dst != rhs); + + int overflow = 0; + tcSet(dst, 0, parts); + + for (unsigned i = 0; i < parts; i++) + overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, + parts - i, true); + + return overflow; +} + +/// DST = LHS * RHS, where DST has width the sum of the widths of the +/// operands. No overflow occurs. DST must be disjoint from both operands. +void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, + const WordType *rhs, unsigned lhsParts, + unsigned rhsParts) { + /* Put the narrower number on the LHS for less loops below. */ + if (lhsParts > rhsParts) + return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); + + assert(dst != lhs && dst != rhs); + + tcSet(dst, 0, rhsParts); + + for (unsigned i = 0; i < lhsParts; i++) + tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); +} + +/* If RHS is zero LHS and REMAINDER are left unchanged, return one. + Otherwise set LHS to LHS / RHS with the fractional part discarded, + set REMAINDER to the remainder, return zero. i.e. + + OLD_LHS = RHS * LHS + REMAINDER + + SCRATCH is a bignum of the same size as the operands and result for + use by the routine; its contents need not be initialized and are + destroyed. LHS, REMAINDER and SCRATCH must be distinct. 
+*/ +int APInt::tcDivide(WordType *lhs, const WordType *rhs, + WordType *remainder, WordType *srhs, + unsigned parts) { + assert(lhs != remainder && lhs != srhs && remainder != srhs); + + unsigned shiftCount = tcMSB(rhs, parts) + 1; + if (shiftCount == 0) + return true; + + shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; + unsigned n = shiftCount / APINT_BITS_PER_WORD; + WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); + + tcAssign(srhs, rhs, parts); + tcShiftLeft(srhs, parts, shiftCount); + tcAssign(remainder, lhs, parts); + tcSet(lhs, 0, parts); + + /* Loop, subtracting SRHS if REMAINDER is greater and adding that to + the total. */ + for (;;) { + int compare = tcCompare(remainder, srhs, parts); + if (compare >= 0) { + tcSubtract(remainder, srhs, 0, parts); + lhs[n] |= mask; + } + + if (shiftCount == 0) + break; + shiftCount--; + tcShiftRight(srhs, parts, 1); + if ((mask >>= 1) == 0) { + mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); + n--; + } + } + + return false; +} + +/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are +/// no restrictions on Count. +void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { + // Don't bother performing a no-op shift. + if (!Count) + return; + + // WordShift is the inter-part shift; BitShift is the intra-part shift. + unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); + unsigned BitShift = Count % APINT_BITS_PER_WORD; + + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); + } else { + while (Words-- > WordShift) { + Dst[Words] = Dst[Words - WordShift] << BitShift; + if (Words > WordShift) + Dst[Words] |= + Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); + } + } + + // Fill in the remainder with 0s. + std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); +} + +/// Shift a bignum right Count bits in-place. Shifted in bits are zero. 
There +/// are no restrictions on Count. +void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { + // Don't bother performing a no-op shift. + if (!Count) + return; + + // WordShift is the inter-part shift; BitShift is the intra-part shift. + unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); + unsigned BitShift = Count % APINT_BITS_PER_WORD; + + unsigned WordsToMove = Words - WordShift; + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + for (unsigned i = 0; i != WordsToMove; ++i) { + Dst[i] = Dst[i + WordShift] >> BitShift; + if (i + 1 != WordsToMove) + Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); + } + } + + // Fill in the remainder with 0s. + std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); +} + +/* Bitwise and of two bignums. */ +void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { + for (unsigned i = 0; i < parts; i++) + dst[i] &= rhs[i]; +} + +/* Bitwise inclusive or of two bignums. */ +void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { + for (unsigned i = 0; i < parts; i++) + dst[i] |= rhs[i]; +} + +/* Bitwise exclusive or of two bignums. */ +void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { + for (unsigned i = 0; i < parts; i++) + dst[i] ^= rhs[i]; +} + +/* Complement a bignum in-place. */ +void APInt::tcComplement(WordType *dst, unsigned parts) { + for (unsigned i = 0; i < parts; i++) + dst[i] = ~dst[i]; +} + +/* Comparison (unsigned) of two bignums. */ +int APInt::tcCompare(const WordType *lhs, const WordType *rhs, + unsigned parts) { + while (parts) { + parts--; + if (lhs[parts] != rhs[parts]) + return (lhs[parts] > rhs[parts]) ? 1 : -1; + } + + return 0; +} + +/* Set the least significant BITS bits of a bignum, clear the + rest. 
*/ +void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, + unsigned bits) { + unsigned i = 0; + while (bits > APINT_BITS_PER_WORD) { + dst[i++] = ~(WordType) 0; + bits -= APINT_BITS_PER_WORD; + } + + if (bits) + dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); + + while (i < parts) + dst[i++] = 0; +} + +APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, + APInt::Rounding RM) { + // Currently udivrem always rounds down. + switch (RM) { + case APInt::Rounding::DOWN: + case APInt::Rounding::TOWARD_ZERO: + return A.udiv(B); + case APInt::Rounding::UP: { + APInt Quo, Rem; + APInt::udivrem(A, B, Quo, Rem); + if (Rem == 0) + return Quo; + return Quo + 1; + } + } + llvm_unreachable("Unknown APInt::Rounding enum"); +} + +APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, + APInt::Rounding RM) { + switch (RM) { + case APInt::Rounding::DOWN: + case APInt::Rounding::UP: { + APInt Quo, Rem; + APInt::sdivrem(A, B, Quo, Rem); + if (Rem == 0) + return Quo; + // This algorithm deals with arbitrary rounding mode used by sdivrem. + // We want to check whether the non-integer part of the mathematical value + // is negative or not. If the non-integer part is negative, we need to round + // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's + // already rounded down. + if (RM == APInt::Rounding::DOWN) { + if (Rem.isNegative() != B.isNegative()) + return Quo - 1; + return Quo; + } + if (Rem.isNegative() != B.isNegative()) + return Quo; + return Quo + 1; + } + // Currently sdiv rounds twards zero. 
+ case APInt::Rounding::TOWARD_ZERO: + return A.sdiv(B); + } + llvm_unreachable("Unknown APInt::Rounding enum"); +} + +Optional<APInt> +llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, + unsigned RangeWidth) { + unsigned CoeffWidth = A.getBitWidth(); + assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); + assert(RangeWidth <= CoeffWidth && + "Value range width should be less than coefficient width"); + assert(RangeWidth > 1 && "Value range bit width should be > 1"); + + LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B + << "x + " << C << ", rw:" << RangeWidth << '\n'); + + // Identify 0 as a (non)solution immediately. + if (C.sextOrTrunc(RangeWidth).isNullValue() ) { + LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); + return APInt(CoeffWidth, 0); + } + + // The result of APInt arithmetic has the same bit width as the operands, + // so it can actually lose high bits. A product of two n-bit integers needs + // 2n-1 bits to represent the full value. + // The operation done below (on quadratic coefficients) that can produce + // the largest value is the evaluation of the equation during bisection, + // which needs 3 times the bitwidth of the coefficient, so the total number + // of required bits is 3n. + // + // The purpose of this extension is to simulate the set Z of all integers, + // where n+1 > n for all n in Z. In Z it makes sense to talk about positive + // and negative numbers (not so much in a modulo arithmetic). The method + // used to solve the equation is based on the standard formula for real + // numbers, and uses the concepts of "positive" and "negative" with their + // usual meanings. + CoeffWidth *= 3; + A = A.sext(CoeffWidth); + B = B.sext(CoeffWidth); + C = C.sext(CoeffWidth); + + // Make A > 0 for simplicity. Negate cannot overflow at this point because + // the bit width has increased. 
+ if (A.isNegative()) { + A.negate(); + B.negate(); + C.negate(); + } + + // Solving an equation q(x) = 0 with coefficients in modular arithmetic + // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., + // and R = 2^BitWidth. + // Since we're trying not only to find exact solutions, but also values + // that "wrap around", such a set will always have a solution, i.e. an x + // that satisfies at least one of the equations, or such that |q(x)| + // exceeds kR, while |q(x-1)| for the same k does not. + // + // We need to find a value k, such that Ax^2 + Bx + C = kR will have a + // positive solution n (in the above sense), and also such that the n + // will be the least among all solutions corresponding to k = 0, 1, ... + // (more precisely, the least element in the set + // { n(k) | k is such that a solution n(k) exists }). + // + // Consider the parabola (over real numbers) that corresponds to the + // quadratic equation. Since A > 0, the arms of the parabola will point + // up. Picking different values of k will shift it up and down by R. + // + // We want to shift the parabola in such a way as to reduce the problem + // of solving q(x) = kR to solving shifted_q(x) = 0. + // (The interesting solutions are the ceilings of the real number + // solutions.) + APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); + APInt TwoA = 2 * A; + APInt SqrB = B * B; + bool PickLow; + + auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { + assert(A.isStrictlyPositive()); + APInt T = V.abs().urem(A); + if (T.isNullValue()) + return V; + return V.isNegative() ? V+T : V+(A-T); + }; + + // The vertex of the parabola is at -B/2A, but since A > 0, it's negative + // iff B is positive. + if (B.isNonNegative()) { + // If B >= 0, the vertex it at a negative location (or at 0), so in + // order to have a non-negative solution we need to pick k that makes + // C-kR negative. 
To satisfy all the requirements for the solution + // that we are looking for, it needs to be closest to 0 of all k. + C = C.srem(R); + if (C.isStrictlyPositive()) + C -= R; + // Pick the greater solution. + PickLow = false; + } else { + // If B < 0, the vertex is at a positive location. For any solution + // to exist, the discriminant must be non-negative. This means that + // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a + // lower bound on values of k: kR >= C - B^2/4A. + APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. + // Round LowkR up (towards +inf) to the nearest kR. + LowkR = RoundUp(LowkR, R); + + // If there exists k meeting the condition above, and such that + // C-kR > 0, there will be two positive real number solutions of + // q(x) = kR. Out of all such values of k, pick the one that makes + // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). + // In other words, find maximum k such that LowkR <= kR < C. + if (C.sgt(LowkR)) { + // If LowkR < C, then such a k is guaranteed to exist because + // LowkR itself is a multiple of R. + C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) + // Pick the smaller solution. + PickLow = true; + } else { + // If C-kR < 0 for all potential k's, it means that one solution + // will be negative, while the other will be positive. The positive + // solution will shift towards 0 if the parabola is moved up. + // Pick the kR closest to the lower bound (i.e. make C-kR closest + // to 0, or in other words, out of all parabolas that have solutions, + // pick the one that is the farthest "up"). + // Since LowkR is itself a multiple of R, simply take C-LowkR. + C -= LowkR; + // Pick the greater solution. 
+ PickLow = false; + } + } + + LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " + << B << "x + " << C << ", rw:" << RangeWidth << '\n'); + + APInt D = SqrB - 4*A*C; + assert(D.isNonNegative() && "Negative discriminant"); + APInt SQ = D.sqrt(); + + APInt Q = SQ * SQ; + bool InexactSQ = Q != D; + // The calculated SQ may actually be greater than the exact (non-integer) + // value. If that's the case, decremement SQ to get a value that is lower. + if (Q.sgt(D)) + SQ -= 1; + + APInt X; + APInt Rem; + + // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. + // When using the quadratic formula directly, the calculated low root + // may be greater than the exact one, since we would be subtracting SQ. + // To make sure that the calculated root is not greater than the exact + // one, subtract SQ+1 when calculating the low root (for inexact value + // of SQ). + if (PickLow) + APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); + else + APInt::sdivrem(-B + SQ, TwoA, X, Rem); + + // The updated coefficients should be such that the (exact) solution is + // positive. Since APInt division rounds towards 0, the calculated one + // can be 0, but cannot be negative. + assert(X.isNonNegative() && "Solution should be non-negative"); + + if (!InexactSQ && Rem.isNullValue()) { + LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); + return X; + } + + assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); + // The exact value of the square root of D should be between SQ and SQ+1. + // This implies that the solution should be between that corresponding to + // SQ (i.e. X) and that corresponding to SQ+1. + // + // The calculated X cannot be greater than the exact (real) solution. + // Actually it must be strictly less than the exact solution, while + // X+1 will be greater than or equal to it. 
+ + APInt VX = (A*X + B)*X + C; + APInt VY = VX + TwoA*X + A + B; + bool SignChange = VX.isNegative() != VY.isNegative() || + VX.isNullValue() != VY.isNullValue(); + // If the sign did not change between X and X+1, X is not a valid solution. + // This could happen when the actual (exact) roots don't have an integer + // between them, so they would both be contained between X and X+1. + if (!SignChange) { + LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); + return None; + } + + X += 1; + LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); + return X; +} + +/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst +/// with the integer held in IntVal. +void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, + unsigned StoreBytes) { + assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); + const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); + + if (sys::IsLittleEndianHost) { + // Little-endian host - the source is ordered from LSB to MSB. Order the + // destination from LSB to MSB: Do a straight copy. + memcpy(Dst, Src, StoreBytes); + } else { + // Big-endian host - the source is an array of 64 bit words ordered from + // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination + // from MSB to LSB: Reverse the word order, but not the bytes in a word. + while (StoreBytes > sizeof(uint64_t)) { + StoreBytes -= sizeof(uint64_t); + // May not be aligned so use memcpy. + memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); + Src += sizeof(uint64_t); + } + + memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); + } +} + +/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting +/// from Src into IntVal, which is assumed to be wide enough and to hold zero. 
+void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { + assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); + uint8_t *Dst = reinterpret_cast<uint8_t *>( + const_cast<uint64_t *>(IntVal.getRawData())); + + if (sys::IsLittleEndianHost) + // Little-endian host - the destination must be ordered from LSB to MSB. + // The source is ordered from LSB to MSB: Do a straight copy. + memcpy(Dst, Src, LoadBytes); + else { + // Big-endian - the destination is an array of 64 bit words ordered from + // LSW to MSW. Each word must be ordered from MSB to LSB. The source is + // ordered from MSB to LSB: Reverse the word order, but not the bytes in + // a word. + while (LoadBytes > sizeof(uint64_t)) { + LoadBytes -= sizeof(uint64_t); + // May not be aligned so use memcpy. + memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); + Dst += sizeof(uint64_t); + } + + memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); + } +} diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp new file mode 100644 index 0000000000000..7c48880f96eac --- /dev/null +++ b/llvm/lib/Support/APSInt.cpp @@ -0,0 +1,42 @@ +//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the APSInt class, which is a simple class that +// represents an arbitrary sized integer that knows its signedness. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +APSInt::APSInt(StringRef Str) { + assert(!Str.empty() && "Invalid string length"); + + // (Over-)estimate the required number of bits. + unsigned NumBits = ((Str.size() * 64) / 19) + 2; + APInt Tmp(NumBits, Str, /*radix=*/10); + if (Str[0] == '-') { + unsigned MinBits = Tmp.getMinSignedBits(); + if (MinBits > 0 && MinBits < NumBits) + Tmp = Tmp.trunc(MinBits); + *this = APSInt(Tmp, /*isUnsigned=*/false); + return; + } + unsigned ActiveBits = Tmp.getActiveBits(); + if (ActiveBits > 0 && ActiveBits < NumBits) + Tmp = Tmp.trunc(ActiveBits); + *this = APSInt(Tmp, /*isUnsigned=*/true); +} + +void APSInt::Profile(FoldingSetNodeID& ID) const { + ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0)); + APInt::Profile(ID); +} diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp new file mode 100644 index 0000000000000..df50fff720cd2 --- /dev/null +++ b/llvm/lib/Support/ARMAttributeParser.cpp @@ -0,0 +1,727 @@ +//===--- ARMAttributeParser.cpp - ARM Attribute Information Printer -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ARMAttributeParser.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/ScopedPrinter.h" + +using namespace llvm; +using namespace llvm::ARMBuildAttrs; + + +static const EnumEntry<unsigned> TagNames[] = { + { "Tag_File", ARMBuildAttrs::File }, + { "Tag_Section", ARMBuildAttrs::Section }, + { "Tag_Symbol", ARMBuildAttrs::Symbol }, +}; + +namespace llvm { +#define ATTRIBUTE_HANDLER(Attr_) \ + { ARMBuildAttrs::Attr_, &ARMAttributeParser::Attr_ } + +const ARMAttributeParser::DisplayHandler +ARMAttributeParser::DisplayRoutines[] = { + { ARMBuildAttrs::CPU_raw_name, &ARMAttributeParser::StringAttribute, }, + { ARMBuildAttrs::CPU_name, &ARMAttributeParser::StringAttribute }, + ATTRIBUTE_HANDLER(CPU_arch), + ATTRIBUTE_HANDLER(CPU_arch_profile), + ATTRIBUTE_HANDLER(ARM_ISA_use), + ATTRIBUTE_HANDLER(THUMB_ISA_use), + ATTRIBUTE_HANDLER(FP_arch), + ATTRIBUTE_HANDLER(WMMX_arch), + ATTRIBUTE_HANDLER(Advanced_SIMD_arch), + ATTRIBUTE_HANDLER(MVE_arch), + ATTRIBUTE_HANDLER(PCS_config), + ATTRIBUTE_HANDLER(ABI_PCS_R9_use), + ATTRIBUTE_HANDLER(ABI_PCS_RW_data), + ATTRIBUTE_HANDLER(ABI_PCS_RO_data), + ATTRIBUTE_HANDLER(ABI_PCS_GOT_use), + ATTRIBUTE_HANDLER(ABI_PCS_wchar_t), + ATTRIBUTE_HANDLER(ABI_FP_rounding), + ATTRIBUTE_HANDLER(ABI_FP_denormal), + ATTRIBUTE_HANDLER(ABI_FP_exceptions), + ATTRIBUTE_HANDLER(ABI_FP_user_exceptions), + ATTRIBUTE_HANDLER(ABI_FP_number_model), + ATTRIBUTE_HANDLER(ABI_align_needed), + ATTRIBUTE_HANDLER(ABI_align_preserved), + ATTRIBUTE_HANDLER(ABI_enum_size), + ATTRIBUTE_HANDLER(ABI_HardFP_use), + ATTRIBUTE_HANDLER(ABI_VFP_args), + ATTRIBUTE_HANDLER(ABI_WMMX_args), + ATTRIBUTE_HANDLER(ABI_optimization_goals), + ATTRIBUTE_HANDLER(ABI_FP_optimization_goals), + ATTRIBUTE_HANDLER(compatibility), + 
ATTRIBUTE_HANDLER(CPU_unaligned_access), + ATTRIBUTE_HANDLER(FP_HP_extension), + ATTRIBUTE_HANDLER(ABI_FP_16bit_format), + ATTRIBUTE_HANDLER(MPextension_use), + ATTRIBUTE_HANDLER(DIV_use), + ATTRIBUTE_HANDLER(DSP_extension), + ATTRIBUTE_HANDLER(T2EE_use), + ATTRIBUTE_HANDLER(Virtualization_use), + ATTRIBUTE_HANDLER(nodefaults) +}; + +#undef ATTRIBUTE_HANDLER + +uint64_t ARMAttributeParser::ParseInteger(const uint8_t *Data, + uint32_t &Offset) { + unsigned Length; + uint64_t Value = decodeULEB128(Data + Offset, &Length); + Offset = Offset + Length; + return Value; +} + +StringRef ARMAttributeParser::ParseString(const uint8_t *Data, + uint32_t &Offset) { + const char *String = reinterpret_cast<const char*>(Data + Offset); + size_t Length = std::strlen(String); + Offset = Offset + Length + 1; + return StringRef(String, Length); +} + +void ARMAttributeParser::IntegerAttribute(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + + uint64_t Value = ParseInteger(Data, Offset); + Attributes.insert(std::make_pair(Tag, Value)); + + if (SW) + SW->printNumber(ARMBuildAttrs::AttrTypeAsString(Tag), Value); +} + +void ARMAttributeParser::StringAttribute(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + StringRef TagName = ARMBuildAttrs::AttrTypeAsString(Tag, /*TagPrefix*/false); + StringRef ValueDesc = ParseString(Data, Offset); + + if (SW) { + DictScope AS(*SW, "Attribute"); + SW->printNumber("Tag", Tag); + if (!TagName.empty()) + SW->printString("TagName", TagName); + SW->printString("Value", ValueDesc); + } +} + +void ARMAttributeParser::PrintAttribute(unsigned Tag, unsigned Value, + StringRef ValueDesc) { + Attributes.insert(std::make_pair(Tag, Value)); + + if (SW) { + StringRef TagName = ARMBuildAttrs::AttrTypeAsString(Tag, + /*TagPrefix*/false); + DictScope AS(*SW, "Attribute"); + SW->printNumber("Tag", Tag); + SW->printNumber("Value", Value); + if (!TagName.empty()) + SW->printString("TagName", TagName); + if (!ValueDesc.empty()) + 
SW->printString("Description", ValueDesc); + } +} + +void ARMAttributeParser::CPU_arch(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6", + "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M", + "ARM v7E-M", "ARM v8", nullptr, + "ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr, + "ARM v8.1-M Mainline" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::CPU_arch_profile(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + uint64_t Encoded = ParseInteger(Data, Offset); + + StringRef Profile; + switch (Encoded) { + default: Profile = "Unknown"; break; + case 'A': Profile = "Application"; break; + case 'R': Profile = "Real-time"; break; + case 'M': Profile = "Microcontroller"; break; + case 'S': Profile = "Classic"; break; + case 0: Profile = "None"; break; + } + + PrintAttribute(Tag, Encoded, Profile); +} + +void ARMAttributeParser::ARM_ISA_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "Permitted" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::THUMB_ISA_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "Thumb-1", "Thumb-2" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::FP_arch(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16", "VFPv4", + "VFPv4-D16", "ARMv8-a FP", "ARMv8-a FP-D16" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::WMMX_arch(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "WMMXv1", "WMMXv2" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "NEONv1", "NEONv2+FMA", "ARMv8-a NEON", "ARMv8.1-a NEON" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::MVE_arch(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "MVE integer", "MVE integer and float" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "None", "Bare Platform", "Linux Application", "Linux DSO", "Palm OS 2004", + "Reserved (Palm OS)", "Symbian OS 2004", "Reserved (Symbian OS)" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_PCS_R9_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "v6", "Static Base", "TLS", "Unused" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_PCS_RW_data(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Absolute", "PC-relative", "SB-relative", "Not Permitted" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_PCS_RO_data(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Absolute", "PC-relative", "Not Permitted" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_PCS_GOT_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "Direct", "GOT-Indirect" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_PCS_wchar_t(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "Unknown", "2-byte", "Unknown", "4-byte" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_rounding(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "IEEE-754", "Runtime" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_denormal(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Unsupported", "IEEE-754", "Sign Only" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_exceptions(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "IEEE-754" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_user_exceptions(AttrType Tag, + const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "IEEE-754" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_number_model(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "Finite Only", "RTABI", "IEEE-754" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_align_needed(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "8-byte alignment", "4-byte alignment", "Reserved" + }; + + uint64_t Value = ParseInteger(Data, Offset); + + std::string Description; + if (Value < array_lengthof(Strings)) + Description = std::string(Strings[Value]); + else if (Value <= 12) + Description = std::string("8-byte alignment, ") + utostr(1ULL << Value) + + std::string("-byte extended alignment"); + else + Description = "Invalid"; + + PrintAttribute(Tag, Value, Description); +} + +void ARMAttributeParser::ABI_align_preserved(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Required", "8-byte data alignment", "8-byte data and code alignment", + "Reserved" + }; + + uint64_t Value = ParseInteger(Data, Offset); + + std::string Description; + if (Value < array_lengthof(Strings)) + Description = std::string(Strings[Value]); + else if (Value <= 12) + Description = std::string("8-byte stack alignment, ") + + utostr(1ULL << Value) + std::string("-byte data alignment"); + else + Description = "Invalid"; + + PrintAttribute(Tag, Value, Description); +} + +void ARMAttributeParser::ABI_enum_size(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "Packed", "Int32", "External Int32" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) 
? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_HardFP_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Tag_FP_arch", "Single-Precision", "Reserved", "Tag_FP_arch (deprecated)" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_VFP_args(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "AAPCS", "AAPCS VFP", "Custom", "Not Permitted" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_WMMX_args(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "AAPCS", "iWMMX", "Custom" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_optimization_goals(AttrType Tag, + const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "None", "Speed", "Aggressive Speed", "Size", "Aggressive Size", "Debugging", + "Best Debugging" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_optimization_goals(AttrType Tag, + const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "None", "Speed", "Aggressive Speed", "Size", "Aggressive Size", "Accuracy", + "Best Accuracy" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::compatibility(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + uint64_t Integer = ParseInteger(Data, Offset); + StringRef String = ParseString(Data, Offset); + + if (SW) { + DictScope AS(*SW, "Attribute"); + SW->printNumber("Tag", Tag); + SW->startLine() << "Value: " << Integer << ", " << String << '\n'; + SW->printString("TagName", AttrTypeAsString(Tag, /*TagPrefix*/false)); + switch (Integer) { + case 0: + SW->printString("Description", StringRef("No Specific Requirements")); + break; + case 1: + SW->printString("Description", StringRef("AEABI Conformant")); + break; + default: + SW->printString("Description", StringRef("AEABI Non-Conformant")); + break; + } + } +} + +void ARMAttributeParser::CPU_unaligned_access(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "v6-style" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::FP_HP_extension(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "If Available", "Permitted" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::ABI_FP_16bit_format(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "IEEE-754", "VFPv3" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::MPextension_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "Permitted" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::DIV_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "If Available", "Not Permitted", "Permitted" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::DSP_extension(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "Permitted" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::T2EE_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { "Not Permitted", "Permitted" }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? 
Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::Virtualization_use(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + static const char *const Strings[] = { + "Not Permitted", "TrustZone", "Virtualization Extensions", + "TrustZone + Virtualization Extensions" + }; + + uint64_t Value = ParseInteger(Data, Offset); + StringRef ValueDesc = + (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr; + PrintAttribute(Tag, Value, ValueDesc); +} + +void ARMAttributeParser::nodefaults(AttrType Tag, const uint8_t *Data, + uint32_t &Offset) { + uint64_t Value = ParseInteger(Data, Offset); + PrintAttribute(Tag, Value, "Unspecified Tags UNDEFINED"); +} + +void ARMAttributeParser::ParseIndexList(const uint8_t *Data, uint32_t &Offset, + SmallVectorImpl<uint8_t> &IndexList) { + for (;;) { + unsigned Length; + uint64_t Value = decodeULEB128(Data + Offset, &Length); + Offset = Offset + Length; + if (Value == 0) + break; + IndexList.push_back(Value); + } +} + +void ARMAttributeParser::ParseAttributeList(const uint8_t *Data, + uint32_t &Offset, uint32_t Length) { + while (Offset < Length) { + unsigned Length; + uint64_t Tag = decodeULEB128(Data + Offset, &Length); + Offset += Length; + + bool Handled = false; + for (unsigned AHI = 0, AHE = array_lengthof(DisplayRoutines); + AHI != AHE && !Handled; ++AHI) { + if (uint64_t(DisplayRoutines[AHI].Attribute) == Tag) { + (this->*DisplayRoutines[AHI].Routine)(ARMBuildAttrs::AttrType(Tag), + Data, Offset); + Handled = true; + break; + } + } + if (!Handled) { + if (Tag < 32) { + errs() << "unhandled AEABI Tag " << Tag + << " (" << ARMBuildAttrs::AttrTypeAsString(Tag) << ")\n"; + continue; + } + + if (Tag % 2 == 0) + IntegerAttribute(ARMBuildAttrs::AttrType(Tag), Data, Offset); + else + StringAttribute(ARMBuildAttrs::AttrType(Tag), Data, Offset); + } + } +} + +void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) { + uint32_t Offset = sizeof(uint32_t); /* 
SectionLength */ + + const char *VendorName = reinterpret_cast<const char*>(Data + Offset); + size_t VendorNameLength = std::strlen(VendorName); + Offset = Offset + VendorNameLength + 1; + + if (SW) { + SW->printNumber("SectionLength", Length); + SW->printString("Vendor", StringRef(VendorName, VendorNameLength)); + } + + if (StringRef(VendorName, VendorNameLength).lower() != "aeabi") { + return; + } + + while (Offset < Length) { + /// Tag_File | Tag_Section | Tag_Symbol uleb128:byte-size + uint8_t Tag = Data[Offset]; + Offset = Offset + sizeof(Tag); + + uint32_t Size = + *reinterpret_cast<const support::ulittle32_t*>(Data + Offset); + Offset = Offset + sizeof(Size); + + if (SW) { + SW->printEnum("Tag", Tag, makeArrayRef(TagNames)); + SW->printNumber("Size", Size); + } + + if (Size > Length) { + errs() << "subsection length greater than section length\n"; + return; + } + + StringRef ScopeName, IndexName; + SmallVector<uint8_t, 8> Indicies; + switch (Tag) { + case ARMBuildAttrs::File: + ScopeName = "FileAttributes"; + break; + case ARMBuildAttrs::Section: + ScopeName = "SectionAttributes"; + IndexName = "Sections"; + ParseIndexList(Data, Offset, Indicies); + break; + case ARMBuildAttrs::Symbol: + ScopeName = "SymbolAttributes"; + IndexName = "Symbols"; + ParseIndexList(Data, Offset, Indicies); + break; + default: + errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n'; + return; + } + + if (SW) { + DictScope ASS(*SW, ScopeName); + if (!Indicies.empty()) + SW->printList(IndexName, Indicies); + ParseAttributeList(Data, Offset, Length); + } else { + ParseAttributeList(Data, Offset, Length); + } + } +} + +void ARMAttributeParser::Parse(ArrayRef<uint8_t> Section, bool isLittle) { + uint64_t Offset = 1; + unsigned SectionNumber = 0; + + while (Offset < Section.size()) { + uint32_t SectionLength = isLittle ? 
+ support::endian::read32le(Section.data() + Offset) : + support::endian::read32be(Section.data() + Offset); + + if (SW) { + SW->startLine() << "Section " << ++SectionNumber << " {\n"; + SW->indent(); + } + + if (SectionLength == 0 || (SectionLength + Offset) > Section.size()) { + errs() << "invalid subsection length " << SectionLength << " at offset " + << Offset << "\n"; + return; + } + + ParseSubsection(Section.data() + Offset, SectionLength); + Offset = Offset + SectionLength; + + if (SW) { + SW->unindent(); + SW->startLine() << "}\n"; + } + } +} +} diff --git a/llvm/lib/Support/ARMBuildAttrs.cpp b/llvm/lib/Support/ARMBuildAttrs.cpp new file mode 100644 index 0000000000000..d0c4fb792cb8c --- /dev/null +++ b/llvm/lib/Support/ARMBuildAttrs.cpp @@ -0,0 +1,102 @@ +//===-- ARMBuildAttrs.cpp - ARM Build Attributes --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ARMBuildAttributes.h" + +using namespace llvm; + +namespace { +const struct { + ARMBuildAttrs::AttrType Attr; + StringRef TagName; +} ARMAttributeTags[] = { + { ARMBuildAttrs::File, "Tag_File" }, + { ARMBuildAttrs::Section, "Tag_Section" }, + { ARMBuildAttrs::Symbol, "Tag_Symbol" }, + { ARMBuildAttrs::CPU_raw_name, "Tag_CPU_raw_name" }, + { ARMBuildAttrs::CPU_name, "Tag_CPU_name" }, + { ARMBuildAttrs::CPU_arch, "Tag_CPU_arch" }, + { ARMBuildAttrs::CPU_arch_profile, "Tag_CPU_arch_profile" }, + { ARMBuildAttrs::ARM_ISA_use, "Tag_ARM_ISA_use" }, + { ARMBuildAttrs::THUMB_ISA_use, "Tag_THUMB_ISA_use" }, + { ARMBuildAttrs::FP_arch, "Tag_FP_arch" }, + { ARMBuildAttrs::WMMX_arch, "Tag_WMMX_arch" }, + { ARMBuildAttrs::Advanced_SIMD_arch, "Tag_Advanced_SIMD_arch" }, + { ARMBuildAttrs::MVE_arch, "Tag_MVE_arch" }, + { ARMBuildAttrs::PCS_config, "Tag_PCS_config" }, + { ARMBuildAttrs::ABI_PCS_R9_use, "Tag_ABI_PCS_R9_use" }, + { ARMBuildAttrs::ABI_PCS_RW_data, "Tag_ABI_PCS_RW_data" }, + { ARMBuildAttrs::ABI_PCS_RO_data, "Tag_ABI_PCS_RO_data" }, + { ARMBuildAttrs::ABI_PCS_GOT_use, "Tag_ABI_PCS_GOT_use" }, + { ARMBuildAttrs::ABI_PCS_wchar_t, "Tag_ABI_PCS_wchar_t" }, + { ARMBuildAttrs::ABI_FP_rounding, "Tag_ABI_FP_rounding" }, + { ARMBuildAttrs::ABI_FP_denormal, "Tag_ABI_FP_denormal" }, + { ARMBuildAttrs::ABI_FP_exceptions, "Tag_ABI_FP_exceptions" }, + { ARMBuildAttrs::ABI_FP_user_exceptions, "Tag_ABI_FP_user_exceptions" }, + { ARMBuildAttrs::ABI_FP_number_model, "Tag_ABI_FP_number_model" }, + { ARMBuildAttrs::ABI_align_needed, "Tag_ABI_align_needed" }, + { ARMBuildAttrs::ABI_align_preserved, "Tag_ABI_align_preserved" }, + { ARMBuildAttrs::ABI_enum_size, "Tag_ABI_enum_size" }, + { ARMBuildAttrs::ABI_HardFP_use, "Tag_ABI_HardFP_use" }, + { ARMBuildAttrs::ABI_VFP_args, 
"Tag_ABI_VFP_args" }, + { ARMBuildAttrs::ABI_WMMX_args, "Tag_ABI_WMMX_args" }, + { ARMBuildAttrs::ABI_optimization_goals, "Tag_ABI_optimization_goals" }, + { ARMBuildAttrs::ABI_FP_optimization_goals, "Tag_ABI_FP_optimization_goals" }, + { ARMBuildAttrs::compatibility, "Tag_compatibility" }, + { ARMBuildAttrs::CPU_unaligned_access, "Tag_CPU_unaligned_access" }, + { ARMBuildAttrs::FP_HP_extension, "Tag_FP_HP_extension" }, + { ARMBuildAttrs::ABI_FP_16bit_format, "Tag_ABI_FP_16bit_format" }, + { ARMBuildAttrs::MPextension_use, "Tag_MPextension_use" }, + { ARMBuildAttrs::DIV_use, "Tag_DIV_use" }, + { ARMBuildAttrs::DSP_extension, "Tag_DSP_extension" }, + { ARMBuildAttrs::nodefaults, "Tag_nodefaults" }, + { ARMBuildAttrs::also_compatible_with, "Tag_also_compatible_with" }, + { ARMBuildAttrs::T2EE_use, "Tag_T2EE_use" }, + { ARMBuildAttrs::conformance, "Tag_conformance" }, + { ARMBuildAttrs::Virtualization_use, "Tag_Virtualization_use" }, + + // Legacy Names + { ARMBuildAttrs::FP_arch, "Tag_VFP_arch" }, + { ARMBuildAttrs::FP_HP_extension, "Tag_VFP_HP_extension" }, + { ARMBuildAttrs::ABI_align_needed, "Tag_ABI_align8_needed" }, + { ARMBuildAttrs::ABI_align_preserved, "Tag_ABI_align8_preserved" }, +}; +} + +namespace llvm { +namespace ARMBuildAttrs { +StringRef AttrTypeAsString(unsigned Attr, bool HasTagPrefix) { + return AttrTypeAsString(static_cast<AttrType>(Attr), HasTagPrefix); +} + +StringRef AttrTypeAsString(AttrType Attr, bool HasTagPrefix) { + for (unsigned TI = 0, TE = sizeof(ARMAttributeTags) / sizeof(*ARMAttributeTags); + TI != TE; ++TI) + if (ARMAttributeTags[TI].Attr == Attr) { + auto TagName = ARMAttributeTags[TI].TagName; + return HasTagPrefix ? 
TagName : TagName.drop_front(4); + } + return ""; +} + +int AttrTypeFromString(StringRef Tag) { + bool HasTagPrefix = Tag.startswith("Tag_"); + for (unsigned TI = 0, + TE = sizeof(ARMAttributeTags) / sizeof(*ARMAttributeTags); + TI != TE; ++TI) { + auto TagName = ARMAttributeTags[TI].TagName; + if (TagName.drop_front(HasTagPrefix ? 0 : 4) == Tag) { + return ARMAttributeTags[TI].Attr; + } + } + return -1; +} +} +} + diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp new file mode 100644 index 0000000000000..ce5daa7fe58c0 --- /dev/null +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -0,0 +1,626 @@ +//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise ARM hardware features +// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ARMTargetParser.h" +#include "llvm/ADT/StringSwitch.h" +#include <cctype> + +using namespace llvm; + +static StringRef getHWDivSynonym(StringRef HWDiv) { + return StringSwitch<StringRef>(HWDiv) + .Case("thumb,arm", "arm,thumb") + .Default(HWDiv); +} + +// Allows partial match, ex. "v7a" matches "armv7a". +ARM::ArchKind ARM::parseArch(StringRef Arch) { + Arch = getCanonicalArchName(Arch); + StringRef Syn = getArchSynonym(Arch); + for (const auto A : ARCHNames) { + if (A.getName().endswith(Syn)) + return A.ID; + } + return ArchKind::INVALID; +} + +// Version number (ex. v7 = 7). 
+unsigned ARM::parseArchVersion(StringRef Arch) { + Arch = getCanonicalArchName(Arch); + switch (parseArch(Arch)) { + case ArchKind::ARMV2: + case ArchKind::ARMV2A: + return 2; + case ArchKind::ARMV3: + case ArchKind::ARMV3M: + return 3; + case ArchKind::ARMV4: + case ArchKind::ARMV4T: + return 4; + case ArchKind::ARMV5T: + case ArchKind::ARMV5TE: + case ArchKind::IWMMXT: + case ArchKind::IWMMXT2: + case ArchKind::XSCALE: + case ArchKind::ARMV5TEJ: + return 5; + case ArchKind::ARMV6: + case ArchKind::ARMV6K: + case ArchKind::ARMV6T2: + case ArchKind::ARMV6KZ: + case ArchKind::ARMV6M: + return 6; + case ArchKind::ARMV7A: + case ArchKind::ARMV7VE: + case ArchKind::ARMV7R: + case ArchKind::ARMV7M: + case ArchKind::ARMV7S: + case ArchKind::ARMV7EM: + case ArchKind::ARMV7K: + return 7; + case ArchKind::ARMV8A: + case ArchKind::ARMV8_1A: + case ArchKind::ARMV8_2A: + case ArchKind::ARMV8_3A: + case ArchKind::ARMV8_4A: + case ArchKind::ARMV8_5A: + case ArchKind::ARMV8R: + case ArchKind::ARMV8MBaseline: + case ArchKind::ARMV8MMainline: + case ArchKind::ARMV8_1MMainline: + return 8; + case ArchKind::INVALID: + return 0; + } + llvm_unreachable("Unhandled architecture"); +} + +// Profile A/R/M +ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) { + Arch = getCanonicalArchName(Arch); + switch (parseArch(Arch)) { + case ArchKind::ARMV6M: + case ArchKind::ARMV7M: + case ArchKind::ARMV7EM: + case ArchKind::ARMV8MMainline: + case ArchKind::ARMV8MBaseline: + case ArchKind::ARMV8_1MMainline: + return ProfileKind::M; + case ArchKind::ARMV7R: + case ArchKind::ARMV8R: + return ProfileKind::R; + case ArchKind::ARMV7A: + case ArchKind::ARMV7VE: + case ArchKind::ARMV7K: + case ArchKind::ARMV8A: + case ArchKind::ARMV8_1A: + case ArchKind::ARMV8_2A: + case ArchKind::ARMV8_3A: + case ArchKind::ARMV8_4A: + case ArchKind::ARMV8_5A: + return ProfileKind::A; + case ArchKind::ARMV2: + case ArchKind::ARMV2A: + case ArchKind::ARMV3: + case ArchKind::ARMV3M: + case ArchKind::ARMV4: + case 
ArchKind::ARMV4T: + case ArchKind::ARMV5T: + case ArchKind::ARMV5TE: + case ArchKind::ARMV5TEJ: + case ArchKind::ARMV6: + case ArchKind::ARMV6K: + case ArchKind::ARMV6T2: + case ArchKind::ARMV6KZ: + case ArchKind::ARMV7S: + case ArchKind::IWMMXT: + case ArchKind::IWMMXT2: + case ArchKind::XSCALE: + case ArchKind::INVALID: + return ProfileKind::INVALID; + } + llvm_unreachable("Unhandled architecture"); +} + +StringRef ARM::getArchSynonym(StringRef Arch) { + return StringSwitch<StringRef>(Arch) + .Case("v5", "v5t") + .Case("v5e", "v5te") + .Case("v6j", "v6") + .Case("v6hl", "v6k") + .Cases("v6m", "v6sm", "v6s-m", "v6-m") + .Cases("v6z", "v6zk", "v6kz") + .Cases("v7", "v7a", "v7hl", "v7l", "v7-a") + .Case("v7r", "v7-r") + .Case("v7m", "v7-m") + .Case("v7em", "v7e-m") + .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a") + .Case("v8.1a", "v8.1-a") + .Case("v8.2a", "v8.2-a") + .Case("v8.3a", "v8.3-a") + .Case("v8.4a", "v8.4-a") + .Case("v8.5a", "v8.5-a") + .Case("v8r", "v8-r") + .Case("v8m.base", "v8-m.base") + .Case("v8m.main", "v8-m.main") + .Case("v8.1m.main", "v8.1-m.main") + .Default(Arch); +} + +bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) { + + if (FPUKind >= FK_LAST || FPUKind == FK_INVALID) + return false; + + static const struct FPUFeatureNameInfo { + const char *PlusName, *MinusName; + FPUVersion MinVersion; + FPURestriction MaxRestriction; + } FPUFeatureInfoList[] = { + // We have to specify the + and - versions of the name in full so + // that we can return them as static StringRefs. + // + // Also, the SubtargetFeatures ending in just "sp" are listed here + // under FPURestriction::None, which is the only FPURestriction in + // which they would be valid (since FPURestriction::SP doesn't + // exist). 
+ + {"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16}, + {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16}, + {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, + {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None}, + {"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16}, + {"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16}, + {"+vfp3sp", "-vfp3sp", FPUVersion::VFPV3, FPURestriction::None}, + {"+fp16", "-fp16", FPUVersion::VFPV3_FP16, FPURestriction::SP_D16}, + {"+vfp4", "-vfp4", FPUVersion::VFPV4, FPURestriction::None}, + {"+vfp4d16", "-vfp4d16", FPUVersion::VFPV4, FPURestriction::D16}, + {"+vfp4d16sp", "-vfp4d16sp", FPUVersion::VFPV4, FPURestriction::SP_D16}, + {"+vfp4sp", "-vfp4sp", FPUVersion::VFPV4, FPURestriction::None}, + {"+fp-armv8", "-fp-armv8", FPUVersion::VFPV5, FPURestriction::None}, + {"+fp-armv8d16", "-fp-armv8d16", FPUVersion::VFPV5, FPURestriction::D16}, + {"+fp-armv8d16sp", "-fp-armv8d16sp", FPUVersion::VFPV5, FPURestriction::SP_D16}, + {"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None}, + {"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16}, + {"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16}, + {"+d32", "-d32", FPUVersion::VFPV3, FPURestriction::None}, + }; + + for (const auto &Info: FPUFeatureInfoList) { + if (FPUNames[FPUKind].FPUVer >= Info.MinVersion && + FPUNames[FPUKind].Restriction <= Info.MaxRestriction) + Features.push_back(Info.PlusName); + else + Features.push_back(Info.MinusName); + } + + static const struct NeonFeatureNameInfo { + const char *PlusName, *MinusName; + NeonSupportLevel MinSupportLevel; + } NeonFeatureInfoList[] = { + {"+neon", "-neon", NeonSupportLevel::Neon}, + {"+crypto", "-crypto", NeonSupportLevel::Crypto}, + }; + + for (const auto &Info: NeonFeatureInfoList) { + if (FPUNames[FPUKind].NeonSupport >= Info.MinSupportLevel) + Features.push_back(Info.PlusName); + else + 
Features.push_back(Info.MinusName); + } + + return true; +} + +// Little/Big endian +ARM::EndianKind ARM::parseArchEndian(StringRef Arch) { + if (Arch.startswith("armeb") || Arch.startswith("thumbeb") || + Arch.startswith("aarch64_be")) + return EndianKind::BIG; + + if (Arch.startswith("arm") || Arch.startswith("thumb")) { + if (Arch.endswith("eb")) + return EndianKind::BIG; + else + return EndianKind::LITTLE; + } + + if (Arch.startswith("aarch64") || Arch.startswith("aarch64_32")) + return EndianKind::LITTLE; + + return EndianKind::INVALID; +} + +// ARM, Thumb, AArch64 +ARM::ISAKind ARM::parseArchISA(StringRef Arch) { + return StringSwitch<ISAKind>(Arch) + .StartsWith("aarch64", ISAKind::AARCH64) + .StartsWith("arm64", ISAKind::AARCH64) + .StartsWith("thumb", ISAKind::THUMB) + .StartsWith("arm", ISAKind::ARM) + .Default(ISAKind::INVALID); +} + +unsigned ARM::parseFPU(StringRef FPU) { + StringRef Syn = getFPUSynonym(FPU); + for (const auto F : FPUNames) { + if (Syn == F.getName()) + return F.ID; + } + return FK_INVALID; +} + +ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(unsigned FPUKind) { + if (FPUKind >= FK_LAST) + return NeonSupportLevel::None; + return FPUNames[FPUKind].NeonSupport; +} + +// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but +// (iwmmxt|xscale)(eb)? is also permitted. If the former, return +// "v.+", if the latter, return unmodified string, minus 'eb'. +// If invalid, return empty string. +StringRef ARM::getCanonicalArchName(StringRef Arch) { + size_t offset = StringRef::npos; + StringRef A = Arch; + StringRef Error = ""; + + // Begins with "arm" / "thumb", move past it. + if (A.startswith("arm64_32")) + offset = 8; + else if (A.startswith("arm64")) + offset = 5; + else if (A.startswith("aarch64_32")) + offset = 10; + else if (A.startswith("arm")) + offset = 3; + else if (A.startswith("thumb")) + offset = 5; + else if (A.startswith("aarch64")) { + offset = 7; + // AArch64 uses "_be", not "eb" suffix. 
+ if (A.find("eb") != StringRef::npos) + return Error; + if (A.substr(offset, 3) == "_be") + offset += 3; + } + + // Ex. "armebv7", move past the "eb". + if (offset != StringRef::npos && A.substr(offset, 2) == "eb") + offset += 2; + // Or, if it ends with eb ("armv7eb"), chop it off. + else if (A.endswith("eb")) + A = A.substr(0, A.size() - 2); + // Trim the head + if (offset != StringRef::npos) + A = A.substr(offset); + + // Empty string means offset reached the end, which means it's valid. + if (A.empty()) + return Arch; + + // Only match non-marketing names + if (offset != StringRef::npos) { + // Must start with 'vN'. + if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1]))) + return Error; + // Can't have an extra 'eb'. + if (A.find("eb") != StringRef::npos) + return Error; + } + + // Arch will either be a 'v' name (v7a) or a marketing name (xscale). + return A; +} + +StringRef ARM::getFPUSynonym(StringRef FPU) { + return StringSwitch<StringRef>(FPU) + .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported + .Case("vfp2", "vfpv2") + .Case("vfp3", "vfpv3") + .Case("vfp4", "vfpv4") + .Case("vfp3-d16", "vfpv3-d16") + .Case("vfp4-d16", "vfpv4-d16") + .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") + .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") + .Case("fp5-sp-d16", "fpv5-sp-d16") + .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") + // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. 
+ .Case("neon-vfpv3", "neon") + .Default(FPU); +} + +StringRef ARM::getFPUName(unsigned FPUKind) { + if (FPUKind >= FK_LAST) + return StringRef(); + return FPUNames[FPUKind].getName(); +} + +ARM::FPUVersion ARM::getFPUVersion(unsigned FPUKind) { + if (FPUKind >= FK_LAST) + return FPUVersion::NONE; + return FPUNames[FPUKind].FPUVer; +} + +ARM::FPURestriction ARM::getFPURestriction(unsigned FPUKind) { + if (FPUKind >= FK_LAST) + return FPURestriction::None; + return FPUNames[FPUKind].Restriction; +} + +unsigned ARM::getDefaultFPU(StringRef CPU, ARM::ArchKind AK) { + if (CPU == "generic") + return ARM::ARCHNames[static_cast<unsigned>(AK)].DefaultFPU; + + return StringSwitch<unsigned>(CPU) +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, DEFAULT_FPU) +#include "llvm/Support/ARMTargetParser.def" + .Default(ARM::FK_INVALID); +} + +unsigned ARM::getDefaultExtensions(StringRef CPU, ARM::ArchKind AK) { + if (CPU == "generic") + return ARM::ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions; + + return StringSwitch<unsigned>(CPU) +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, \ + ARCHNames[static_cast<unsigned>(ArchKind::ID)].ArchBaseExtensions | \ + DEFAULT_EXT) +#include "llvm/Support/ARMTargetParser.def" + .Default(ARM::AEK_INVALID); +} + +bool ARM::getHWDivFeatures(unsigned HWDivKind, + std::vector<StringRef> &Features) { + + if (HWDivKind == AEK_INVALID) + return false; + + if (HWDivKind & AEK_HWDIVARM) + Features.push_back("+hwdiv-arm"); + else + Features.push_back("-hwdiv-arm"); + + if (HWDivKind & AEK_HWDIVTHUMB) + Features.push_back("+hwdiv"); + else + Features.push_back("-hwdiv"); + + return true; +} + +bool ARM::getExtensionFeatures(unsigned Extensions, + std::vector<StringRef> &Features) { + + if (Extensions == AEK_INVALID) + return false; + + for (const auto AE : ARCHExtNames) { + if ((Extensions & AE.ID) == AE.ID && AE.Feature) + Features.push_back(AE.Feature); + else if 
(AE.NegFeature) + Features.push_back(AE.NegFeature); + } + + return getHWDivFeatures(Extensions, Features); +} + +StringRef ARM::getArchName(ARM::ArchKind AK) { + return ARCHNames[static_cast<unsigned>(AK)].getName(); +} + +StringRef ARM::getCPUAttr(ARM::ArchKind AK) { + return ARCHNames[static_cast<unsigned>(AK)].getCPUAttr(); +} + +StringRef ARM::getSubArch(ARM::ArchKind AK) { + return ARCHNames[static_cast<unsigned>(AK)].getSubArch(); +} + +unsigned ARM::getArchAttr(ARM::ArchKind AK) { + return ARCHNames[static_cast<unsigned>(AK)].ArchAttr; +} + +StringRef ARM::getArchExtName(unsigned ArchExtKind) { + for (const auto AE : ARCHExtNames) { + if (ArchExtKind == AE.ID) + return AE.getName(); + } + return StringRef(); +} + +static bool stripNegationPrefix(StringRef &Name) { + if (Name.startswith("no")) { + Name = Name.substr(2); + return true; + } + return false; +} + +StringRef ARM::getArchExtFeature(StringRef ArchExt) { + bool Negated = stripNegationPrefix(ArchExt); + for (const auto AE : ARCHExtNames) { + if (AE.Feature && ArchExt == AE.getName()) + return StringRef(Negated ? AE.NegFeature : AE.Feature); + } + + return StringRef(); +} + +static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) { + const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind]; + + // If the input FPU already supports double-precision, then there + // isn't any different FPU we can return here. + // + // The current available FPURestriction values are None (no + // restriction), D16 (only 16 d-regs) and SP_D16 (16 d-regs + // and single precision only); there's no value representing + // SP restriction without D16. So this test just means 'is it + // SP only?'. + if (InputFPU.Restriction != ARM::FPURestriction::SP_D16) + return ARM::FK_INVALID; + + // Otherwise, look for an FPU entry with all the same fields, except + // that SP_D16 has been replaced with just D16, representing adding + // double precision and not changing anything else. 
+ for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) { + if (CandidateFPU.FPUVer == InputFPU.FPUVer && + CandidateFPU.NeonSupport == InputFPU.NeonSupport && + CandidateFPU.Restriction == ARM::FPURestriction::D16) { + return CandidateFPU.ID; + } + } + + // nothing found + return ARM::FK_INVALID; +} + +static unsigned getAEKID(StringRef ArchExtName) { + for (const auto AE : ARM::ARCHExtNames) + if (AE.getName() == ArchExtName) + return AE.ID; + return ARM::AEK_INVALID; +} + +bool ARM::appendArchExtFeatures( + StringRef CPU, ARM::ArchKind AK, StringRef ArchExt, + std::vector<StringRef> &Features) { + + size_t StartingNumFeatures = Features.size(); + const bool Negated = stripNegationPrefix(ArchExt); + unsigned ID = getAEKID(ArchExt); + + if (ID == AEK_INVALID) + return false; + + for (const auto AE : ARCHExtNames) { + if (Negated && (AE.ID & ID) == ID && AE.NegFeature) + Features.push_back(AE.NegFeature); + else if (AE.ID == ID && AE.Feature) + Features.push_back(AE.Feature); + } + + if (CPU == "") + CPU = "generic"; + + if (ArchExt == "fp" || ArchExt == "fp.dp") { + unsigned FPUKind; + if (ArchExt == "fp.dp") { + if (Negated) { + Features.push_back("-fp64"); + return true; + } + FPUKind = findDoublePrecisionFPU(getDefaultFPU(CPU, AK)); + } else if (Negated) { + FPUKind = ARM::FK_NONE; + } else { + FPUKind = getDefaultFPU(CPU, AK); + } + return ARM::getFPUFeatures(FPUKind, Features); + } + return StartingNumFeatures != Features.size(); +} + +StringRef ARM::getHWDivName(unsigned HWDivKind) { + for (const auto D : HWDivNames) { + if (HWDivKind == D.ID) + return D.getName(); + } + return StringRef(); +} + +StringRef ARM::getDefaultCPU(StringRef Arch) { + ArchKind AK = parseArch(Arch); + if (AK == ArchKind::INVALID) + return StringRef(); + + // Look for multiple AKs to find the default for pair AK+Name. 
+ for (const auto CPU : CPUNames) { + if (CPU.ArchID == AK && CPU.Default) + return CPU.getName(); + } + + // If we can't find a default then target the architecture instead + return "generic"; +} + +unsigned ARM::parseHWDiv(StringRef HWDiv) { + StringRef Syn = getHWDivSynonym(HWDiv); + for (const auto D : HWDivNames) { + if (Syn == D.getName()) + return D.ID; + } + return AEK_INVALID; +} + +unsigned ARM::parseArchExt(StringRef ArchExt) { + for (const auto A : ARCHExtNames) { + if (ArchExt == A.getName()) + return A.ID; + } + return AEK_INVALID; +} + +ARM::ArchKind ARM::parseCPUArch(StringRef CPU) { + for (const auto C : CPUNames) { + if (CPU == C.getName()) + return C.ArchID; + } + return ArchKind::INVALID; +} + +void ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) { + for (const CpuNames<ArchKind> &Arch : CPUNames) { + if (Arch.ArchID != ArchKind::INVALID) + Values.push_back(Arch.getName()); + } +} + +StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) { + StringRef ArchName = + CPU.empty() ? TT.getArchName() : getArchName(parseCPUArch(CPU)); + + if (TT.isOSBinFormatMachO()) { + if (TT.getEnvironment() == Triple::EABI || + TT.getOS() == Triple::UnknownOS || + parseArchProfile(ArchName) == ProfileKind::M) + return "aapcs"; + if (TT.isWatchABI()) + return "aapcs16"; + return "apcs-gnu"; + } else if (TT.isOSWindows()) + // FIXME: this is invalid for WindowsCE. + return "aapcs"; + + // Select the default based on the platform. 
+ switch (TT.getEnvironment()) { + case Triple::Android: + case Triple::GNUEABI: + case Triple::GNUEABIHF: + case Triple::MuslEABI: + case Triple::MuslEABIHF: + return "aapcs-linux"; + case Triple::EABIHF: + case Triple::EABI: + return "aapcs"; + default: + if (TT.isOSNetBSD()) + return "apcs-gnu"; + if (TT.isOSOpenBSD()) + return "aapcs-linux"; + return "aapcs"; + } +} diff --git a/llvm/lib/Support/ARMWinEH.cpp b/llvm/lib/Support/ARMWinEH.cpp new file mode 100644 index 0000000000000..831f95cd4b0b1 --- /dev/null +++ b/llvm/lib/Support/ARMWinEH.cpp @@ -0,0 +1,37 @@ +//===-- ARMWinEH.cpp - Windows on ARM EH Support Functions ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ARMWinEH.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace ARM { +namespace WinEH { +std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) { + uint8_t NumRegisters = RF.Reg(); + uint8_t RegistersVFP = RF.R(); + uint8_t LinkRegister = RF.L(); + uint8_t ChainedFrame = RF.C(); + + uint16_t GPRMask = (ChainedFrame << 11) | (LinkRegister << 14); + uint32_t VFPMask = 0; + + if (RegistersVFP) + VFPMask |= (((1 << ((NumRegisters + 1) % 8)) - 1) << 8); + else + GPRMask |= (((1 << (NumRegisters + 1)) - 1) << 4); + + if (PrologueFolding(RF)) + GPRMask |= (((1 << (NumRegisters + 1)) - 1) << (~RF.StackAdjust() & 0x3)); + + return std::make_pair(GPRMask, VFPMask); +} +} +} +} + diff --git a/llvm/lib/Support/Allocator.cpp b/llvm/lib/Support/Allocator.cpp new file mode 100644 index 0000000000000..718d3fc0d8e1c --- /dev/null +++ b/llvm/lib/Support/Allocator.cpp @@ -0,0 +1,39 @@ +//===--- Allocator.cpp - Simple memory allocation abstraction -------------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the BumpPtrAllocator interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +namespace detail { + +void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated, + size_t TotalMemory) { + errs() << "\nNumber of memory regions: " << NumSlabs << '\n' + << "Bytes used: " << BytesAllocated << '\n' + << "Bytes allocated: " << TotalMemory << '\n' + << "Bytes wasted: " << (TotalMemory - BytesAllocated) + << " (includes alignment, etc)\n"; +} + +} // End namespace detail. + +void PrintRecyclerStats(size_t Size, + size_t Align, + size_t FreeListSize) { + errs() << "Recycler element size: " << Size << '\n' + << "Recycler element alignment: " << Align << '\n' + << "Number of elements free for recycling: " << FreeListSize << '\n'; +} + +} diff --git a/llvm/lib/Support/Atomic.cpp b/llvm/lib/Support/Atomic.cpp new file mode 100644 index 0000000000000..f6865405c2b85 --- /dev/null +++ b/llvm/lib/Support/Atomic.cpp @@ -0,0 +1,59 @@ +//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic operations. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Atomic.h" +#include "llvm/Config/llvm-config.h" + +using namespace llvm; + +#if defined(_MSC_VER) +#include <intrin.h> + +// We must include windows.h after intrin.h. +#include <windows.h> +#undef MemoryFence +#endif + +#if defined(__GNUC__) || (defined(__IBMCPP__) && __IBMCPP__ >= 1210) +#define GNU_ATOMICS +#endif + +void sys::MemoryFence() { +#if LLVM_HAS_ATOMICS == 0 + return; +#else +# if defined(GNU_ATOMICS) + __sync_synchronize(); +# elif defined(_MSC_VER) + MemoryBarrier(); +# else +# error No memory fence implementation for your platform! +# endif +#endif +} + +sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, + sys::cas_flag new_value, + sys::cas_flag old_value) { +#if LLVM_HAS_ATOMICS == 0 + sys::cas_flag result = *ptr; + if (result == old_value) + *ptr = new_value; + return result; +#elif defined(GNU_ATOMICS) + return __sync_val_compare_and_swap(ptr, old_value, new_value); +#elif defined(_MSC_VER) + return InterlockedCompareExchange(ptr, new_value, old_value); +#else +# error No compare-and-swap implementation for your platform! +#endif +} diff --git a/llvm/lib/Support/BinaryStreamError.cpp b/llvm/lib/Support/BinaryStreamError.cpp new file mode 100644 index 0000000000000..f22523f09ac80 --- /dev/null +++ b/llvm/lib/Support/BinaryStreamError.cpp @@ -0,0 +1,55 @@ +//===- BinaryStreamError.cpp - Error extensions for streams -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BinaryStreamError.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +char BinaryStreamError::ID = 0; + +BinaryStreamError::BinaryStreamError(stream_error_code C) + : BinaryStreamError(C, "") {} + +BinaryStreamError::BinaryStreamError(StringRef Context) + : BinaryStreamError(stream_error_code::unspecified, Context) {} + +BinaryStreamError::BinaryStreamError(stream_error_code C, StringRef Context) + : Code(C) { + ErrMsg = "Stream Error: "; + switch (C) { + case stream_error_code::unspecified: + ErrMsg += "An unspecified error has occurred."; + break; + case stream_error_code::stream_too_short: + ErrMsg += "The stream is too short to perform the requested operation."; + break; + case stream_error_code::invalid_array_size: + ErrMsg += "The buffer size is not a multiple of the array element size."; + break; + case stream_error_code::invalid_offset: + ErrMsg += "The specified offset is invalid for the current stream."; + break; + case stream_error_code::filesystem_error: + ErrMsg += "An I/O error occurred on the file system."; + break; + } + + if (!Context.empty()) { + ErrMsg += " "; + ErrMsg += Context; + } +} + +void BinaryStreamError::log(raw_ostream &OS) const { OS << ErrMsg; } + +StringRef BinaryStreamError::getErrorMessage() const { return ErrMsg; } + +std::error_code BinaryStreamError::convertToErrorCode() const { + return inconvertibleErrorCode(); +} diff --git a/llvm/lib/Support/BinaryStreamReader.cpp b/llvm/lib/Support/BinaryStreamReader.cpp new file mode 100644 index 0000000000000..b17786593bded --- /dev/null +++ b/llvm/lib/Support/BinaryStreamReader.cpp @@ -0,0 +1,179 @@ +//===- BinaryStreamReader.cpp - Reads objects from a binary stream --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BinaryStreamReader.h" + +#include "llvm/Support/BinaryStreamError.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; +using endianness = llvm::support::endianness; + +BinaryStreamReader::BinaryStreamReader(BinaryStreamRef Ref) : Stream(Ref) {} + +BinaryStreamReader::BinaryStreamReader(BinaryStream &Stream) : Stream(Stream) {} + +BinaryStreamReader::BinaryStreamReader(ArrayRef<uint8_t> Data, + endianness Endian) + : Stream(Data, Endian) {} + +BinaryStreamReader::BinaryStreamReader(StringRef Data, endianness Endian) + : Stream(Data, Endian) {} + +Error BinaryStreamReader::readLongestContiguousChunk( + ArrayRef<uint8_t> &Buffer) { + if (auto EC = Stream.readLongestContiguousChunk(Offset, Buffer)) + return EC; + Offset += Buffer.size(); + return Error::success(); +} + +Error BinaryStreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) { + if (auto EC = Stream.readBytes(Offset, Size, Buffer)) + return EC; + Offset += Size; + return Error::success(); +} + +Error BinaryStreamReader::readULEB128(uint64_t &Dest) { + SmallVector<uint8_t, 10> EncodedBytes; + ArrayRef<uint8_t> NextByte; + + // Copy the encoded ULEB into the buffer. + do { + if (auto Err = readBytes(NextByte, 1)) + return Err; + EncodedBytes.push_back(NextByte[0]); + } while (NextByte[0] & 0x80); + + Dest = decodeULEB128(EncodedBytes.begin(), nullptr, EncodedBytes.end()); + return Error::success(); +} + +Error BinaryStreamReader::readSLEB128(int64_t &Dest) { + SmallVector<uint8_t, 10> EncodedBytes; + ArrayRef<uint8_t> NextByte; + + // Copy the encoded ULEB into the buffer. 
+ do { + if (auto Err = readBytes(NextByte, 1)) + return Err; + EncodedBytes.push_back(NextByte[0]); + } while (NextByte[0] & 0x80); + + Dest = decodeSLEB128(EncodedBytes.begin(), nullptr, EncodedBytes.end()); + return Error::success(); +} + +Error BinaryStreamReader::readCString(StringRef &Dest) { + uint32_t OriginalOffset = getOffset(); + uint32_t FoundOffset = 0; + while (true) { + uint32_t ThisOffset = getOffset(); + ArrayRef<uint8_t> Buffer; + if (auto EC = readLongestContiguousChunk(Buffer)) + return EC; + StringRef S(reinterpret_cast<const char *>(Buffer.begin()), Buffer.size()); + size_t Pos = S.find_first_of('\0'); + if (LLVM_LIKELY(Pos != StringRef::npos)) { + FoundOffset = Pos + ThisOffset; + break; + } + } + assert(FoundOffset >= OriginalOffset); + + setOffset(OriginalOffset); + size_t Length = FoundOffset - OriginalOffset; + + if (auto EC = readFixedString(Dest, Length)) + return EC; + + // Now set the offset back to after the null terminator. + setOffset(FoundOffset + 1); + return Error::success(); +} + +Error BinaryStreamReader::readWideString(ArrayRef<UTF16> &Dest) { + uint32_t Length = 0; + uint32_t OriginalOffset = getOffset(); + const UTF16 *C; + while (true) { + if (auto EC = readObject(C)) + return EC; + if (*C == 0x0000) + break; + ++Length; + } + uint32_t NewOffset = getOffset(); + setOffset(OriginalOffset); + + if (auto EC = readArray(Dest, Length)) + return EC; + setOffset(NewOffset); + return Error::success(); +} + +Error BinaryStreamReader::readFixedString(StringRef &Dest, uint32_t Length) { + ArrayRef<uint8_t> Bytes; + if (auto EC = readBytes(Bytes, Length)) + return EC; + Dest = StringRef(reinterpret_cast<const char *>(Bytes.begin()), Bytes.size()); + return Error::success(); +} + +Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref) { + return readStreamRef(Ref, bytesRemaining()); +} + +Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref, uint32_t Length) { + if (bytesRemaining() < Length) + return 
make_error<BinaryStreamError>(stream_error_code::stream_too_short); + Ref = Stream.slice(Offset, Length); + Offset += Length; + return Error::success(); +} + +Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Stream, + uint32_t Size) { + Stream.Offset = getOffset(); + return readStreamRef(Stream.StreamData, Size); +} + +Error BinaryStreamReader::skip(uint32_t Amount) { + if (Amount > bytesRemaining()) + return make_error<BinaryStreamError>(stream_error_code::stream_too_short); + Offset += Amount; + return Error::success(); +} + +Error BinaryStreamReader::padToAlignment(uint32_t Align) { + uint32_t NewOffset = alignTo(Offset, Align); + return skip(NewOffset - Offset); +} + +uint8_t BinaryStreamReader::peek() const { + ArrayRef<uint8_t> Buffer; + auto EC = Stream.readBytes(Offset, 1, Buffer); + assert(!EC && "Cannot peek an empty buffer!"); + llvm::consumeError(std::move(EC)); + return Buffer[0]; +} + +std::pair<BinaryStreamReader, BinaryStreamReader> +BinaryStreamReader::split(uint32_t Off) const { + assert(getLength() >= Off); + + BinaryStreamRef First = Stream.drop_front(Offset); + + BinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamReader W1{First}; + BinaryStreamReader W2{Second}; + return std::make_pair(W1, W2); +} diff --git a/llvm/lib/Support/BinaryStreamRef.cpp b/llvm/lib/Support/BinaryStreamRef.cpp new file mode 100644 index 0000000000000..6bcc504ffad5f --- /dev/null +++ b/llvm/lib/Support/BinaryStreamRef.cpp @@ -0,0 +1,130 @@ +//===- BinaryStreamRef.cpp - ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/BinaryByteStream.h" + +using namespace llvm; +using namespace llvm::support; + +namespace { + +class ArrayRefImpl : public BinaryStream { +public: + ArrayRefImpl(ArrayRef<uint8_t> Data, endianness Endian) : BBS(Data, Endian) {} + + llvm::support::endianness getEndian() const override { + return BBS.getEndian(); + } + Error readBytes(uint32_t Offset, uint32_t Size, + ArrayRef<uint8_t> &Buffer) override { + return BBS.readBytes(Offset, Size, Buffer); + } + Error readLongestContiguousChunk(uint32_t Offset, + ArrayRef<uint8_t> &Buffer) override { + return BBS.readLongestContiguousChunk(Offset, Buffer); + } + uint32_t getLength() override { return BBS.getLength(); } + +private: + BinaryByteStream BBS; +}; + +class MutableArrayRefImpl : public WritableBinaryStream { +public: + MutableArrayRefImpl(MutableArrayRef<uint8_t> Data, endianness Endian) + : BBS(Data, Endian) {} + + // Inherited via WritableBinaryStream + llvm::support::endianness getEndian() const override { + return BBS.getEndian(); + } + Error readBytes(uint32_t Offset, uint32_t Size, + ArrayRef<uint8_t> &Buffer) override { + return BBS.readBytes(Offset, Size, Buffer); + } + Error readLongestContiguousChunk(uint32_t Offset, + ArrayRef<uint8_t> &Buffer) override { + return BBS.readLongestContiguousChunk(Offset, Buffer); + } + uint32_t getLength() override { return BBS.getLength(); } + + Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override { + return BBS.writeBytes(Offset, Data); + } + Error commit() override { return BBS.commit(); } + +private: + MutableBinaryByteStream BBS; +}; +} + +BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream) + : BinaryStreamRefBase(Stream) {} +BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, + Optional<uint32_t> Length) + : 
BinaryStreamRefBase(Stream, Offset, Length) {} +BinaryStreamRef::BinaryStreamRef(ArrayRef<uint8_t> Data, endianness Endian) + : BinaryStreamRefBase(std::make_shared<ArrayRefImpl>(Data, Endian), 0, + Data.size()) {} +BinaryStreamRef::BinaryStreamRef(StringRef Data, endianness Endian) + : BinaryStreamRef(makeArrayRef(Data.bytes_begin(), Data.bytes_end()), + Endian) {} + +Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size, + ArrayRef<uint8_t> &Buffer) const { + if (auto EC = checkOffsetForRead(Offset, Size)) + return EC; + return BorrowedImpl->readBytes(ViewOffset + Offset, Size, Buffer); +} + +Error BinaryStreamRef::readLongestContiguousChunk( + uint32_t Offset, ArrayRef<uint8_t> &Buffer) const { + if (auto EC = checkOffsetForRead(Offset, 1)) + return EC; + + if (auto EC = + BorrowedImpl->readLongestContiguousChunk(ViewOffset + Offset, Buffer)) + return EC; + // This StreamRef might refer to a smaller window over a larger stream. In + // that case we will have read out more bytes than we should return, because + // we should not read past the end of the current view. 
+ uint32_t MaxLength = getLength() - Offset; + if (Buffer.size() > MaxLength) + Buffer = Buffer.slice(0, MaxLength); + return Error::success(); +} + +WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream) + : BinaryStreamRefBase(Stream) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream, + uint32_t Offset, + Optional<uint32_t> Length) + : BinaryStreamRefBase(Stream, Offset, Length) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data, + endianness Endian) + : BinaryStreamRefBase(std::make_shared<MutableArrayRefImpl>(Data, Endian), + 0, Data.size()) {} + + +Error WritableBinaryStreamRef::writeBytes(uint32_t Offset, + ArrayRef<uint8_t> Data) const { + if (auto EC = checkOffsetForWrite(Offset, Data.size())) + return EC; + + return BorrowedImpl->writeBytes(ViewOffset + Offset, Data); +} + +WritableBinaryStreamRef::operator BinaryStreamRef() const { + return BinaryStreamRef(*BorrowedImpl, ViewOffset, Length); +} + +/// For buffered streams, commits changes to the backing store. +Error WritableBinaryStreamRef::commit() { return BorrowedImpl->commit(); } diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp new file mode 100644 index 0000000000000..986e18da281db --- /dev/null +++ b/llvm/lib/Support/BinaryStreamWriter.cpp @@ -0,0 +1,103 @@ +//===- BinaryStreamWriter.cpp - Writes objects to a BinaryStream ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BinaryStreamWriter.h" + +#include "llvm/Support/BinaryStreamError.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; + +BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef Ref) + : Stream(Ref) {} + +BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStream &Stream) + : Stream(Stream) {} + +BinaryStreamWriter::BinaryStreamWriter(MutableArrayRef<uint8_t> Data, + llvm::support::endianness Endian) + : Stream(Data, Endian) {} + +Error BinaryStreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) { + if (auto EC = Stream.writeBytes(Offset, Buffer)) + return EC; + Offset += Buffer.size(); + return Error::success(); +} + +Error BinaryStreamWriter::writeULEB128(uint64_t Value) { + uint8_t EncodedBytes[10] = {0}; + unsigned Size = encodeULEB128(Value, &EncodedBytes[0]); + return writeBytes({EncodedBytes, Size}); +} + +Error BinaryStreamWriter::writeSLEB128(int64_t Value) { + uint8_t EncodedBytes[10] = {0}; + unsigned Size = encodeSLEB128(Value, &EncodedBytes[0]); + return writeBytes({EncodedBytes, Size}); +} + +Error BinaryStreamWriter::writeCString(StringRef Str) { + if (auto EC = writeFixedString(Str)) + return EC; + if (auto EC = writeObject('\0')) + return EC; + + return Error::success(); +} + +Error BinaryStreamWriter::writeFixedString(StringRef Str) { + + return writeBytes(arrayRefFromStringRef(Str)); +} + +Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref) { + return writeStreamRef(Ref, Ref.getLength()); +} + +Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) { + BinaryStreamReader SrcReader(Ref.slice(0, Length)); + // This is a bit tricky. If we just call readBytes, we are requiring that it + // return us the entire stream as a contiguous buffer. 
There is no guarantee + // this can be satisfied by returning a reference straight from the buffer, as + // an implementation may not store all data in a single contiguous buffer. So + // we iterate over each contiguous chunk, writing each one in succession. + while (SrcReader.bytesRemaining() > 0) { + ArrayRef<uint8_t> Chunk; + if (auto EC = SrcReader.readLongestContiguousChunk(Chunk)) + return EC; + if (auto EC = writeBytes(Chunk)) + return EC; + } + return Error::success(); +} + +std::pair<BinaryStreamWriter, BinaryStreamWriter> +BinaryStreamWriter::split(uint32_t Off) const { + assert(getLength() >= Off); + + WritableBinaryStreamRef First = Stream.drop_front(Offset); + + WritableBinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamWriter W1{First}; + BinaryStreamWriter W2{Second}; + return std::make_pair(W1, W2); +} + +Error BinaryStreamWriter::padToAlignment(uint32_t Align) { + uint32_t NewOffset = alignTo(Offset, Align); + if (NewOffset > getLength()) + return make_error<BinaryStreamError>(stream_error_code::stream_too_short); + while (Offset < NewOffset) + if (auto EC = writeInteger('\0')) + return EC; + return Error::success(); +} diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp new file mode 100644 index 0000000000000..2b63294f3789e --- /dev/null +++ b/llvm/lib/Support/BlockFrequency.cpp @@ -0,0 +1,82 @@ +//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Block Frequency class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BlockFrequency.h" +#include <cassert> + +using namespace llvm; + +BlockFrequency &BlockFrequency::operator*=(BranchProbability Prob) { + Frequency = Prob.scale(Frequency); + return *this; +} + +BlockFrequency BlockFrequency::operator*(BranchProbability Prob) const { + BlockFrequency Freq(Frequency); + Freq *= Prob; + return Freq; +} + +BlockFrequency &BlockFrequency::operator/=(BranchProbability Prob) { + Frequency = Prob.scaleByInverse(Frequency); + return *this; +} + +BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const { + BlockFrequency Freq(Frequency); + Freq /= Prob; + return Freq; +} + +BlockFrequency &BlockFrequency::operator+=(BlockFrequency Freq) { + uint64_t Before = Freq.Frequency; + Frequency += Freq.Frequency; + + // If overflow, set frequency to the maximum value. + if (Frequency < Before) + Frequency = UINT64_MAX; + + return *this; +} + +BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const { + BlockFrequency NewFreq(Frequency); + NewFreq += Freq; + return NewFreq; +} + +BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) { + // If underflow, set frequency to 0. + if (Frequency <= Freq.Frequency) + Frequency = 0; + else + Frequency -= Freq.Frequency; + return *this; +} + +BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const { + BlockFrequency NewFreq(Frequency); + NewFreq -= Freq; + return NewFreq; +} + +BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { + // Frequency can never be 0 by design. + assert(Frequency != 0); + + // Shift right by count. + Frequency >>= count; + + // Saturate to 1 if we are 0. 
+ Frequency |= Frequency == 0; + return *this; +} diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp new file mode 100644 index 0000000000000..195e2d58d8e19 --- /dev/null +++ b/llvm/lib/Support/BranchProbability.cpp @@ -0,0 +1,112 @@ +//===-------------- lib/Support/BranchProbability.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Branch Probability class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BranchProbability.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace llvm; + +const uint32_t BranchProbability::D; + +raw_ostream &BranchProbability::print(raw_ostream &OS) const { + if (isUnknown()) + return OS << "?%"; + + // Get a percentage rounded to two decimal digits. This avoids + // implementation-defined rounding inside printf. 
+ double Percent = rint(((double)N / D) * 100.0 * 100.0) / 100.0; + return OS << format("0x%08" PRIx32 " / 0x%08" PRIx32 " = %.2f%%", N, D, + Percent); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void BranchProbability::dump() const { print(dbgs()) << '\n'; } +#endif + +BranchProbability::BranchProbability(uint32_t Numerator, uint32_t Denominator) { + assert(Denominator > 0 && "Denominator cannot be 0!"); + assert(Numerator <= Denominator && "Probability cannot be bigger than 1!"); + if (Denominator == D) + N = Numerator; + else { + uint64_t Prob64 = + (Numerator * static_cast<uint64_t>(D) + Denominator / 2) / Denominator; + N = static_cast<uint32_t>(Prob64); + } +} + +BranchProbability +BranchProbability::getBranchProbability(uint64_t Numerator, + uint64_t Denominator) { + assert(Numerator <= Denominator && "Probability cannot be bigger than 1!"); + // Scale down Denominator to fit in a 32-bit integer. + int Scale = 0; + while (Denominator > UINT32_MAX) { + Denominator >>= 1; + Scale++; + } + return BranchProbability(Numerator >> Scale, Denominator); +} + +// If ConstD is not zero, then replace D by ConstD so that division and modulo +// operations by D can be optimized, in case this function is not inlined by the +// compiler. +template <uint32_t ConstD> +static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) { + if (ConstD > 0) + D = ConstD; + + assert(D && "divide by 0"); + + // Fast path for multiplying by 1.0. + if (!Num || D == N) + return Num; + + // Split Num into upper and lower parts to multiply, then recombine. + uint64_t ProductHigh = (Num >> 32) * N; + uint64_t ProductLow = (Num & UINT32_MAX) * N; + + // Split into 32-bit digits. + uint32_t Upper32 = ProductHigh >> 32; + uint32_t Lower32 = ProductLow & UINT32_MAX; + uint32_t Mid32Partial = ProductHigh & UINT32_MAX; + uint32_t Mid32 = Mid32Partial + (ProductLow >> 32); + + // Carry. 
+ Upper32 += Mid32 < Mid32Partial; + + uint64_t Rem = (uint64_t(Upper32) << 32) | Mid32; + uint64_t UpperQ = Rem / D; + + // Check for overflow. + if (UpperQ > UINT32_MAX) + return UINT64_MAX; + + Rem = ((Rem % D) << 32) | Lower32; + uint64_t LowerQ = Rem / D; + uint64_t Q = (UpperQ << 32) + LowerQ; + + // Check for overflow. + return Q < LowerQ ? UINT64_MAX : Q; +} + +uint64_t BranchProbability::scale(uint64_t Num) const { + return ::scale<D>(Num, N, D); +} + +uint64_t BranchProbability::scaleByInverse(uint64_t Num) const { + return ::scale<0>(Num, D, N); +} diff --git a/llvm/lib/Support/BuryPointer.cpp b/llvm/lib/Support/BuryPointer.cpp new file mode 100644 index 0000000000000..435f89010d410 --- /dev/null +++ b/llvm/lib/Support/BuryPointer.cpp @@ -0,0 +1,30 @@ +//===- BuryPointer.cpp - Memory Manipulation/Leak ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BuryPointer.h" +#include "llvm/Support/Compiler.h" +#include <atomic> + +namespace llvm { + +void BuryPointer(const void *Ptr) { + // This function may be called only a small fixed amount of times per each + // invocation, otherwise we do actually have a leak which we want to report. + // If this function is called more than kGraveYardMaxSize times, the pointers + // will not be properly buried and a leak detector will report a leak, which + // is what we want in such case. 
+ static const size_t kGraveYardMaxSize = 16; + LLVM_ATTRIBUTE_UNUSED static const void *GraveYard[kGraveYardMaxSize]; + static std::atomic<unsigned> GraveYardSize; + unsigned Idx = GraveYardSize++; + if (Idx >= kGraveYardMaxSize) + return; + GraveYard[Idx] = Ptr; +} + +} diff --git a/llvm/lib/Support/COM.cpp b/llvm/lib/Support/COM.cpp new file mode 100644 index 0000000000000..f37b95ba86512 --- /dev/null +++ b/llvm/lib/Support/COM.cpp @@ -0,0 +1,22 @@ +//===-- COM.cpp - Implement COM utility classes -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements utility classes related to COM. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/COM.h" + +#include "llvm/Config/llvm-config.h" + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/COM.inc" +#elif defined(_WIN32) +#include "Windows/COM.inc" +#endif diff --git a/llvm/lib/Support/COPYRIGHT.regex b/llvm/lib/Support/COPYRIGHT.regex new file mode 100644 index 0000000000000..a6392fd37c3df --- /dev/null +++ b/llvm/lib/Support/COPYRIGHT.regex @@ -0,0 +1,54 @@ +$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ + +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. 
The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 + */ diff --git a/llvm/lib/Support/CRC.cpp b/llvm/lib/Support/CRC.cpp new file mode 100644 index 0000000000000..7c008d3b599da --- /dev/null +++ b/llvm/lib/Support/CRC.cpp @@ -0,0 +1,99 @@ +//===--- CRC.cpp - Cyclic Redundancy Check implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains implementations of CRC functions. +// +// The implementation technique is the one mentioned in: +// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table +// look-up. Commun. ACM 31, 8 (August 1988) +// +// See also Ross N. Williams "A Painless Guide to CRC Error Detection +// Algorithms" (https://zlib.net/crc_v3.txt) or Hacker's Delight (2nd ed.) +// Chapter 14 (Figure 14-7 in particular) for how the algorithm works. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CRC.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Config/config.h" + +using namespace llvm; + +#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H + +static const uint32_t CRCTable[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 
0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; + +uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) { + CRC ^= 0xFFFFFFFFU; + for (uint8_t Byte : Data) { + int TableIdx = (CRC ^ Byte) & 0xff; + CRC = CRCTable[TableIdx] ^ (CRC >> 8); + } + return CRC ^ 0xFFFFFFFFU; +} + +#else + +#include <zlib.h> +uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) { + return ::crc32(CRC, (const Bytef *)Data.data(), Data.size()); +} + +#endif + +uint32_t llvm::crc32(ArrayRef<uint8_t> Data) { return crc32(0, Data); } + +void JamCRC::update(ArrayRef<uint8_t> Data) { + CRC 
^= 0xFFFFFFFFU; // Undo CRC-32 Init. + CRC = crc32(CRC, Data); + CRC ^= 0xFFFFFFFFU; // Undo CRC-32 XorOut. +} diff --git a/llvm/lib/Support/CachePruning.cpp b/llvm/lib/Support/CachePruning.cpp new file mode 100644 index 0000000000000..7a2f6c53435ad --- /dev/null +++ b/llvm/lib/Support/CachePruning.cpp @@ -0,0 +1,292 @@ +//===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the pruning of a directory based on least recently used. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CachePruning.h" + +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "cache-pruning" + +#include <set> +#include <system_error> + +using namespace llvm; + +namespace { +struct FileInfo { + sys::TimePoint<> Time; + uint64_t Size; + std::string Path; + + /// Used to determine which files to prune first. Also used to determine + /// set membership, so must take into account all fields. + bool operator<(const FileInfo &Other) const { + return std::tie(Time, Other.Size, Path) < + std::tie(Other.Time, Size, Other.Path); + } +}; +} // anonymous namespace + +/// Write a new timestamp file with the given path. This is used for the pruning +/// interval option. 
+static void writeTimestampFile(StringRef TimestampFile) { + std::error_code EC; + raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None); +} + +static Expected<std::chrono::seconds> parseDuration(StringRef Duration) { + if (Duration.empty()) + return make_error<StringError>("Duration must not be empty", + inconvertibleErrorCode()); + + StringRef NumStr = Duration.slice(0, Duration.size()-1); + uint64_t Num; + if (NumStr.getAsInteger(0, Num)) + return make_error<StringError>("'" + NumStr + "' not an integer", + inconvertibleErrorCode()); + + switch (Duration.back()) { + case 's': + return std::chrono::seconds(Num); + case 'm': + return std::chrono::minutes(Num); + case 'h': + return std::chrono::hours(Num); + default: + return make_error<StringError>("'" + Duration + + "' must end with one of 's', 'm' or 'h'", + inconvertibleErrorCode()); + } +} + +Expected<CachePruningPolicy> +llvm::parseCachePruningPolicy(StringRef PolicyStr) { + CachePruningPolicy Policy; + std::pair<StringRef, StringRef> P = {"", PolicyStr}; + while (!P.second.empty()) { + P = P.second.split(':'); + + StringRef Key, Value; + std::tie(Key, Value) = P.first.split('='); + if (Key == "prune_interval") { + auto DurationOrErr = parseDuration(Value); + if (!DurationOrErr) + return DurationOrErr.takeError(); + Policy.Interval = *DurationOrErr; + } else if (Key == "prune_after") { + auto DurationOrErr = parseDuration(Value); + if (!DurationOrErr) + return DurationOrErr.takeError(); + Policy.Expiration = *DurationOrErr; + } else if (Key == "cache_size") { + if (Value.back() != '%') + return make_error<StringError>("'" + Value + "' must be a percentage", + inconvertibleErrorCode()); + StringRef SizeStr = Value.drop_back(); + uint64_t Size; + if (SizeStr.getAsInteger(0, Size)) + return make_error<StringError>("'" + SizeStr + "' not an integer", + inconvertibleErrorCode()); + if (Size > 100) + return make_error<StringError>("'" + SizeStr + + "' must be between 0 and 100", + inconvertibleErrorCode()); 
+ Policy.MaxSizePercentageOfAvailableSpace = Size; + } else if (Key == "cache_size_bytes") { + uint64_t Mult = 1; + switch (tolower(Value.back())) { + case 'k': + Mult = 1024; + Value = Value.drop_back(); + break; + case 'm': + Mult = 1024 * 1024; + Value = Value.drop_back(); + break; + case 'g': + Mult = 1024 * 1024 * 1024; + Value = Value.drop_back(); + break; + } + uint64_t Size; + if (Value.getAsInteger(0, Size)) + return make_error<StringError>("'" + Value + "' not an integer", + inconvertibleErrorCode()); + Policy.MaxSizeBytes = Size * Mult; + } else if (Key == "cache_size_files") { + if (Value.getAsInteger(0, Policy.MaxSizeFiles)) + return make_error<StringError>("'" + Value + "' not an integer", + inconvertibleErrorCode()); + } else { + return make_error<StringError>("Unknown key: '" + Key + "'", + inconvertibleErrorCode()); + } + } + + return Policy; +} + +/// Prune the cache of files that haven't been accessed in a long time. +bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { + using namespace std::chrono; + + if (Path.empty()) + return false; + + bool isPathDir; + if (sys::fs::is_directory(Path, isPathDir)) + return false; + + if (!isPathDir) + return false; + + Policy.MaxSizePercentageOfAvailableSpace = + std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u); + + if (Policy.Expiration == seconds(0) && + Policy.MaxSizePercentageOfAvailableSpace == 0 && + Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) { + LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n"); + // Nothing will be pruned, early exit + return false; + } + + // Try to stat() the timestamp file. + SmallString<128> TimestampFile(Path); + sys::path::append(TimestampFile, "llvmcache.timestamp"); + sys::fs::file_status FileStatus; + const auto CurrentTime = system_clock::now(); + if (auto EC = sys::fs::status(TimestampFile, FileStatus)) { + if (EC == errc::no_such_file_or_directory) { + // If the timestamp file wasn't there, create one now. 
+ writeTimestampFile(TimestampFile); + } else { + // Unknown error? + return false; + } + } else { + if (!Policy.Interval) + return false; + if (Policy.Interval != seconds(0)) { + // Check whether the time stamp is older than our pruning interval. + // If not, do nothing. + const auto TimeStampModTime = FileStatus.getLastModificationTime(); + auto TimeStampAge = CurrentTime - TimeStampModTime; + if (TimeStampAge <= *Policy.Interval) { + LLVM_DEBUG(dbgs() << "Timestamp file too recent (" + << duration_cast<seconds>(TimeStampAge).count() + << "s old), do not prune.\n"); + return false; + } + } + // Write a new timestamp file so that nobody else attempts to prune. + // There is a benign race condition here, if two processes happen to + // notice at the same time that the timestamp is out-of-date. + writeTimestampFile(TimestampFile); + } + + // Keep track of files to delete to get below the size limit. + // Order by time of last use so that recently used files are preserved. + std::set<FileInfo> FileInfos; + uint64_t TotalSize = 0; + + // Walk the entire directory cache, looking for unused files. + std::error_code EC; + SmallString<128> CachePathNative; + sys::path::native(Path, CachePathNative); + // Walk all of the files within this directory. + for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd; + File != FileEnd && !EC; File.increment(EC)) { + // Ignore any files not beginning with the string "llvmcache-". This + // includes the timestamp file as well as any files created by the user. + // This acts as a safeguard against data loss if the user specifies the + // wrong directory as their cache directory. + if (!sys::path::filename(File->path()).startswith("llvmcache-")) + continue; + + // Look at this file. If we can't stat it, there's nothing interesting + // there. 
+ ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status(); + if (!StatusOrErr) { + LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n"); + continue; + } + + // If the file hasn't been used recently enough, delete it + const auto FileAccessTime = StatusOrErr->getLastAccessedTime(); + auto FileAge = CurrentTime - FileAccessTime; + if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) { + LLVM_DEBUG(dbgs() << "Remove " << File->path() << " (" + << duration_cast<seconds>(FileAge).count() + << "s old)\n"); + sys::fs::remove(File->path()); + continue; + } + + // Leave it here for now, but add it to the list of size-based pruning. + TotalSize += StatusOrErr->getSize(); + FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()}); + } + + auto FileInfo = FileInfos.begin(); + size_t NumFiles = FileInfos.size(); + + auto RemoveCacheFile = [&]() { + // Remove the file. + sys::fs::remove(FileInfo->Path); + // Update size + TotalSize -= FileInfo->Size; + NumFiles--; + LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size " + << FileInfo->Size << "), new occupancy is " << TotalSize + << "%\n"); + ++FileInfo; + }; + + // Prune for number of files. 
+ if (Policy.MaxSizeFiles) + while (NumFiles > Policy.MaxSizeFiles) + RemoveCacheFile(); + + // Prune for size now if needed + if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) { + auto ErrOrSpaceInfo = sys::fs::disk_space(Path); + if (!ErrOrSpaceInfo) { + report_fatal_error("Can't get available size"); + } + sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get(); + auto AvailableSpace = TotalSize + SpaceInfo.free; + + if (Policy.MaxSizePercentageOfAvailableSpace == 0) + Policy.MaxSizePercentageOfAvailableSpace = 100; + if (Policy.MaxSizeBytes == 0) + Policy.MaxSizeBytes = AvailableSpace; + auto TotalSizeTarget = std::min<uint64_t>( + AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull, + Policy.MaxSizeBytes); + + LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace) + << "% target is: " + << Policy.MaxSizePercentageOfAvailableSpace << "%, " + << Policy.MaxSizeBytes << " bytes\n"); + + // Remove the oldest accessed files first, till we get below the threshold. + while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end()) + RemoveCacheFile(); + } + return true; +} diff --git a/llvm/lib/Support/Chrono.cpp b/llvm/lib/Support/Chrono.cpp new file mode 100644 index 0000000000000..8c28d45d88228 --- /dev/null +++ b/llvm/lib/Support/Chrono.cpp @@ -0,0 +1,93 @@ +//===- Support/Chrono.cpp - Utilities for Timing Manipulation ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Chrono.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +using namespace sys; + +const char llvm::detail::unit<std::ratio<3600>>::value[] = "h"; +const char llvm::detail::unit<std::ratio<60>>::value[] = "m"; +const char llvm::detail::unit<std::ratio<1>>::value[] = "s"; +const char llvm::detail::unit<std::milli>::value[] = "ms"; +const char llvm::detail::unit<std::micro>::value[] = "us"; +const char llvm::detail::unit<std::nano>::value[] = "ns"; + +static inline struct tm getStructTM(TimePoint<> TP) { + struct tm Storage; + std::time_t OurTime = toTimeT(TP); + +#if defined(LLVM_ON_UNIX) + struct tm *LT = ::localtime_r(&OurTime, &Storage); + assert(LT); + (void)LT; +#endif +#if defined(_WIN32) + int Error = ::localtime_s(&Storage, &OurTime); + assert(!Error); + (void)Error; +#endif + + return Storage; +} + +raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) { + struct tm LT = getStructTM(TP); + char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")]; + strftime(Buffer, sizeof(Buffer), "%Y-%m-%d %H:%M:%S", <); + return OS << Buffer << '.' + << format("%.9lu", + long((TP.time_since_epoch() % std::chrono::seconds(1)) + .count())); +} + +void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, + StringRef Style) { + using namespace std::chrono; + TimePoint<seconds> Truncated = time_point_cast<seconds>(T); + auto Fractional = T - Truncated; + struct tm LT = getStructTM(Truncated); + // Handle extensions first. strftime mangles unknown %x on some platforms. 
+ if (Style.empty()) Style = "%Y-%m-%d %H:%M:%S.%N"; + std::string Format; + raw_string_ostream FStream(Format); + for (unsigned I = 0; I < Style.size(); ++I) { + if (Style[I] == '%' && Style.size() > I + 1) switch (Style[I + 1]) { + case 'L': // Milliseconds, from Ruby. + FStream << llvm::format( + "%.3lu", (long)duration_cast<milliseconds>(Fractional).count()); + ++I; + continue; + case 'f': // Microseconds, from Python. + FStream << llvm::format( + "%.6lu", (long)duration_cast<microseconds>(Fractional).count()); + ++I; + continue; + case 'N': // Nanoseconds, from date(1). + FStream << llvm::format( + "%.6lu", (long)duration_cast<nanoseconds>(Fractional).count()); + ++I; + continue; + case '%': // Consume %%, so %%f parses as (%%)f not %(%f) + FStream << "%%"; + ++I; + continue; + } + FStream << Style[I]; + } + FStream.flush(); + char Buffer[256]; // Should be enough for anywhen. + size_t Len = strftime(Buffer, sizeof(Buffer), Format.c_str(), <); + OS << (Len ? Buffer : "BAD-DATE-FORMAT"); +} + +} // namespace llvm diff --git a/llvm/lib/Support/CodeGenCoverage.cpp b/llvm/lib/Support/CodeGenCoverage.cpp new file mode 100644 index 0000000000000..2db4193ce3825 --- /dev/null +++ b/llvm/lib/Support/CodeGenCoverage.cpp @@ -0,0 +1,124 @@ +//===- lib/Support/CodeGenCoverage.cpp -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the CodeGenCoverage class. 
+//===----------------------------------------------------------------------===// + +#include "llvm/Support/CodeGenCoverage.h" + +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/ToolOutputFile.h" + +#if LLVM_ON_UNIX +#include <unistd.h> +#elif defined(_WIN32) +#include <windows.h> +#endif + +using namespace llvm; + +static sys::SmartMutex<true> OutputMutex; + +CodeGenCoverage::CodeGenCoverage() {} + +void CodeGenCoverage::setCovered(uint64_t RuleID) { + if (RuleCoverage.size() <= RuleID) + RuleCoverage.resize(RuleID + 1, 0); + RuleCoverage[RuleID] = true; +} + +bool CodeGenCoverage::isCovered(uint64_t RuleID) const { + if (RuleCoverage.size() <= RuleID) + return false; + return RuleCoverage[RuleID]; +} + +iterator_range<CodeGenCoverage::const_covered_iterator> +CodeGenCoverage::covered() const { + return RuleCoverage.set_bits(); +} + +bool CodeGenCoverage::parse(MemoryBuffer &Buffer, StringRef BackendName) { + const char *CurPtr = Buffer.getBufferStart(); + + while (CurPtr != Buffer.getBufferEnd()) { + // Read the backend name from the input. + const char *LexedBackendName = CurPtr; + while (*CurPtr++ != 0) + ; + if (CurPtr == Buffer.getBufferEnd()) + return false; // Data is invalid, expected rule id's to follow. + + bool IsForThisBackend = BackendName.equals(LexedBackendName); + while (CurPtr != Buffer.getBufferEnd()) { + if (std::distance(CurPtr, Buffer.getBufferEnd()) < 8) + return false; // Data is invalid. Not enough bytes for another rule id. + + uint64_t RuleID = support::endian::read64(CurPtr, support::native); + CurPtr += 8; + + // ~0ull terminates the rule id list. + if (RuleID == ~0ull) + break; + + // Anything else, is recorded or ignored depending on whether it's + // intended for the backend we're interested in. 
+ if (IsForThisBackend) + setCovered(RuleID); + } + } + + return true; +} + +bool CodeGenCoverage::emit(StringRef CoveragePrefix, + StringRef BackendName) const { + if (!CoveragePrefix.empty() && !RuleCoverage.empty()) { + sys::SmartScopedLock<true> Lock(OutputMutex); + + // We can handle locking within a process easily enough but we don't want to + // manage it between multiple processes. Use the process ID to ensure no + // more than one process is ever writing to the same file at the same time. + std::string Pid = +#if LLVM_ON_UNIX + llvm::to_string(::getpid()); +#elif defined(_WIN32) + llvm::to_string(::GetCurrentProcessId()); +#else + ""; +#endif + + std::string CoverageFilename = (CoveragePrefix + Pid).str(); + + std::error_code EC; + sys::fs::OpenFlags OpenFlags = sys::fs::OF_Append; + std::unique_ptr<ToolOutputFile> CoverageFile = + std::make_unique<ToolOutputFile>(CoverageFilename, EC, OpenFlags); + if (EC) + return false; + + uint64_t Zero = 0; + uint64_t InvZero = ~0ull; + CoverageFile->os() << BackendName; + CoverageFile->os().write((const char *)&Zero, sizeof(unsigned char)); + for (uint64_t I : RuleCoverage.set_bits()) + CoverageFile->os().write((const char *)&I, sizeof(uint64_t)); + CoverageFile->os().write((const char *)&InvZero, sizeof(uint64_t)); + + CoverageFile->keep(); + } + + return true; +} + +void CodeGenCoverage::reset() { RuleCoverage.resize(0); } diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp new file mode 100644 index 0000000000000..620f7ffd4c9fa --- /dev/null +++ b/llvm/lib/Support/CommandLine.cpp @@ -0,0 +1,2495 @@ +//===-- CommandLine.cpp - Command line parser implementation --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class implements a command line argument processor that is useful when +// creating a tool. It provides a simple, minimalistic interface that is easily +// extensible and supports nonlocal (library) command line options. +// +// Note that rather than trying to figure out what this code does, you could try +// reading the library documentation located in docs/CommandLine.html +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm-c/Support.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdlib> +#include <map> +using namespace llvm; +using namespace cl; + +#define DEBUG_TYPE "commandline" + +//===----------------------------------------------------------------------===// +// Template instantiations and anchors. 
+// +namespace llvm { +namespace cl { +template class basic_parser<bool>; +template class basic_parser<boolOrDefault>; +template class basic_parser<int>; +template class basic_parser<unsigned>; +template class basic_parser<unsigned long>; +template class basic_parser<unsigned long long>; +template class basic_parser<double>; +template class basic_parser<float>; +template class basic_parser<std::string>; +template class basic_parser<char>; + +template class opt<unsigned>; +template class opt<int>; +template class opt<std::string>; +template class opt<char>; +template class opt<bool>; +} +} // end namespace llvm::cl + +// Pin the vtables to this file. +void GenericOptionValue::anchor() {} +void OptionValue<boolOrDefault>::anchor() {} +void OptionValue<std::string>::anchor() {} +void Option::anchor() {} +void basic_parser_impl::anchor() {} +void parser<bool>::anchor() {} +void parser<boolOrDefault>::anchor() {} +void parser<int>::anchor() {} +void parser<unsigned>::anchor() {} +void parser<unsigned long>::anchor() {} +void parser<unsigned long long>::anchor() {} +void parser<double>::anchor() {} +void parser<float>::anchor() {} +void parser<std::string>::anchor() {} +void parser<char>::anchor() {} + +//===----------------------------------------------------------------------===// + +static StringRef ArgPrefix = " -"; +static StringRef ArgPrefixLong = " --"; +static StringRef ArgHelpPrefix = " - "; + +static size_t argPlusPrefixesSize(StringRef ArgName) { + size_t Len = ArgName.size(); + if (Len == 1) + return Len + ArgPrefix.size() + ArgHelpPrefix.size(); + return Len + ArgPrefixLong.size() + ArgHelpPrefix.size(); +} + +static StringRef argPrefix(StringRef ArgName) { + if (ArgName.size() == 1) + return ArgPrefix; + return ArgPrefixLong; +} + +// Option predicates... 
/// True if \p O has the cl::Grouping misc flag set.
static inline bool isGrouping(const Option *O) {
  return O->getMiscFlags() & cl::Grouping;
}
/// True if \p O is a grouping option or uses the Prefix / AlwaysPrefix
/// formatting flag.
static inline bool isPrefixedOrGrouping(const Option *O) {
  return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
         O->getFormattingFlag() == cl::AlwaysPrefix;
}


namespace {

/// Stream helper that prints an option name preceded by its prefix.
class PrintArg {
  StringRef ArgName;
public:
  PrintArg(StringRef ArgName) : ArgName(ArgName) {}
  friend raw_ostream &operator<<(raw_ostream &OS, const PrintArg&);
};

raw_ostream &operator<<(raw_ostream &OS, const PrintArg& Arg) {
  OS << argPrefix(Arg.ArgName) << Arg.ArgName;
  return OS;
}

/// Registry of options, option categories and subcommands; a single instance
/// is held in the GlobalParser ManagedStatic below.
class CommandLineParser {
public:
  // Globals for name and overview of program. Program name is not a string to
  // avoid static ctor/dtor issues.
  std::string ProgramName;
  StringRef ProgramOverview;

  // This collects additional help to be printed.
  std::vector<StringRef> MoreHelp;

  // This collects Options added with the cl::DefaultOption flag. Since they can
  // be overridden, they are not added to the appropriate SubCommands until
  // ParseCommandLineOptions actually runs.
  SmallVector<Option*, 4> DefaultOptions;

  // This collects the different option categories that have been registered.
  SmallPtrSet<OptionCategory *, 16> RegisteredOptionCategories;

  // This collects the different subcommands that have been registered.
  SmallPtrSet<SubCommand *, 4> RegisteredSubCommands;

  // The two built-in subcommands are always registered.
  CommandLineParser() : ActiveSubCommand(nullptr) {
    registerSubCommand(&*TopLevelSubCommand);
    registerSubCommand(&*AllSubCommands);
  }

  void ResetAllOptionOccurrences();

  bool ParseCommandLineOptions(int argc, const char *const *argv,
                               StringRef Overview, raw_ostream *Errs = nullptr,
                               bool LongOptionsUseDoubleDash = false);

  /// Register \p Name as a spelling of \p Opt in subcommand \p SC. Does
  /// nothing when \p Opt has its own argument string; a duplicate name is a
  /// fatal error.
  void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) {
    if (Opt.hasArgStr())
      return;
    if (!SC->OptionsMap.insert(std::make_pair(Name, &Opt)).second) {
      errs() << ProgramName << ": CommandLine Error: Option '" << Name
             << "' registered more than once!\n";
      report_fatal_error("inconsistency in registered CommandLine options");
    }

    // If we're adding this to all sub-commands, add it to the ones that have
    // already been registered.
    if (SC == &*AllSubCommands) {
      for (const auto &Sub : RegisteredSubCommands) {
        if (SC == Sub)
          continue;
        addLiteralOption(Opt, Sub, Name);
      }
    }
  }

  /// Register \p Name for \p Opt in each of its subcommands, or in the
  /// top-level subcommand when it lists none.
  void addLiteralOption(Option &Opt, StringRef Name) {
    if (Opt.Subs.empty())
      addLiteralOption(Opt, &*TopLevelSubCommand, Name);
    else {
      for (auto SC : Opt.Subs)
        addLiteralOption(Opt, SC, Name);
    }
  }

  /// Add \p O to subcommand \p SC: into the name map when it has an argument
  /// string, and into the positional / sink / consume-after slots according
  /// to its flags. Conflicts end in report_fatal_error.
  void addOption(Option *O, SubCommand *SC) {
    bool HadErrors = false;
    if (O->hasArgStr()) {
      // If it's a DefaultOption, check to make sure it isn't already there.
      if (O->isDefaultOption() &&
          SC->OptionsMap.find(O->ArgStr) != SC->OptionsMap.end())
        return;

      // Add argument to the argument map!
      if (!SC->OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) {
        errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
               << "' registered more than once!\n";
        HadErrors = true;
      }
    }

    // Remember information about positional options.
    if (O->getFormattingFlag() == cl::Positional)
      SC->PositionalOpts.push_back(O);
    else if (O->getMiscFlags() & cl::Sink) // Remember sink options
      SC->SinkOpts.push_back(O);
    else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
      if (SC->ConsumeAfterOpt) {
        O->error("Cannot specify more than one option with cl::ConsumeAfter!");
        HadErrors = true;
      }
      SC->ConsumeAfterOpt = O;
    }

    // Fail hard if there were errors. These are strictly unrecoverable and
    // indicate serious issues such as conflicting option names or an
    // incorrectly linked LLVM distribution.
    if (HadErrors)
      report_fatal_error("inconsistency in registered CommandLine options");

    // If we're adding this to all sub-commands, add it to the ones that have
    // already been registered.
    if (SC == &*AllSubCommands) {
      for (const auto &Sub : RegisteredSubCommands) {
        if (SC == Sub)
          continue;
        addOption(O, Sub);
      }
    }
  }

  /// Add \p O to each of its subcommands (or the top-level one). Default
  /// options are only queued in DefaultOptions unless \p ProcessDefaultOption
  /// is set.
  void addOption(Option *O, bool ProcessDefaultOption = false) {
    if (!ProcessDefaultOption && O->isDefaultOption()) {
      DefaultOptions.push_back(O);
      return;
    }

    if (O->Subs.empty()) {
      addOption(O, &*TopLevelSubCommand);
    } else {
      for (auto SC : O->Subs)
        addOption(O, SC);
    }
  }

  /// Remove every registration of \p O from subcommand \p SC: all of its
  /// names in the map, and its positional / sink / consume-after slot.
  void removeOption(Option *O, SubCommand *SC) {
    SmallVector<StringRef, 16> OptionNames;
    O->getExtraOptionNames(OptionNames);
    if (O->hasArgStr())
      OptionNames.push_back(O->ArgStr);

    SubCommand &Sub = *SC;
    auto End = Sub.OptionsMap.end();
    for (auto Name : OptionNames) {
      auto I = Sub.OptionsMap.find(Name);
      // Only erase entries that actually point at this option.
      if (I != End && I->getValue() == O)
        Sub.OptionsMap.erase(I);
    }

    if (O->getFormattingFlag() == cl::Positional)
      for (auto Opt = Sub.PositionalOpts.begin();
           Opt != Sub.PositionalOpts.end(); ++Opt) {
        if (*Opt == O) {
          Sub.PositionalOpts.erase(Opt);
          break;
        }
      }
    else if (O->getMiscFlags() & cl::Sink)
      for (auto Opt = Sub.SinkOpts.begin(); Opt != Sub.SinkOpts.end(); ++Opt) {
        if (*Opt == O) {
          Sub.SinkOpts.erase(Opt);
          break;
        }
      }
    else if (O == Sub.ConsumeAfterOpt)
      Sub.ConsumeAfterOpt = nullptr;
  }

  /// Remove \p O from every subcommand it was added to.
  void removeOption(Option *O) {
    if (O->Subs.empty())
      removeOption(O, &*TopLevelSubCommand);
    else {
      if (O->isInAllSubCommands()) {
        for (auto SC : RegisteredSubCommands)
          removeOption(O, SC);
      } else {
        for (auto SC : O->Subs)
          removeOption(O, SC);
      }
    }
  }

  /// True if \p Sub has any named, positional or consume-after option.
  bool hasOptions(const SubCommand &Sub) const {
    return (!Sub.OptionsMap.empty() || !Sub.PositionalOpts.empty() ||
            nullptr != Sub.ConsumeAfterOpt);
  }

  /// True if any registered subcommand has options.
  bool hasOptions() const {
    for (const auto &S : RegisteredSubCommands) {
      if (hasOptions(*S))
        return true;
    }
    return false;
  }

  SubCommand *getActiveSubCommand() { return ActiveSubCommand; }

  /// Re-key \p O in \p SC's map from its current ArgStr to \p NewName. A
  /// clash on the new name is a fatal error.
  void updateArgStr(Option *O, StringRef NewName, SubCommand *SC) {
    SubCommand &Sub = *SC;
    if (!Sub.OptionsMap.insert(std::make_pair(NewName, O)).second) {
      // NOTE(review): the message prints the old name (O->ArgStr), not the
      // NewName that collided — confirm this is intended.
      errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
             << "' registered more than once!\n";
      report_fatal_error("inconsistency in registered CommandLine options");
    }
    Sub.OptionsMap.erase(O->ArgStr);
  }

  /// Apply the rename in every subcommand \p O belongs to.
  void updateArgStr(Option *O, StringRef NewName) {
    if (O->Subs.empty())
      updateArgStr(O, NewName, &*TopLevelSubCommand);
    else {
      if (O->isInAllSubCommands()) {
        for (auto SC : RegisteredSubCommands)
          updateArgStr(O, NewName, SC);
      } else {
        for (auto SC : O->Subs)
          updateArgStr(O, NewName, SC);
      }
    }
  }

  void printOptionValues();

  /// Register \p cat; asserts that no category with the same name exists.
  void registerCategory(OptionCategory *cat) {
    assert(count_if(RegisteredOptionCategories,
                    [cat](const OptionCategory *Category) {
             return cat->getName() == Category->getName();
           }) == 0 &&
           "Duplicate option categories");

    RegisteredOptionCategories.insert(cat);
  }

  /// Register \p sub; asserts that no other subcommand has the same non-empty
  /// name.
  void registerSubCommand(SubCommand *sub) {
    assert(count_if(RegisteredSubCommands,
                    [sub](const SubCommand *Sub) {
                      return (!sub->getName().empty()) &&
                             (Sub->getName() == sub->getName());
                    }) == 0 &&
           "Duplicate subcommands");
    RegisteredSubCommands.insert(sub);

    // For all options that have been registered for all subcommands, add the
    // option to this subcommand now.
    if (sub != &*AllSubCommands) {
      for (auto &E : AllSubCommands->OptionsMap) {
        Option *O = E.second;
        if ((O->isPositional() || O->isSink() || O->isConsumeAfter()) ||
            O->hasArgStr())
          addOption(O, sub);
        else
          addLiteralOption(*O, sub, E.first());
      }
    }
  }

  void unregisterSubCommand(SubCommand *sub) {
    RegisteredSubCommands.erase(sub);
  }

  iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator>
  getRegisteredSubcommands() {
    return make_range(RegisteredSubCommands.begin(),
                      RegisteredSubCommands.end());
  }

  /// Return the parser to its freshly constructed state: clear all recorded
  /// data, reset the two built-in subcommands and register them again.
  void reset() {
    ActiveSubCommand = nullptr;
    ProgramName.clear();
    ProgramOverview = StringRef();

    MoreHelp.clear();
    RegisteredOptionCategories.clear();

    ResetAllOptionOccurrences();
    RegisteredSubCommands.clear();

    TopLevelSubCommand->reset();
    AllSubCommands->reset();
    registerSubCommand(&*TopLevelSubCommand);
    registerSubCommand(&*AllSubCommands);

    DefaultOptions.clear();
  }

private:
  SubCommand *ActiveSubCommand;

  Option *LookupOption(SubCommand &Sub, StringRef &Arg, StringRef &Value);
  /// Like LookupOption, but returns nullptr for a non-grouping option when
  /// \p LongOptionsUseDoubleDash is set and the argument was not written with
  /// a double dash.
  Option *LookupLongOption(SubCommand &Sub, StringRef &Arg, StringRef &Value,
                           bool LongOptionsUseDoubleDash, bool HaveDoubleDash) {
    Option *Opt = LookupOption(Sub, Arg, Value);
    if (Opt && LongOptionsUseDoubleDash && !HaveDoubleDash && !isGrouping(Opt))
      return nullptr;
    return Opt;
  }
  SubCommand *LookupSubCommand(StringRef Name);
};

} // namespace

static ManagedStatic<CommandLineParser> GlobalParser;

void cl::AddLiteralOption(Option &O, StringRef Name) {
  GlobalParser->addLiteralOption(O, Name);
}

extrahelp::extrahelp(StringRef Help) : morehelp(Help) {
  GlobalParser->MoreHelp.push_back(Help);
}

void Option::addArgument() {
  GlobalParser->addOption(this);
  FullyInitialized = true;
}

void Option::removeArgument() { GlobalParser->removeOption(this); }

/// Set the option's name. Once the option is fully initialized the global
/// parser's maps are re-keyed first. One-character names get the Grouping
/// flag.
void Option::setArgStr(StringRef S) {
  if (FullyInitialized)
    GlobalParser->updateArgStr(this, S);
  assert((S.empty() || S[0] != '-') && "Option can't start with '-");
  ArgStr = S;
  if (ArgStr.size() == 1)
    setMiscFlag(Grouping);
}

void Option::addCategory(OptionCategory &C) {
  assert(!Categories.empty() && "Categories cannot be empty.");
  // Maintain backward compatibility by replacing the default GeneralCategory
  // if it's still set.  Otherwise, just add the new one.  The GeneralCategory
  // must be explicitly added if you want multiple categories that include it.
  if (&C != &GeneralCategory && Categories[0] == &GeneralCategory)
    Categories[0] = &C;
  else if (find(Categories, &C) == Categories.end())
    Categories.push_back(&C);
}

/// Clear the occurrence count and restore the default value. Default options
/// are also removed from the parser.
void Option::reset() {
  NumOccurrences = 0;
  setDefault();
  if (isDefaultOption())
    removeArgument();
}

// Initialise the general option category.
OptionCategory llvm::cl::GeneralCategory("General options");

void OptionCategory::registerCategory() {
  GlobalParser->registerCategory(this);
}

// A special subcommand representing no subcommand. It is particularly important
// that this ManagedStatic uses constant initialization and not dynamic
// initialization because it is referenced from cl::opt constructors, which run
// dynamically in an arbitrary order.
LLVM_REQUIRE_CONSTANT_INITIALIZATION
ManagedStatic<SubCommand> llvm::cl::TopLevelSubCommand;

// A special subcommand that can be used to put an option into all subcommands.
+ManagedStatic<SubCommand> llvm::cl::AllSubCommands; + +void SubCommand::registerSubCommand() { + GlobalParser->registerSubCommand(this); +} + +void SubCommand::unregisterSubCommand() { + GlobalParser->unregisterSubCommand(this); +} + +void SubCommand::reset() { + PositionalOpts.clear(); + SinkOpts.clear(); + OptionsMap.clear(); + + ConsumeAfterOpt = nullptr; +} + +SubCommand::operator bool() const { + return (GlobalParser->getActiveSubCommand() == this); +} + +//===----------------------------------------------------------------------===// +// Basic, shared command line option processing machinery. +// + +/// LookupOption - Lookup the option specified by the specified option on the +/// command line. If there is a value specified (after an equal sign) return +/// that as well. This assumes that leading dashes have already been stripped. +Option *CommandLineParser::LookupOption(SubCommand &Sub, StringRef &Arg, + StringRef &Value) { + // Reject all dashes. + if (Arg.empty()) + return nullptr; + assert(&Sub != &*AllSubCommands); + + size_t EqualPos = Arg.find('='); + + // If we have an equals sign, remember the value. + if (EqualPos == StringRef::npos) { + // Look up the option. + auto I = Sub.OptionsMap.find(Arg); + if (I == Sub.OptionsMap.end()) + return nullptr; + + return I != Sub.OptionsMap.end() ? I->second : nullptr; + } + + // If the argument before the = is a valid option name and the option allows + // non-prefix form (ie is not AlwaysPrefix), we match. If not, signal match + // failure by returning nullptr. 
+ auto I = Sub.OptionsMap.find(Arg.substr(0, EqualPos)); + if (I == Sub.OptionsMap.end()) + return nullptr; + + auto O = I->second; + if (O->getFormattingFlag() == cl::AlwaysPrefix) + return nullptr; + + Value = Arg.substr(EqualPos + 1); + Arg = Arg.substr(0, EqualPos); + return I->second; +} + +SubCommand *CommandLineParser::LookupSubCommand(StringRef Name) { + if (Name.empty()) + return &*TopLevelSubCommand; + for (auto S : RegisteredSubCommands) { + if (S == &*AllSubCommands) + continue; + if (S->getName().empty()) + continue; + + if (StringRef(S->getName()) == StringRef(Name)) + return S; + } + return &*TopLevelSubCommand; +} + +/// LookupNearestOption - Lookup the closest match to the option specified by +/// the specified option on the command line. If there is a value specified +/// (after an equal sign) return that as well. This assumes that leading dashes +/// have already been stripped. +static Option *LookupNearestOption(StringRef Arg, + const StringMap<Option *> &OptionsMap, + std::string &NearestString) { + // Reject all dashes. + if (Arg.empty()) + return nullptr; + + // Split on any equal sign. + std::pair<StringRef, StringRef> SplitArg = Arg.split('='); + StringRef &LHS = SplitArg.first; // LHS == Arg when no '=' is present. + StringRef &RHS = SplitArg.second; + + // Find the closest match. + Option *Best = nullptr; + unsigned BestDistance = 0; + for (StringMap<Option *>::const_iterator it = OptionsMap.begin(), + ie = OptionsMap.end(); + it != ie; ++it) { + Option *O = it->second; + SmallVector<StringRef, 16> OptionNames; + O->getExtraOptionNames(OptionNames); + if (O->hasArgStr()) + OptionNames.push_back(O->ArgStr); + + bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed; + StringRef Flag = PermitValue ? 
LHS : Arg; + for (auto Name : OptionNames) { + unsigned Distance = StringRef(Name).edit_distance( + Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); + if (!Best || Distance < BestDistance) { + Best = O; + BestDistance = Distance; + if (RHS.empty() || !PermitValue) + NearestString = Name; + else + NearestString = (Twine(Name) + "=" + RHS).str(); + } + } + } + + return Best; +} + +/// CommaSeparateAndAddOccurrence - A wrapper around Handler->addOccurrence() +/// that does special handling of cl::CommaSeparated options. +static bool CommaSeparateAndAddOccurrence(Option *Handler, unsigned pos, + StringRef ArgName, StringRef Value, + bool MultiArg = false) { + // Check to see if this option accepts a comma separated list of values. If + // it does, we have to split up the value into multiple values. + if (Handler->getMiscFlags() & CommaSeparated) { + StringRef Val(Value); + StringRef::size_type Pos = Val.find(','); + + while (Pos != StringRef::npos) { + // Process the portion before the comma. + if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg)) + return true; + // Erase the portion before the comma, AND the comma. + Val = Val.substr(Pos + 1); + // Check for another comma. + Pos = Val.find(','); + } + + Value = Val; + } + + return Handler->addOccurrence(pos, ArgName, Value, MultiArg); +} + +/// ProvideOption - For Value, this differentiates between an empty value ("") +/// and a null value (StringRef()). The later is accepted for arguments that +/// don't allow a value (-foo) the former is rejected (-foo=). +static inline bool ProvideOption(Option *Handler, StringRef ArgName, + StringRef Value, int argc, + const char *const *argv, int &i) { + // Is this a multi-argument option? + unsigned NumAdditionalVals = Handler->getNumAdditionalVals(); + + // Enforce value requirements + switch (Handler->getValueExpectedFlag()) { + case ValueRequired: + if (!Value.data()) { // No value specified? 
+ // If no other argument or the option only supports prefix form, we + // cannot look at the next argument. + if (i + 1 >= argc || Handler->getFormattingFlag() == cl::AlwaysPrefix) + return Handler->error("requires a value!"); + // Steal the next argument, like for '-o filename' + assert(argv && "null check"); + Value = StringRef(argv[++i]); + } + break; + case ValueDisallowed: + if (NumAdditionalVals > 0) + return Handler->error("multi-valued option specified" + " with ValueDisallowed modifier!"); + + if (Value.data()) + return Handler->error("does not allow a value! '" + Twine(Value) + + "' specified."); + break; + case ValueOptional: + break; + } + + // If this isn't a multi-arg option, just run the handler. + if (NumAdditionalVals == 0) + return CommaSeparateAndAddOccurrence(Handler, i, ArgName, Value); + + // If it is, run the handle several times. + bool MultiArg = false; + + if (Value.data()) { + if (CommaSeparateAndAddOccurrence(Handler, i, ArgName, Value, MultiArg)) + return true; + --NumAdditionalVals; + MultiArg = true; + } + + while (NumAdditionalVals > 0) { + if (i + 1 >= argc) + return Handler->error("not enough values!"); + assert(argv && "null check"); + Value = StringRef(argv[++i]); + + if (CommaSeparateAndAddOccurrence(Handler, i, ArgName, Value, MultiArg)) + return true; + MultiArg = true; + --NumAdditionalVals; + } + return false; +} + +bool llvm::cl::ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { + int Dummy = i; + return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy); +} + +// getOptionPred - Check to see if there are any options that satisfy the +// specified predicate with names that are the prefixes in Name. This is +// checked by progressively stripping characters off of the name, checking to +// see if there options that satisfy the predicate. If we find one, return it, +// otherwise return null. 
+// +static Option *getOptionPred(StringRef Name, size_t &Length, + bool (*Pred)(const Option *), + const StringMap<Option *> &OptionsMap) { + StringMap<Option *>::const_iterator OMI = OptionsMap.find(Name); + if (OMI != OptionsMap.end() && !Pred(OMI->getValue())) + OMI = OptionsMap.end(); + + // Loop while we haven't found an option and Name still has at least two + // characters in it (so that the next iteration will not be the empty + // string. + while (OMI == OptionsMap.end() && Name.size() > 1) { + Name = Name.substr(0, Name.size() - 1); // Chop off the last character. + OMI = OptionsMap.find(Name); + if (OMI != OptionsMap.end() && !Pred(OMI->getValue())) + OMI = OptionsMap.end(); + } + + if (OMI != OptionsMap.end() && Pred(OMI->second)) { + Length = Name.size(); + return OMI->second; // Found one! + } + return nullptr; // No option found! +} + +/// HandlePrefixedOrGroupedOption - The specified argument string (which started +/// with at least one '-') does not fully match an available option. Check to +/// see if this is a prefix or grouped option. If so, split arg into output an +/// Arg/Value pair and return the Option to parse it with. +static Option * +HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, + bool &ErrorParsing, + const StringMap<Option *> &OptionsMap) { + if (Arg.size() == 1) + return nullptr; + + // Do the lookup! + size_t Length = 0; + Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap); + if (!PGOpt) + return nullptr; + + do { + StringRef MaybeValue = + (Length < Arg.size()) ? Arg.substr(Length) : StringRef(); + Arg = Arg.substr(0, Length); + assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt); + + // cl::Prefix options do not preserve '=' when used separately. + // The behavior for them with grouped options should be the same. 
+ if (MaybeValue.empty() || PGOpt->getFormattingFlag() == cl::AlwaysPrefix || + (PGOpt->getFormattingFlag() == cl::Prefix && MaybeValue[0] != '=')) { + Value = MaybeValue; + return PGOpt; + } + + if (MaybeValue[0] == '=') { + Value = MaybeValue.substr(1); + return PGOpt; + } + + // This must be a grouped option. + assert(isGrouping(PGOpt) && "Broken getOptionPred!"); + + // Grouping options inside a group can't have values. + if (PGOpt->getValueExpectedFlag() == cl::ValueRequired) { + ErrorParsing |= PGOpt->error("may not occur within a group!"); + return nullptr; + } + + // Because the value for the option is not required, we don't need to pass + // argc/argv in. + int Dummy = 0; + ErrorParsing |= ProvideOption(PGOpt, Arg, StringRef(), 0, nullptr, Dummy); + + // Get the next grouping option. + Arg = MaybeValue; + PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap); + } while (PGOpt); + + // We could not find a grouping option in the remainder of Arg. + return nullptr; +} + +static bool RequiresValue(const Option *O) { + return O->getNumOccurrencesFlag() == cl::Required || + O->getNumOccurrencesFlag() == cl::OneOrMore; +} + +static bool EatsUnboundedNumberOfValues(const Option *O) { + return O->getNumOccurrencesFlag() == cl::ZeroOrMore || + O->getNumOccurrencesFlag() == cl::OneOrMore; +} + +static bool isWhitespace(char C) { + return C == ' ' || C == '\t' || C == '\r' || C == '\n'; +} + +static bool isWhitespaceOrNull(char C) { + return isWhitespace(C) || C == '\0'; +} + +static bool isQuote(char C) { return C == '\"' || C == '\''; } + +void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv, + bool MarkEOLs) { + SmallString<128> Token; + for (size_t I = 0, E = Src.size(); I != E; ++I) { + // Consume runs of whitespace. 
+ if (Token.empty()) { + while (I != E && isWhitespace(Src[I])) { + // Mark the end of lines in response files + if (MarkEOLs && Src[I] == '\n') + NewArgv.push_back(nullptr); + ++I; + } + if (I == E) + break; + } + + char C = Src[I]; + + // Backslash escapes the next character. + if (I + 1 < E && C == '\\') { + ++I; // Skip the escape. + Token.push_back(Src[I]); + continue; + } + + // Consume a quoted string. + if (isQuote(C)) { + ++I; + while (I != E && Src[I] != C) { + // Backslash escapes the next character. + if (Src[I] == '\\' && I + 1 != E) + ++I; + Token.push_back(Src[I]); + ++I; + } + if (I == E) + break; + continue; + } + + // End the token if this is whitespace. + if (isWhitespace(C)) { + if (!Token.empty()) + NewArgv.push_back(Saver.save(StringRef(Token)).data()); + Token.clear(); + continue; + } + + // This is a normal character. Append it. + Token.push_back(C); + } + + // Append the last token after hitting EOF with no whitespace. + if (!Token.empty()) + NewArgv.push_back(Saver.save(StringRef(Token)).data()); + // Mark the end of response files + if (MarkEOLs) + NewArgv.push_back(nullptr); +} + +/// Backslashes are interpreted in a rather complicated way in the Windows-style +/// command line, because backslashes are used both to separate path and to +/// escape double quote. This method consumes runs of backslashes as well as the +/// following double quote if it's escaped. +/// +/// * If an even number of backslashes is followed by a double quote, one +/// backslash is output for every pair of backslashes, and the last double +/// quote remains unconsumed. The double quote will later be interpreted as +/// the start or end of a quoted string in the main loop outside of this +/// function. +/// +/// * If an odd number of backslashes is followed by a double quote, one +/// backslash is output for every pair of backslashes, and a double quote is +/// output for the last pair of backslash-double quote. The double quote is +/// consumed in this case. 
+/// +/// * Otherwise, backslashes are interpreted literally. +static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) { + size_t E = Src.size(); + int BackslashCount = 0; + // Skip the backslashes. + do { + ++I; + ++BackslashCount; + } while (I != E && Src[I] == '\\'); + + bool FollowedByDoubleQuote = (I != E && Src[I] == '"'); + if (FollowedByDoubleQuote) { + Token.append(BackslashCount / 2, '\\'); + if (BackslashCount % 2 == 0) + return I - 1; + Token.push_back('"'); + return I; + } + Token.append(BackslashCount, '\\'); + return I - 1; +} + +void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv, + bool MarkEOLs) { + SmallString<128> Token; + + // This is a small state machine to consume characters until it reaches the + // end of the source string. + enum { INIT, UNQUOTED, QUOTED } State = INIT; + for (size_t I = 0, E = Src.size(); I != E; ++I) { + char C = Src[I]; + + // INIT state indicates that the current input index is at the start of + // the string or between tokens. + if (State == INIT) { + if (isWhitespaceOrNull(C)) { + // Mark the end of lines in response files + if (MarkEOLs && C == '\n') + NewArgv.push_back(nullptr); + continue; + } + if (C == '"') { + State = QUOTED; + continue; + } + if (C == '\\') { + I = parseBackslash(Src, I, Token); + State = UNQUOTED; + continue; + } + Token.push_back(C); + State = UNQUOTED; + continue; + } + + // UNQUOTED state means that it's reading a token not quoted by double + // quotes. + if (State == UNQUOTED) { + // Whitespace means the end of the token. 
+ if (isWhitespaceOrNull(C)) { + NewArgv.push_back(Saver.save(StringRef(Token)).data()); + Token.clear(); + State = INIT; + // Mark the end of lines in response files + if (MarkEOLs && C == '\n') + NewArgv.push_back(nullptr); + continue; + } + if (C == '"') { + State = QUOTED; + continue; + } + if (C == '\\') { + I = parseBackslash(Src, I, Token); + continue; + } + Token.push_back(C); + continue; + } + + // QUOTED state means that it's reading a token quoted by double quotes. + if (State == QUOTED) { + if (C == '"') { + if (I < (E - 1) && Src[I + 1] == '"') { + // Consecutive double-quotes inside a quoted string implies one + // double-quote. + Token.push_back('"'); + I = I + 1; + continue; + } + State = UNQUOTED; + continue; + } + if (C == '\\') { + I = parseBackslash(Src, I, Token); + continue; + } + Token.push_back(C); + } + } + // Append the last token after hitting EOF with no whitespace. + if (!Token.empty()) + NewArgv.push_back(Saver.save(StringRef(Token)).data()); + // Mark the end of response files + if (MarkEOLs) + NewArgv.push_back(nullptr); +} + +void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv, + bool MarkEOLs) { + for (const char *Cur = Source.begin(); Cur != Source.end();) { + SmallString<128> Line; + // Check for comment line. + if (isWhitespace(*Cur)) { + while (Cur != Source.end() && isWhitespace(*Cur)) + ++Cur; + continue; + } + if (*Cur == '#') { + while (Cur != Source.end() && *Cur != '\n') + ++Cur; + continue; + } + // Find end of the current line. + const char *Start = Cur; + for (const char *End = Source.end(); Cur != End; ++Cur) { + if (*Cur == '\\') { + if (Cur + 1 != End) { + ++Cur; + if (*Cur == '\n' || + (*Cur == '\r' && (Cur + 1 != End) && Cur[1] == '\n')) { + Line.append(Start, Cur - 1); + if (*Cur == '\r') + ++Cur; + Start = Cur + 1; + } + } + } else if (*Cur == '\n') + break; + } + // Tokenize line. 
+ Line.append(Start, Cur); + cl::TokenizeGNUCommandLine(Line, Saver, NewArgv, MarkEOLs); + } +} + +// It is called byte order marker but the UTF-8 BOM is actually not affected +// by the host system's endianness. +static bool hasUTF8ByteOrderMark(ArrayRef<char> S) { + return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf'); +} + +static bool ExpandResponseFile(StringRef FName, StringSaver &Saver, + TokenizerCallback Tokenizer, + SmallVectorImpl<const char *> &NewArgv, + bool MarkEOLs, bool RelativeNames) { + ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr = + MemoryBuffer::getFile(FName); + if (!MemBufOrErr) + return false; + MemoryBuffer &MemBuf = *MemBufOrErr.get(); + StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize()); + + // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing. + ArrayRef<char> BufRef(MemBuf.getBufferStart(), MemBuf.getBufferEnd()); + std::string UTF8Buf; + if (hasUTF16ByteOrderMark(BufRef)) { + if (!convertUTF16ToUTF8String(BufRef, UTF8Buf)) + return false; + Str = StringRef(UTF8Buf); + } + // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove + // these bytes before parsing. + // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark + else if (hasUTF8ByteOrderMark(BufRef)) + Str = StringRef(BufRef.data() + 3, BufRef.size() - 3); + + // Tokenize the contents into NewArgv. + Tokenizer(Str, Saver, NewArgv, MarkEOLs); + + // If names of nested response files should be resolved relative to including + // file, replace the included response file names with their full paths + // obtained by required resolution. 
+ if (RelativeNames) + for (unsigned I = 0; I < NewArgv.size(); ++I) + if (NewArgv[I]) { + StringRef Arg = NewArgv[I]; + if (Arg.front() == '@') { + StringRef FileName = Arg.drop_front(); + if (llvm::sys::path::is_relative(FileName)) { + SmallString<128> ResponseFile; + ResponseFile.append(1, '@'); + if (llvm::sys::path::is_relative(FName)) { + SmallString<128> curr_dir; + llvm::sys::fs::current_path(curr_dir); + ResponseFile.append(curr_dir.str()); + } + llvm::sys::path::append( + ResponseFile, llvm::sys::path::parent_path(FName), FileName); + NewArgv[I] = Saver.save(ResponseFile.c_str()).data(); + } + } + } + + return true; +} + +/// Expand response files on a command line recursively using the given +/// StringSaver and tokenization strategy. +bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, + SmallVectorImpl<const char *> &Argv, + bool MarkEOLs, bool RelativeNames) { + bool AllExpanded = true; + struct ResponseFileRecord { + const char *File; + size_t End; + }; + + // To detect recursive response files, we maintain a stack of files and the + // position of the last argument in the file. This position is updated + // dynamically as we recursively expand files. + SmallVector<ResponseFileRecord, 3> FileStack; + + // Push a dummy entry that represents the initial command line, removing + // the need to check for an empty list. + FileStack.push_back({"", Argv.size()}); + + // Don't cache Argv.size() because it can change. + for (unsigned I = 0; I != Argv.size();) { + while (I == FileStack.back().End) { + // Passing the end of a file's argument list, so we can remove it from the + // stack. 
+ FileStack.pop_back(); + } + + const char *Arg = Argv[I]; + // Check if it is an EOL marker + if (Arg == nullptr) { + ++I; + continue; + } + + if (Arg[0] != '@') { + ++I; + continue; + } + + const char *FName = Arg + 1; + auto IsEquivalent = [FName](const ResponseFileRecord &RFile) { + return sys::fs::equivalent(RFile.File, FName); + }; + + // Check for recursive response files. + if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) { + // This file is recursive, so we leave it in the argument stream and + // move on. + AllExpanded = false; + ++I; + continue; + } + + // Replace this response file argument with the tokenization of its + // contents. Nested response files are expanded in subsequent iterations. + SmallVector<const char *, 0> ExpandedArgv; + if (!ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs, + RelativeNames)) { + // We couldn't read this file, so we leave it in the argument stream and + // move on. + AllExpanded = false; + ++I; + continue; + } + + for (ResponseFileRecord &Record : FileStack) { + // Increase the end of all active records by the number of newly expanded + // arguments, minus the response file itself. + Record.End += ExpandedArgv.size() - 1; + } + + FileStack.push_back({FName, I + ExpandedArgv.size()}); + Argv.erase(Argv.begin() + I); + Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end()); + } + + // If successful, the top of the file stack will mark the end of the Argv + // stream. A failure here indicates a bug in the stack popping logic above. + // Note that FileStack may have more than one element at this point because we + // don't have a chance to pop the stack when encountering recursive files at + // the end of the stream, so seeing that doesn't indicate a bug. 
+ assert(FileStack.size() > 0 && Argv.size() == FileStack.back().End); + return AllExpanded; +} + +bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver, + SmallVectorImpl<const char *> &Argv) { + if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true)) + return false; + return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true); +} + +/// ParseEnvironmentOptions - An alternative entry point to the +/// CommandLine library, which allows you to read the program's name +/// from the caller (as PROGNAME) and its command-line arguments from +/// an environment variable (whose name is given in ENVVAR). +/// +void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, + const char *Overview) { + // Check args. + assert(progName && "Program name not specified"); + assert(envVar && "Environment variable name missing"); + + // Get the environment variable they want us to parse options out of. + llvm::Optional<std::string> envValue = sys::Process::GetEnv(StringRef(envVar)); + if (!envValue) + return; + + // Get program's "name", which we wouldn't know without the caller + // telling us. + SmallVector<const char *, 20> newArgv; + BumpPtrAllocator A; + StringSaver Saver(A); + newArgv.push_back(Saver.save(progName).data()); + + // Parse the value of the environment variable into a "command line" + // and hand it off to ParseCommandLineOptions(). + TokenizeGNUCommandLine(*envValue, Saver, newArgv); + int newArgc = static_cast<int>(newArgv.size()); + ParseCommandLineOptions(newArgc, &newArgv[0], StringRef(Overview)); +} + +bool cl::ParseCommandLineOptions(int argc, const char *const *argv, + StringRef Overview, raw_ostream *Errs, + const char *EnvVar, + bool LongOptionsUseDoubleDash) { + SmallVector<const char *, 20> NewArgv; + BumpPtrAllocator A; + StringSaver Saver(A); + NewArgv.push_back(argv[0]); + + // Parse options from environment variable. 
+ if (EnvVar) { + if (llvm::Optional<std::string> EnvValue = + sys::Process::GetEnv(StringRef(EnvVar))) + TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv); + } + + // Append options from command line. + for (int I = 1; I < argc; ++I) + NewArgv.push_back(argv[I]); + int NewArgc = static_cast<int>(NewArgv.size()); + + // Parse all options. + return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview, + Errs, LongOptionsUseDoubleDash); +} + +void CommandLineParser::ResetAllOptionOccurrences() { + // So that we can parse different command lines multiple times in succession + // we reset all option values to look like they have never been seen before. + for (auto SC : RegisteredSubCommands) { + for (auto &O : SC->OptionsMap) + O.second->reset(); + } +} + +bool CommandLineParser::ParseCommandLineOptions(int argc, + const char *const *argv, + StringRef Overview, + raw_ostream *Errs, + bool LongOptionsUseDoubleDash) { + assert(hasOptions() && "No options specified!"); + + // Expand response files. + SmallVector<const char *, 20> newArgv(argv, argv + argc); + BumpPtrAllocator A; + StringSaver Saver(A); + ExpandResponseFiles(Saver, + Triple(sys::getProcessTriple()).isOSWindows() ? + cl::TokenizeWindowsCommandLine : cl::TokenizeGNUCommandLine, + newArgv); + argv = &newArgv[0]; + argc = static_cast<int>(newArgv.size()); + + // Copy the program name into ProgName, making sure not to overflow it. + ProgramName = sys::path::filename(StringRef(argv[0])); + + ProgramOverview = Overview; + bool IgnoreErrors = Errs; + if (!Errs) + Errs = &errs(); + bool ErrorParsing = false; + + // Check out the positional arguments to collect information about them. 
+ unsigned NumPositionalRequired = 0; + + // Determine whether or not there are an unlimited number of positionals + bool HasUnlimitedPositionals = false; + + int FirstArg = 1; + SubCommand *ChosenSubCommand = &*TopLevelSubCommand; + if (argc >= 2 && argv[FirstArg][0] != '-') { + // If the first argument specifies a valid subcommand, start processing + // options from the second argument. + ChosenSubCommand = LookupSubCommand(StringRef(argv[FirstArg])); + if (ChosenSubCommand != &*TopLevelSubCommand) + FirstArg = 2; + } + GlobalParser->ActiveSubCommand = ChosenSubCommand; + + assert(ChosenSubCommand); + auto &ConsumeAfterOpt = ChosenSubCommand->ConsumeAfterOpt; + auto &PositionalOpts = ChosenSubCommand->PositionalOpts; + auto &SinkOpts = ChosenSubCommand->SinkOpts; + auto &OptionsMap = ChosenSubCommand->OptionsMap; + + for (auto O: DefaultOptions) { + addOption(O, true); + } + + if (ConsumeAfterOpt) { + assert(PositionalOpts.size() > 0 && + "Cannot specify cl::ConsumeAfter without a positional argument!"); + } + if (!PositionalOpts.empty()) { + + // Calculate how many positional values are _required_. + bool UnboundedFound = false; + for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { + Option *Opt = PositionalOpts[i]; + if (RequiresValue(Opt)) + ++NumPositionalRequired; + else if (ConsumeAfterOpt) { + // ConsumeAfter cannot be combined with "optional" positional options + // unless there is only one positional argument... + if (PositionalOpts.size() > 1) { + if (!IgnoreErrors) + Opt->error("error - this positional option will never be matched, " + "because it does not Require a value, and a " + "cl::ConsumeAfter option is active!"); + ErrorParsing = true; + } + } else if (UnboundedFound && !Opt->hasArgStr()) { + // This option does not "require" a value... Make sure this option is + // not specified after an option that eats all extra arguments, or this + // one will never get any! 
+ // + if (!IgnoreErrors) + Opt->error("error - option can never match, because " + "another positional argument will match an " + "unbounded number of values, and this option" + " does not require a value!"); + *Errs << ProgramName << ": CommandLine Error: Option '" << Opt->ArgStr + << "' is all messed up!\n"; + *Errs << PositionalOpts.size(); + ErrorParsing = true; + } + UnboundedFound |= EatsUnboundedNumberOfValues(Opt); + } + HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt; + } + + // PositionalVals - A vector of "positional" arguments we accumulate into + // the process at the end. + // + SmallVector<std::pair<StringRef, unsigned>, 4> PositionalVals; + + // If the program has named positional arguments, and the name has been run + // across, keep track of which positional argument was named. Otherwise put + // the positional args into the PositionalVals list... + Option *ActivePositionalArg = nullptr; + + // Loop over all of the arguments... processing them. + bool DashDashFound = false; // Have we read '--'? + for (int i = FirstArg; i < argc; ++i) { + Option *Handler = nullptr; + Option *NearestHandler = nullptr; + std::string NearestHandlerString; + StringRef Value; + StringRef ArgName = ""; + bool HaveDoubleDash = false; + + // Check to see if this is a positional argument. This argument is + // considered to be positional if it doesn't start with '-', if it is "-" + // itself, or if we have seen "--" already. + // + if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) { + // Positional argument! + if (ActivePositionalArg) { + ProvidePositionalOption(ActivePositionalArg, StringRef(argv[i]), i); + continue; // We are done! + } + + if (!PositionalOpts.empty()) { + PositionalVals.push_back(std::make_pair(StringRef(argv[i]), i)); + + // All of the positional arguments have been fulfulled, give the rest to + // the consume after option... if it's specified... 
+ // + if (PositionalVals.size() >= NumPositionalRequired && ConsumeAfterOpt) { + for (++i; i < argc; ++i) + PositionalVals.push_back(std::make_pair(StringRef(argv[i]), i)); + break; // Handle outside of the argument processing loop... + } + + // Delay processing positional arguments until the end... + continue; + } + } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 && + !DashDashFound) { + DashDashFound = true; // This is the mythical "--"? + continue; // Don't try to process it as an argument itself. + } else if (ActivePositionalArg && + (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) { + // If there is a positional argument eating options, check to see if this + // option is another positional argument. If so, treat it as an argument, + // otherwise feed it to the eating positional. + ArgName = StringRef(argv[i] + 1); + // Eat second dash. + if (!ArgName.empty() && ArgName[0] == '-') { + HaveDoubleDash = true; + ArgName = ArgName.substr(1); + } + + Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value, + LongOptionsUseDoubleDash, HaveDoubleDash); + if (!Handler || Handler->getFormattingFlag() != cl::Positional) { + ProvidePositionalOption(ActivePositionalArg, StringRef(argv[i]), i); + continue; // We are done! + } + } else { // We start with a '-', must be an argument. + ArgName = StringRef(argv[i] + 1); + // Eat second dash. + if (!ArgName.empty() && ArgName[0] == '-') { + HaveDoubleDash = true; + ArgName = ArgName.substr(1); + } + + Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value, + LongOptionsUseDoubleDash, HaveDoubleDash); + + // Check to see if this "option" is really a prefixed or grouped argument. + if (!Handler && !(LongOptionsUseDoubleDash && HaveDoubleDash)) + Handler = HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing, + OptionsMap); + + // Otherwise, look for the closest available option to report to the user + // in the upcoming error. 
+ if (!Handler && SinkOpts.empty()) + NearestHandler = + LookupNearestOption(ArgName, OptionsMap, NearestHandlerString); + } + + if (!Handler) { + if (SinkOpts.empty()) { + *Errs << ProgramName << ": Unknown command line argument '" << argv[i] + << "'. Try: '" << argv[0] << " --help'\n"; + + if (NearestHandler) { + // If we know a near match, report it as well. + *Errs << ProgramName << ": Did you mean '" + << PrintArg(NearestHandlerString) << "'?\n"; + } + + ErrorParsing = true; + } else { + for (SmallVectorImpl<Option *>::iterator I = SinkOpts.begin(), + E = SinkOpts.end(); + I != E; ++I) + (*I)->addOccurrence(i, "", StringRef(argv[i])); + } + continue; + } + + // If this is a named positional argument, just remember that it is the + // active one... + if (Handler->getFormattingFlag() == cl::Positional) { + if ((Handler->getMiscFlags() & PositionalEatsArgs) && !Value.empty()) { + Handler->error("This argument does not take a value.\n" + "\tInstead, it consumes any positional arguments until " + "the next recognized option.", *Errs); + ErrorParsing = true; + } + ActivePositionalArg = Handler; + } + else + ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i); + } + + // Check and handle positional arguments now... + if (NumPositionalRequired > PositionalVals.size()) { + *Errs << ProgramName + << ": Not enough positional command line arguments specified!\n" + << "Must specify at least " << NumPositionalRequired + << " positional argument" << (NumPositionalRequired > 1 ? 
"s" : "") + << ": See: " << argv[0] << " --help\n"; + + ErrorParsing = true; + } else if (!HasUnlimitedPositionals && + PositionalVals.size() > PositionalOpts.size()) { + *Errs << ProgramName << ": Too many positional arguments specified!\n" + << "Can specify at most " << PositionalOpts.size() + << " positional arguments: See: " << argv[0] << " --help\n"; + ErrorParsing = true; + + } else if (!ConsumeAfterOpt) { + // Positional args have already been handled if ConsumeAfter is specified. + unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size()); + for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { + if (RequiresValue(PositionalOpts[i])) { + ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + --NumPositionalRequired; // We fulfilled our duty... + } + + // If we _can_ give this option more arguments, do so now, as long as we + // do not give it values that others need. 'Done' controls whether the + // option even _WANTS_ any more. + // + bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required; + while (NumVals - ValNo > NumPositionalRequired && !Done) { + switch (PositionalOpts[i]->getNumOccurrencesFlag()) { + case cl::Optional: + Done = true; // Optional arguments want _at most_ one value + LLVM_FALLTHROUGH; + case cl::ZeroOrMore: // Zero or more will take all they can get... + case cl::OneOrMore: // One or more will take all they can get... 
+ ProvidePositionalOption(PositionalOpts[i], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + break; + default: + llvm_unreachable("Internal error, unexpected NumOccurrences flag in " + "positional argument processing!"); + } + } + } + } else { + assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size()); + unsigned ValNo = 0; + for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j) + if (RequiresValue(PositionalOpts[j])) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[j], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + } + + // Handle the case where there is just one positional option, and it's + // optional. In this case, we want to give JUST THE FIRST option to the + // positional option and keep the rest for the consume after. The above + // loop would have assigned no values to positional options in this case. + // + if (PositionalOpts.size() == 1 && ValNo == 0 && !PositionalVals.empty()) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[0], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + } + + // Handle over all of the rest of the arguments to the + // cl::ConsumeAfter command line option... + for (; ValNo != PositionalVals.size(); ++ValNo) + ErrorParsing |= + ProvidePositionalOption(ConsumeAfterOpt, PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + } + + // Loop over args and make sure all required args are specified! + for (const auto &Opt : OptionsMap) { + switch (Opt.second->getNumOccurrencesFlag()) { + case Required: + case OneOrMore: + if (Opt.second->getNumOccurrences() == 0) { + Opt.second->error("must be specified at least once!"); + ErrorParsing = true; + } + LLVM_FALLTHROUGH; + default: + break; + } + } + + // Now that we know if -debug is specified, we can use it. + // Note that if ReadResponseFiles == true, this must be done before the + // memory allocated for the expanded command line is free()d below. 
+ LLVM_DEBUG(dbgs() << "Args: "; + for (int i = 0; i < argc; ++i) dbgs() << argv[i] << ' '; + dbgs() << '\n';); + + // Free all of the memory allocated to the map. Command line options may only + // be processed once! + MoreHelp.clear(); + + // If we had an error processing our arguments, don't let the program execute + if (ErrorParsing) { + if (!IgnoreErrors) + exit(1); + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// +// Option Base class implementation +// + +bool Option::error(const Twine &Message, StringRef ArgName, raw_ostream &Errs) { + if (!ArgName.data()) + ArgName = ArgStr; + if (ArgName.empty()) + Errs << HelpStr; // Be nice for positional arguments + else + Errs << GlobalParser->ProgramName << ": for the " << PrintArg(ArgName); + + Errs << " option: " << Message << "\n"; + return true; +} + +bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value, + bool MultiArg) { + if (!MultiArg) + NumOccurrences++; // Increment the number of times we have been seen + + switch (getNumOccurrencesFlag()) { + case Optional: + if (NumOccurrences > 1) + return error("may only occur zero or one times!", ArgName); + break; + case Required: + if (NumOccurrences > 1) + return error("must occur exactly one time!", ArgName); + LLVM_FALLTHROUGH; + case OneOrMore: + case ZeroOrMore: + case ConsumeAfter: + break; + } + + return handleOccurrence(pos, ArgName, Value); +} + +// getValueStr - Get the value description string, using "DefaultMsg" if nothing +// has been specified yet. +// +static StringRef getValueStr(const Option &O, StringRef DefaultMsg) { + if (O.ValueStr.empty()) + return DefaultMsg; + return O.ValueStr; +} + +//===----------------------------------------------------------------------===// +// cl::alias class implementation +// + +// Return the width of the option tag for printing... 
+size_t alias::getOptionWidth() const { + return argPlusPrefixesSize(ArgStr); +} + +void Option::printHelpStr(StringRef HelpStr, size_t Indent, + size_t FirstLineIndentedBy) { + assert(Indent >= FirstLineIndentedBy); + std::pair<StringRef, StringRef> Split = HelpStr.split('\n'); + outs().indent(Indent - FirstLineIndentedBy) + << ArgHelpPrefix << Split.first << "\n"; + while (!Split.second.empty()) { + Split = Split.second.split('\n'); + outs().indent(Indent) << Split.first << "\n"; + } +} + +// Print out the option for the alias. +void alias::printOptionInfo(size_t GlobalWidth) const { + outs() << PrintArg(ArgStr); + printHelpStr(HelpStr, GlobalWidth, argPlusPrefixesSize(ArgStr)); +} + +//===----------------------------------------------------------------------===// +// Parser Implementation code... +// + +// basic_parser implementation +// + +// Return the width of the option tag for printing... +size_t basic_parser_impl::getOptionWidth(const Option &O) const { + size_t Len = argPlusPrefixesSize(O.ArgStr); + auto ValName = getValueName(); + if (!ValName.empty()) { + size_t FormattingLen = 3; + if (O.getMiscFlags() & PositionalEatsArgs) + FormattingLen = 6; + Len += getValueStr(O, ValName).size() + FormattingLen; + } + + return Len; +} + +// printOptionInfo - Print out information about this option. The +// to-be-maintained width is specified. 
+// +void basic_parser_impl::printOptionInfo(const Option &O, + size_t GlobalWidth) const { + outs() << PrintArg(O.ArgStr); + + auto ValName = getValueName(); + if (!ValName.empty()) { + if (O.getMiscFlags() & PositionalEatsArgs) { + outs() << " <" << getValueStr(O, ValName) << ">..."; + } else { + outs() << "=<" << getValueStr(O, ValName) << '>'; + } + } + + Option::printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O)); +} + +void basic_parser_impl::printOptionName(const Option &O, + size_t GlobalWidth) const { + outs() << PrintArg(O.ArgStr); + outs().indent(GlobalWidth - O.ArgStr.size()); +} + +// parser<bool> implementation +// +bool parser<bool>::parse(Option &O, StringRef ArgName, StringRef Arg, + bool &Value) { + if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || + Arg == "1") { + Value = true; + return false; + } + + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + Value = false; + return false; + } + return O.error("'" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); +} + +// parser<boolOrDefault> implementation +// +bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName, StringRef Arg, + boolOrDefault &Value) { + if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || + Arg == "1") { + Value = BOU_TRUE; + return false; + } + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + Value = BOU_FALSE; + return false; + } + + return O.error("'" + Arg + + "' is invalid value for boolean argument! 
Try 0 or 1"); +} + +// parser<int> implementation +// +bool parser<int>::parse(Option &O, StringRef ArgName, StringRef Arg, + int &Value) { + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for integer argument!"); + return false; +} + +// parser<unsigned> implementation +// +bool parser<unsigned>::parse(Option &O, StringRef ArgName, StringRef Arg, + unsigned &Value) { + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint argument!"); + return false; +} + +// parser<unsigned long> implementation +// +bool parser<unsigned long>::parse(Option &O, StringRef ArgName, StringRef Arg, + unsigned long &Value) { + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for ulong argument!"); + return false; +} + +// parser<unsigned long long> implementation +// +bool parser<unsigned long long>::parse(Option &O, StringRef ArgName, + StringRef Arg, + unsigned long long &Value) { + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for ullong argument!"); + return false; +} + +// parser<double>/parser<float> implementation +// +static bool parseDouble(Option &O, StringRef Arg, double &Value) { + if (to_float(Arg, Value)) + return false; + return O.error("'" + Arg + "' value invalid for floating point argument!"); +} + +bool parser<double>::parse(Option &O, StringRef ArgName, StringRef Arg, + double &Val) { + return parseDouble(O, Arg, Val); +} + +bool parser<float>::parse(Option &O, StringRef ArgName, StringRef Arg, + float &Val) { + double dVal; + if (parseDouble(O, Arg, dVal)) + return true; + Val = (float)dVal; + return false; +} + +// generic_parser_base implementation +// + +// findOption - Return the option number corresponding to the specified +// argument string. If the option is not found, getNumOptions() is returned. 
+// +unsigned generic_parser_base::findOption(StringRef Name) { + unsigned e = getNumOptions(); + + for (unsigned i = 0; i != e; ++i) { + if (getOption(i) == Name) + return i; + } + return e; +} + +static StringRef EqValue = "=<value>"; +static StringRef EmptyOption = "<empty>"; +static StringRef OptionPrefix = " ="; +static size_t OptionPrefixesSize = OptionPrefix.size() + ArgHelpPrefix.size(); + +static bool shouldPrintOption(StringRef Name, StringRef Description, + const Option &O) { + return O.getValueExpectedFlag() != ValueOptional || !Name.empty() || + !Description.empty(); +} + +// Return the width of the option tag for printing... +size_t generic_parser_base::getOptionWidth(const Option &O) const { + if (O.hasArgStr()) { + size_t Size = + argPlusPrefixesSize(O.ArgStr) + EqValue.size(); + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + StringRef Name = getOption(i); + if (!shouldPrintOption(Name, getDescription(i), O)) + continue; + size_t NameSize = Name.empty() ? EmptyOption.size() : Name.size(); + Size = std::max(Size, NameSize + OptionPrefixesSize); + } + return Size; + } else { + size_t BaseSize = 0; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) + BaseSize = std::max(BaseSize, getOption(i).size() + 8); + return BaseSize; + } +} + +// printOptionInfo - Print out information about this option. The +// to-be-maintained width is specified. +// +void generic_parser_base::printOptionInfo(const Option &O, + size_t GlobalWidth) const { + if (O.hasArgStr()) { + // When the value is optional, first print a line just describing the + // option without values. 
+ if (O.getValueExpectedFlag() == ValueOptional) { + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + if (getOption(i).empty()) { + outs() << PrintArg(O.ArgStr); + Option::printHelpStr(O.HelpStr, GlobalWidth, + argPlusPrefixesSize(O.ArgStr)); + break; + } + } + } + + outs() << PrintArg(O.ArgStr) << EqValue; + Option::printHelpStr(O.HelpStr, GlobalWidth, + EqValue.size() + + argPlusPrefixesSize(O.ArgStr)); + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + StringRef OptionName = getOption(i); + StringRef Description = getDescription(i); + if (!shouldPrintOption(OptionName, Description, O)) + continue; + assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize); + size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize; + outs() << OptionPrefix << OptionName; + if (OptionName.empty()) { + outs() << EmptyOption; + assert(NumSpaces >= EmptyOption.size()); + NumSpaces -= EmptyOption.size(); + } + if (!Description.empty()) + outs().indent(NumSpaces) << ArgHelpPrefix << " " << Description; + outs() << '\n'; + } + } else { + if (!O.HelpStr.empty()) + outs() << " " << O.HelpStr << '\n'; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + StringRef Option = getOption(i); + outs() << " " << PrintArg(Option); + Option::printHelpStr(getDescription(i), GlobalWidth, Option.size() + 8); + } + } +} + +static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff + +// printGenericOptionDiff - Print the value of this option and it's default. +// +// "Generic" options have each value mapped to a name. 
+void generic_parser_base::printGenericOptionDiff( + const Option &O, const GenericOptionValue &Value, + const GenericOptionValue &Default, size_t GlobalWidth) const { + outs() << " " << PrintArg(O.ArgStr); + outs().indent(GlobalWidth - O.ArgStr.size()); + + unsigned NumOpts = getNumOptions(); + for (unsigned i = 0; i != NumOpts; ++i) { + if (Value.compare(getOptionValue(i))) + continue; + + outs() << "= " << getOption(i); + size_t L = getOption(i).size(); + size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0; + outs().indent(NumSpaces) << " (default: "; + for (unsigned j = 0; j != NumOpts; ++j) { + if (Default.compare(getOptionValue(j))) + continue; + outs() << getOption(j); + break; + } + outs() << ")\n"; + return; + } + outs() << "= *unknown option value*\n"; +} + +// printOptionDiff - Specializations for printing basic value types. +// +#define PRINT_OPT_DIFF(T) \ + void parser<T>::printOptionDiff(const Option &O, T V, OptionValue<T> D, \ + size_t GlobalWidth) const { \ + printOptionName(O, GlobalWidth); \ + std::string Str; \ + { \ + raw_string_ostream SS(Str); \ + SS << V; \ + } \ + outs() << "= " << Str; \ + size_t NumSpaces = \ + MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0; \ + outs().indent(NumSpaces) << " (default: "; \ + if (D.hasValue()) \ + outs() << D.getValue(); \ + else \ + outs() << "*no default*"; \ + outs() << ")\n"; \ + } + +PRINT_OPT_DIFF(bool) +PRINT_OPT_DIFF(boolOrDefault) +PRINT_OPT_DIFF(int) +PRINT_OPT_DIFF(unsigned) +PRINT_OPT_DIFF(unsigned long) +PRINT_OPT_DIFF(unsigned long long) +PRINT_OPT_DIFF(double) +PRINT_OPT_DIFF(float) +PRINT_OPT_DIFF(char) + +void parser<std::string>::printOptionDiff(const Option &O, StringRef V, + const OptionValue<std::string> &D, + size_t GlobalWidth) const { + printOptionName(O, GlobalWidth); + outs() << "= " << V; + size_t NumSpaces = MaxOptWidth > V.size() ? 
MaxOptWidth - V.size() : 0; + outs().indent(NumSpaces) << " (default: "; + if (D.hasValue()) + outs() << D.getValue(); + else + outs() << "*no default*"; + outs() << ")\n"; +} + +// Print a placeholder for options that don't yet support printOptionDiff(). +void basic_parser_impl::printOptionNoValue(const Option &O, + size_t GlobalWidth) const { + printOptionName(O, GlobalWidth); + outs() << "= *cannot print option value*\n"; +} + +//===----------------------------------------------------------------------===// +// -help and -help-hidden option implementation +// + +static int OptNameCompare(const std::pair<const char *, Option *> *LHS, + const std::pair<const char *, Option *> *RHS) { + return strcmp(LHS->first, RHS->first); +} + +static int SubNameCompare(const std::pair<const char *, SubCommand *> *LHS, + const std::pair<const char *, SubCommand *> *RHS) { + return strcmp(LHS->first, RHS->first); +} + +// Copy Options into a vector so we can sort them as we like. +static void sortOpts(StringMap<Option *> &OptMap, + SmallVectorImpl<std::pair<const char *, Option *>> &Opts, + bool ShowHidden) { + SmallPtrSet<Option *, 32> OptionSet; // Duplicate option detection. + + for (StringMap<Option *>::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) { + // Ignore really-hidden options. + if (I->second->getOptionHiddenFlag() == ReallyHidden) + continue; + + // Unless showhidden is set, ignore hidden flags. + if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) + continue; + + // If we've already seen this option, don't add it to the list again. + if (!OptionSet.insert(I->second).second) + continue; + + Opts.push_back( + std::pair<const char *, Option *>(I->getKey().data(), I->second)); + } + + // Sort the options list alphabetically. 
+ array_pod_sort(Opts.begin(), Opts.end(), OptNameCompare); +} + +static void +sortSubCommands(const SmallPtrSetImpl<SubCommand *> &SubMap, + SmallVectorImpl<std::pair<const char *, SubCommand *>> &Subs) { + for (const auto &S : SubMap) { + if (S->getName().empty()) + continue; + Subs.push_back(std::make_pair(S->getName().data(), S)); + } + array_pod_sort(Subs.begin(), Subs.end(), SubNameCompare); +} + +namespace { + +class HelpPrinter { +protected: + const bool ShowHidden; + typedef SmallVector<std::pair<const char *, Option *>, 128> + StrOptionPairVector; + typedef SmallVector<std::pair<const char *, SubCommand *>, 128> + StrSubCommandPairVector; + // Print the options. Opts is assumed to be alphabetically sorted. + virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) { + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionInfo(MaxArgLen); + } + + void printSubCommands(StrSubCommandPairVector &Subs, size_t MaxSubLen) { + for (const auto &S : Subs) { + outs() << " " << S.first; + if (!S.second->getDescription().empty()) { + outs().indent(MaxSubLen - strlen(S.first)); + outs() << " - " << S.second->getDescription(); + } + outs() << "\n"; + } + } + +public: + explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {} + virtual ~HelpPrinter() {} + + // Invoke the printer. 
+ void operator=(bool Value) { + if (!Value) + return; + printHelp(); + + // Halt the program since help information was printed + exit(0); + } + + void printHelp() { + SubCommand *Sub = GlobalParser->getActiveSubCommand(); + auto &OptionsMap = Sub->OptionsMap; + auto &PositionalOpts = Sub->PositionalOpts; + auto &ConsumeAfterOpt = Sub->ConsumeAfterOpt; + + StrOptionPairVector Opts; + sortOpts(OptionsMap, Opts, ShowHidden); + + StrSubCommandPairVector Subs; + sortSubCommands(GlobalParser->RegisteredSubCommands, Subs); + + if (!GlobalParser->ProgramOverview.empty()) + outs() << "OVERVIEW: " << GlobalParser->ProgramOverview << "\n"; + + if (Sub == &*TopLevelSubCommand) { + outs() << "USAGE: " << GlobalParser->ProgramName; + if (Subs.size() > 2) + outs() << " [subcommand]"; + outs() << " [options]"; + } else { + if (!Sub->getDescription().empty()) { + outs() << "SUBCOMMAND '" << Sub->getName() + << "': " << Sub->getDescription() << "\n\n"; + } + outs() << "USAGE: " << GlobalParser->ProgramName << " " << Sub->getName() + << " [options]"; + } + + for (auto Opt : PositionalOpts) { + if (Opt->hasArgStr()) + outs() << " --" << Opt->ArgStr; + outs() << " " << Opt->HelpStr; + } + + // Print the consume after option info if it exists... + if (ConsumeAfterOpt) + outs() << " " << ConsumeAfterOpt->HelpStr; + + if (Sub == &*TopLevelSubCommand && !Subs.empty()) { + // Compute the maximum subcommand length... + size_t MaxSubLen = 0; + for (size_t i = 0, e = Subs.size(); i != e; ++i) + MaxSubLen = std::max(MaxSubLen, strlen(Subs[i].first)); + + outs() << "\n\n"; + outs() << "SUBCOMMANDS:\n\n"; + printSubCommands(Subs, MaxSubLen); + outs() << "\n"; + outs() << " Type \"" << GlobalParser->ProgramName + << " <subcommand> --help\" to get more help on a specific " + "subcommand"; + } + + outs() << "\n\n"; + + // Compute the maximum argument length... 
+ size_t MaxArgLen = 0; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); + + outs() << "OPTIONS:\n"; + printOptions(Opts, MaxArgLen); + + // Print any extra help the user has declared. + for (auto I : GlobalParser->MoreHelp) + outs() << I; + GlobalParser->MoreHelp.clear(); + } +}; + +class CategorizedHelpPrinter : public HelpPrinter { +public: + explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {} + + // Helper function for printOptions(). + // It shall return a negative value if A's name should be lexicographically + // ordered before B's name. It returns a value greater than zero if B's name + // should be ordered before A's name, and it returns 0 otherwise. + static int OptionCategoryCompare(OptionCategory *const *A, + OptionCategory *const *B) { + return (*A)->getName().compare((*B)->getName()); + } + + // Make sure we inherit our base class's operator=() + using HelpPrinter::operator=; + +protected: + void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) override { + std::vector<OptionCategory *> SortedCategories; + std::map<OptionCategory *, std::vector<Option *>> CategorizedOptions; + + // Collect registered option categories into vector in preparation for + // sorting. + for (auto I = GlobalParser->RegisteredOptionCategories.begin(), + E = GlobalParser->RegisteredOptionCategories.end(); + I != E; ++I) { + SortedCategories.push_back(*I); + } + + // Sort the different option categories alphabetically. + assert(SortedCategories.size() > 0 && "No option categories registered!"); + array_pod_sort(SortedCategories.begin(), SortedCategories.end(), + OptionCategoryCompare); + + // Create map to empty vectors. + for (std::vector<OptionCategory *>::const_iterator + I = SortedCategories.begin(), + E = SortedCategories.end(); + I != E; ++I) + CategorizedOptions[*I] = std::vector<Option *>(); + + // Walk through pre-sorted options and assign into categories. 
+ // Because the options are already alphabetically sorted the + // options within categories will also be alphabetically sorted. + for (size_t I = 0, E = Opts.size(); I != E; ++I) { + Option *Opt = Opts[I].second; + for (auto &Cat : Opt->Categories) { + assert(CategorizedOptions.count(Cat) > 0 && + "Option has an unregistered category"); + CategorizedOptions[Cat].push_back(Opt); + } + } + + // Now do printing. + for (std::vector<OptionCategory *>::const_iterator + Category = SortedCategories.begin(), + E = SortedCategories.end(); + Category != E; ++Category) { + // Hide empty categories for --help, but show for --help-hidden. + const auto &CategoryOptions = CategorizedOptions[*Category]; + bool IsEmptyCategory = CategoryOptions.empty(); + if (!ShowHidden && IsEmptyCategory) + continue; + + // Print category information. + outs() << "\n"; + outs() << (*Category)->getName() << ":\n"; + + // Check if description is set. + if (!(*Category)->getDescription().empty()) + outs() << (*Category)->getDescription() << "\n\n"; + else + outs() << "\n"; + + // When using --help-hidden explicitly state if the category has no + // options associated with it. + if (IsEmptyCategory) { + outs() << " This option category has no options.\n"; + continue; + } + // Loop over the options in the category and print. + for (const Option *Opt : CategoryOptions) + Opt->printOptionInfo(MaxArgLen); + } + } +}; + +// This wraps the Uncategorizing and Categorizing printers and decides +// at run time which should be invoked. +class HelpPrinterWrapper { +private: + HelpPrinter &UncategorizedPrinter; + CategorizedHelpPrinter &CategorizedPrinter; + +public: + explicit HelpPrinterWrapper(HelpPrinter &UncategorizedPrinter, + CategorizedHelpPrinter &CategorizedPrinter) + : UncategorizedPrinter(UncategorizedPrinter), + CategorizedPrinter(CategorizedPrinter) {} + + // Invoke the printer. 
+ void operator=(bool Value); +}; + +} // End anonymous namespace + +// Declare the four HelpPrinter instances that are used to print out help, or +// help-hidden as an uncategorized list or in categories. +static HelpPrinter UncategorizedNormalPrinter(false); +static HelpPrinter UncategorizedHiddenPrinter(true); +static CategorizedHelpPrinter CategorizedNormalPrinter(false); +static CategorizedHelpPrinter CategorizedHiddenPrinter(true); + +// Declare HelpPrinter wrappers that will decide whether or not to invoke +// a categorizing help printer +static HelpPrinterWrapper WrappedNormalPrinter(UncategorizedNormalPrinter, + CategorizedNormalPrinter); +static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter, + CategorizedHiddenPrinter); + +// Define a category for generic options that all tools should have. +static cl::OptionCategory GenericCategory("Generic Options"); + +// Define uncategorized help printers. +// --help-list is hidden by default because if Option categories are being used +// then --help behaves the same as --help-list. +static cl::opt<HelpPrinter, true, parser<bool>> HLOp( + "help-list", + cl::desc("Display list of available options (--help-list-hidden for more)"), + cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed, + cl::cat(GenericCategory), cl::sub(*AllSubCommands)); + +static cl::opt<HelpPrinter, true, parser<bool>> + HLHOp("help-list-hidden", cl::desc("Display list of all available options"), + cl::location(UncategorizedHiddenPrinter), cl::Hidden, + cl::ValueDisallowed, cl::cat(GenericCategory), + cl::sub(*AllSubCommands)); + +// Define uncategorized/categorized help printers. These printers change their +// behaviour at runtime depending on whether one or more Option categories have +// been declared. 
+static cl::opt<HelpPrinterWrapper, true, parser<bool>> + HOp("help", cl::desc("Display available options (--help-hidden for more)"), + cl::location(WrappedNormalPrinter), cl::ValueDisallowed, + cl::cat(GenericCategory), cl::sub(*AllSubCommands)); + +static cl::alias HOpA("h", cl::desc("Alias for --help"), cl::aliasopt(HOp), + cl::DefaultOption); + +static cl::opt<HelpPrinterWrapper, true, parser<bool>> + HHOp("help-hidden", cl::desc("Display all available options"), + cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed, + cl::cat(GenericCategory), cl::sub(*AllSubCommands)); + +static cl::opt<bool> PrintOptions( + "print-options", + cl::desc("Print non-default options after command line parsing"), + cl::Hidden, cl::init(false), cl::cat(GenericCategory), + cl::sub(*AllSubCommands)); + +static cl::opt<bool> PrintAllOptions( + "print-all-options", + cl::desc("Print all option values after command line parsing"), cl::Hidden, + cl::init(false), cl::cat(GenericCategory), cl::sub(*AllSubCommands)); + +void HelpPrinterWrapper::operator=(bool Value) { + if (!Value) + return; + + // Decide which printer to invoke. If more than one option category is + // registered then it is useful to show the categorized help instead of + // uncategorized help. + if (GlobalParser->RegisteredOptionCategories.size() > 1) { + // unhide --help-list option so user can have uncategorized output if they + // want it. + HLOp.setHiddenFlag(NotHidden); + + CategorizedPrinter = true; // Invoke categorized printer + } else + UncategorizedPrinter = true; // Invoke uncategorized printer +} + +// Print the value of each option. +void cl::PrintOptionValues() { GlobalParser->printOptionValues(); } + +void CommandLineParser::printOptionValues() { + if (!PrintOptions && !PrintAllOptions) + return; + + SmallVector<std::pair<const char *, Option *>, 128> Opts; + sortOpts(ActiveSubCommand->OptionsMap, Opts, /*ShowHidden*/ true); + + // Compute the maximum argument length... 
+ size_t MaxArgLen = 0; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); + + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); +} + +static VersionPrinterTy OverrideVersionPrinter = nullptr; + +static std::vector<VersionPrinterTy> *ExtraVersionPrinters = nullptr; + +namespace { +class VersionPrinter { +public: + void print() { + raw_ostream &OS = outs(); +#ifdef PACKAGE_VENDOR + OS << PACKAGE_VENDOR << " "; +#else + OS << "LLVM (http://llvm.org/):\n "; +#endif + OS << PACKAGE_NAME << " version " << PACKAGE_VERSION; +#ifdef LLVM_VERSION_INFO + OS << " " << LLVM_VERSION_INFO; +#endif + OS << "\n "; +#ifndef __OPTIMIZE__ + OS << "DEBUG build"; +#else + OS << "Optimized build"; +#endif +#ifndef NDEBUG + OS << " with assertions"; +#endif +#if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO + std::string CPU = sys::getHostCPUName(); + if (CPU == "generic") + CPU = "(unknown)"; + OS << ".\n" + << " Default target: " << sys::getDefaultTargetTriple() << '\n' + << " Host CPU: " << CPU; +#endif + OS << '\n'; + } + void operator=(bool OptionWasSpecified) { + if (!OptionWasSpecified) + return; + + if (OverrideVersionPrinter != nullptr) { + OverrideVersionPrinter(outs()); + exit(0); + } + print(); + + // Iterate over any registered extra printers and call them to add further + // information. + if (ExtraVersionPrinters != nullptr) { + outs() << '\n'; + for (auto I : *ExtraVersionPrinters) + I(outs()); + } + + exit(0); + } +}; +} // End anonymous namespace + +// Define the --version option that prints out the LLVM version for the tool +static VersionPrinter VersionPrinterInstance; + +static cl::opt<VersionPrinter, true, parser<bool>> + VersOp("version", cl::desc("Display the version of this program"), + cl::location(VersionPrinterInstance), cl::ValueDisallowed, + cl::cat(GenericCategory)); + +// Utility function for printing the help message. 
+void cl::PrintHelpMessage(bool Hidden, bool Categorized) { + if (!Hidden && !Categorized) + UncategorizedNormalPrinter.printHelp(); + else if (!Hidden && Categorized) + CategorizedNormalPrinter.printHelp(); + else if (Hidden && !Categorized) + UncategorizedHiddenPrinter.printHelp(); + else + CategorizedHiddenPrinter.printHelp(); +} + +/// Utility function for printing version number. +void cl::PrintVersionMessage() { VersionPrinterInstance.print(); } + +void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; } + +void cl::AddExtraVersionPrinter(VersionPrinterTy func) { + if (!ExtraVersionPrinters) + ExtraVersionPrinters = new std::vector<VersionPrinterTy>; + + ExtraVersionPrinters->push_back(func); +} + +StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) { + auto &Subs = GlobalParser->RegisteredSubCommands; + (void)Subs; + assert(is_contained(Subs, &Sub)); + return Sub.OptionsMap; +} + +iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator> +cl::getRegisteredSubcommands() { + return GlobalParser->getRegisteredSubcommands(); +} + +void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) { + for (auto &I : Sub.OptionsMap) { + for (auto &Cat : I.second->Categories) { + if (Cat != &Category && + Cat != &GenericCategory) + I.second->setHiddenFlag(cl::ReallyHidden); + } + } +} + +void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories, + SubCommand &Sub) { + for (auto &I : Sub.OptionsMap) { + for (auto &Cat : I.second->Categories) { + if (find(Categories, Cat) == Categories.end() && Cat != &GenericCategory) + I.second->setHiddenFlag(cl::ReallyHidden); + } + } +} + +void cl::ResetCommandLineParser() { GlobalParser->reset(); } +void cl::ResetAllOptionOccurrences() { + GlobalParser->ResetAllOptionOccurrences(); +} + +void LLVMParseCommandLineOptions(int argc, const char *const *argv, + const char *Overview) { + llvm::cl::ParseCommandLineOptions(argc, argv, StringRef(Overview), + 
&llvm::nulls()); +} diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp new file mode 100644 index 0000000000000..97d5ffaadf827 --- /dev/null +++ b/llvm/lib/Support/Compression.cpp @@ -0,0 +1,106 @@ +//===--- Compression.cpp - Compression implementation ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements compression functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Compression.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H +#include <zlib.h> +#endif + +using namespace llvm; + +#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ +static Error createError(StringRef Err) { + return make_error<StringError>(Err, inconvertibleErrorCode()); +} + +static StringRef convertZlibCodeToString(int Code) { + switch (Code) { + case Z_MEM_ERROR: + return "zlib error: Z_MEM_ERROR"; + case Z_BUF_ERROR: + return "zlib error: Z_BUF_ERROR"; + case Z_STREAM_ERROR: + return "zlib error: Z_STREAM_ERROR"; + case Z_DATA_ERROR: + return "zlib error: Z_DATA_ERROR"; + case Z_OK: + default: + llvm_unreachable("unknown or unexpected zlib status code"); + } +} + +bool zlib::isAvailable() { return true; } + +Error zlib::compress(StringRef InputBuffer, + SmallVectorImpl<char> &CompressedBuffer, int Level) { + unsigned long CompressedSize = ::compressBound(InputBuffer.size()); + CompressedBuffer.reserve(CompressedSize); + int Res = + ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize, + (const Bytef 
*)InputBuffer.data(), InputBuffer.size(), Level); + // Tell MemorySanitizer that zlib output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented ZLib. + __msan_unpoison(CompressedBuffer.data(), CompressedSize); + CompressedBuffer.set_size(CompressedSize); + return Res ? createError(convertZlibCodeToString(Res)) : Error::success(); +} + +Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, + size_t &UncompressedSize) { + int Res = + ::uncompress((Bytef *)UncompressedBuffer, (uLongf *)&UncompressedSize, + (const Bytef *)InputBuffer.data(), InputBuffer.size()); + // Tell MemorySanitizer that zlib output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented ZLib. + __msan_unpoison(UncompressedBuffer, UncompressedSize); + return Res ? createError(convertZlibCodeToString(Res)) : Error::success(); +} + +Error zlib::uncompress(StringRef InputBuffer, + SmallVectorImpl<char> &UncompressedBuffer, + size_t UncompressedSize) { + UncompressedBuffer.resize(UncompressedSize); + Error E = + uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize); + UncompressedBuffer.resize(UncompressedSize); + return E; +} + +uint32_t zlib::crc32(StringRef Buffer) { + return ::crc32(0, (const Bytef *)Buffer.data(), Buffer.size()); +} + +#else +bool zlib::isAvailable() { return false; } +Error zlib::compress(StringRef InputBuffer, + SmallVectorImpl<char> &CompressedBuffer, int Level) { + llvm_unreachable("zlib::compress is unavailable"); +} +Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, + size_t &UncompressedSize) { + llvm_unreachable("zlib::uncompress is unavailable"); +} +Error zlib::uncompress(StringRef InputBuffer, + SmallVectorImpl<char> &UncompressedBuffer, + size_t UncompressedSize) { + llvm_unreachable("zlib::uncompress is unavailable"); +} +uint32_t zlib::crc32(StringRef Buffer) { + llvm_unreachable("zlib::crc32 is unavailable"); +} +#endif 
diff --git a/llvm/lib/Support/ConvertUTF.cpp b/llvm/lib/Support/ConvertUTF.cpp new file mode 100644 index 0000000000000..e24a918c5c898 --- /dev/null +++ b/llvm/lib/Support/ConvertUTF.cpp @@ -0,0 +1,738 @@ +/*===--- ConvertUTF.c - Universal Character Names conversions ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=*/ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. 
+ Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + + See the header file "ConvertUTF.h" for complete documentation. + +------------------------------------------------------------------------ */ + +#include "llvm/Support/ConvertUTF.h" +#ifdef CVTUTF_DEBUG +#include <stdio.h> +#endif +#include <assert.h> + +/* + * This code extensively uses fall-through switches. + * Keep the compiler from warning about that. + */ +#if defined(__clang__) && defined(__has_warning) +# if __has_warning("-Wimplicit-fallthrough") +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("clang diagnostic pop") +# endif +#elif defined(__GNUC__) && __GNUC__ > 6 +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#endif +#ifndef ConvertUTF_DISABLE_WARNINGS +# define ConvertUTF_DISABLE_WARNINGS +#endif +#ifndef ConvertUTF_RESTORE_WARNINGS +# define ConvertUTF_RESTORE_WARNINGS +#endif + +ConvertUTF_DISABLE_WARNINGS + +namespace llvm { + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. 
The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. + */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. + * Constants have been gathered. Loops & conditionals have been removed as + * much as possible for efficiency, in favor of drop-through switches. + * (See "Note A" at the bottom of the file for equivalent code.) + * If your compiler supports it, the "isLegalUTF8" call can be turned + * into an inline function. 
+ */ + + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. 
*/ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! */ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG +if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); +} +#endif + return result; +} +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. 
*/ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. 
*/ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. 
*/ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ + +static Boolean isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. 
+ */ +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (length > sourceEnd - source) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +static unsigned +findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source, + const UTF8 *sourceEnd) { + UTF8 b1, b2, b3; + + assert(!isLegalUTF8Sequence(source, sourceEnd)); + + /* + * Unicode 6.3.0, D93b: + * + * Maximal subpart of an ill-formed subsequence: The longest code unit + * subsequence starting at an unconvertible offset that is either: + * a. the initial subsequence of a well-formed code unit sequence, or + * b. a subsequence of length one. + */ + + if (source == sourceEnd) + return 0; + + /* + * Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8 + * Byte Sequences. + */ + + b1 = *source; + ++source; + if (b1 >= 0xC2 && b1 <= 0xDF) { + /* + * First byte is valid, but we know that this code unit sequence is + * invalid, so the maximal subpart has to end after the first byte. + */ + return 1; + } + + if (source == sourceEnd) + return 1; + + b2 = *source; + ++source; + + if (b1 == 0xE0) { + return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1; + } + if (b1 >= 0xE1 && b1 <= 0xEC) { + return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; + } + if (b1 == 0xED) { + return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1; + } + if (b1 >= 0xEE && b1 <= 0xEF) { + return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; + } + if (b1 == 0xF0) { + if (b2 >= 0x90 && b2 <= 0xBF) { + if (source == sourceEnd) + return 2; + + b3 = *source; + return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; + } + return 1; + } + if (b1 >= 0xF1 && b1 <= 0xF3) { + if (b2 >= 0x80 && b2 <= 0xBF) { + if (source == sourceEnd) + return 2; + + b3 = *source; + return (b3 >= 0x80 && b3 <= 0xBF) ? 
3 : 2; + } + return 1; + } + if (b1 == 0xF4) { + if (b2 >= 0x80 && b2 <= 0x8F) { + if (source == sourceEnd) + return 2; + + b3 = *source; + return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; + } + return 1; + } + + assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5); + /* + * There are no valid sequences that start with these bytes. Maximal subpart + * is defined to have length 1 in these cases. + */ + return 1; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return the total number of bytes in a codepoint + * represented in UTF-8, given the value of the first byte. + */ +unsigned getNumBytesForUTF8(UTF8 first) { + return trailingBytesForUTF8[first] + 1; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 string is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) { + while (*source != sourceEnd) { + int length = trailingBytesForUTF8[**source] + 1; + if (length > sourceEnd - *source || !isLegalUTF8(*source, length)) + return false; + *source += length; + } + return true; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (extraBytesToRead >= sourceEnd - source) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (!isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. 
+ */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +static ConversionResult ConvertUTF8toUTF32Impl( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags, + Boolean InputIsPartial) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (extraBytesToRead >= sourceEnd - source) { + if (flags == strictConversion || InputIsPartial) { + result = sourceExhausted; + break; + } else { + result = sourceIllegal; + + /* + * Replace the maximal subpart of ill-formed sequence with + * replacement character. + */ + source += findMaximalSubpartOfIllFormedUTF8Sequence(source, + sourceEnd); + *target++ = UNI_REPLACEMENT_CHAR; + continue; + } + } + if (target >= targetEnd) { + result = targetExhausted; break; + } + + /* Do this check whether lenient or strict */ + if (!isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + if (flags == strictConversion) { + /* Abort conversion. */ + break; + } else { + /* + * Replace the maximal subpart of ill-formed sequence with + * replacement character. + */ + source += findMaximalSubpartOfIllFormedUTF8Sequence(source, + sourceEnd); + *target++ = UNI_REPLACEMENT_CHAR; + continue; + } + } + /* + * The cases all fall through. See "Note A" below. 
+ */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. + */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, + const UTF8 *sourceEnd, + UTF32 **targetStart, + UTF32 *targetEnd, + ConversionFlags flags) { + return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd, + flags, /*InputIsPartial=*/true); +} + +ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, + const UTF8 *sourceEnd, UTF32 **targetStart, + UTF32 *targetEnd, ConversionFlags flags) { + return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd, + flags, /*InputIsPartial=*/false); +} + +/* --------------------------------------------------------------------- + + Note A. + The fall-through switches in UTF-8 reading code save a + temp variable, some decrements & conditionals. The switches + are equivalent to the following loop: + { + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); + } + In UTF-8 writing code, the switches on "bytesToWrite" are + similarly unrolled loops. 
+ + --------------------------------------------------------------------- */ + +} // namespace llvm + +ConvertUTF_RESTORE_WARNINGS diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp new file mode 100644 index 0000000000000..eb4ead6b46b45 --- /dev/null +++ b/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -0,0 +1,250 @@ +//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----=== +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SwapByteOrder.h" +#include <string> +#include <vector> + +namespace llvm { + +bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, + char *&ResultPtr, const UTF8 *&ErrorPtr) { + assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); + ConversionResult result = conversionOK; + // Copy the character span over. + if (WideCharWidth == 1) { + const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.begin()); + if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.end()))) { + result = sourceIllegal; + ErrorPtr = Pos; + } else { + memcpy(ResultPtr, Source.data(), Source.size()); + ResultPtr += Source.size(); + } + } else if (WideCharWidth == 2) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. 
+ UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF16( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast<char*>(targetStart); + else + ErrorPtr = sourceStart; + } else if (WideCharWidth == 4) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF32( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast<char*>(targetStart); + else + ErrorPtr = sourceStart; + } + assert((result != targetExhausted) + && "ConvertUTF8toUTFXX exhausted target buffer"); + return result == conversionOK; +} + +bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { + const UTF32 *SourceStart = &Source; + const UTF32 *SourceEnd = SourceStart + 1; + UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr); + UTF8 *TargetEnd = TargetStart + 4; + ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd, + &TargetStart, TargetEnd, + strictConversion); + if (CR != conversionOK) + return false; + + ResultPtr = reinterpret_cast<char*>(TargetStart); + return true; +} + +bool hasUTF16ByteOrderMark(ArrayRef<char> S) { + return (S.size() >= 2 && + ((S[0] == '\xff' && S[1] == '\xfe') || + (S[0] == '\xfe' && S[1] == '\xff'))); +} + +bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { + assert(Out.empty()); + + // Error out on an uneven byte count. + if (SrcBytes.size() % 2) + return false; + + // Avoid OOB by returning early on empty input. 
+ if (SrcBytes.empty()) + return true; + + const UTF16 *Src = reinterpret_cast<const UTF16 *>(SrcBytes.begin()); + const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(SrcBytes.end()); + + // Byteswap if necessary. + std::vector<UTF16> ByteSwapped; + if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) { + ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); + for (unsigned I = 0, E = ByteSwapped.size(); I != E; ++I) + ByteSwapped[I] = llvm::sys::SwapByteOrder_16(ByteSwapped[I]); + Src = &ByteSwapped[0]; + SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; + } + + // Skip the BOM for conversion. + if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE) + Src++; + + // Just allocate enough space up front. We'll shrink it later. Allocate + // enough that we can fit a null terminator without reallocating. + Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1); + UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]); + UTF8 *DstEnd = Dst + Out.size(); + + ConversionResult CR = + ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion); + assert(CR != targetExhausted); + + if (CR != conversionOK) { + Out.clear(); + return false; + } + + Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]); + Out.push_back(0); + Out.pop_back(); + return true; +} + +bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out) +{ + return convertUTF16ToUTF8String( + llvm::ArrayRef<char>(reinterpret_cast<const char *>(Src.data()), + Src.size() * sizeof(UTF16)), Out); +} + +bool convertUTF8ToUTF16String(StringRef SrcUTF8, + SmallVectorImpl<UTF16> &DstUTF16) { + assert(DstUTF16.empty()); + + // Avoid OOB by returning early on empty input. + if (SrcUTF8.empty()) { + DstUTF16.push_back(0); + DstUTF16.pop_back(); + return true; + } + + const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin()); + const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end()); + + // Allocate the same number of UTF-16 code units as UTF-8 code units. 
Encoding + // as UTF-16 should always require the same amount or less code units than the + // UTF-8 encoding. Allocate one extra byte for the null terminator though, + // so that someone calling DstUTF16.data() gets a null terminated string. + // We resize down later so we don't have to worry that this over allocates. + DstUTF16.resize(SrcUTF8.size()+1); + UTF16 *Dst = &DstUTF16[0]; + UTF16 *DstEnd = Dst + DstUTF16.size(); + + ConversionResult CR = + ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion); + assert(CR != targetExhausted); + + if (CR != conversionOK) { + DstUTF16.clear(); + return false; + } + + DstUTF16.resize(Dst - &DstUTF16[0]); + DstUTF16.push_back(0); + DstUTF16.pop_back(); + return true; +} + +static_assert(sizeof(wchar_t) == 1 || sizeof(wchar_t) == 2 || + sizeof(wchar_t) == 4, + "Expected wchar_t to be 1, 2, or 4 bytes"); + +template <typename TResult> +static inline bool ConvertUTF8toWideInternal(llvm::StringRef Source, + TResult &Result) { + // Even in the case of UTF-16, the number of bytes in a UTF-8 string is + // at least as large as the number of elements in the resulting wide + // string, because surrogate pairs take at least 4 bytes in UTF-8. 
+ Result.resize(Source.size() + 1); + char *ResultPtr = reinterpret_cast<char *>(&Result[0]); + const UTF8 *ErrorPtr; + if (!ConvertUTF8toWide(sizeof(wchar_t), Source, ResultPtr, ErrorPtr)) { + Result.clear(); + return false; + } + Result.resize(reinterpret_cast<wchar_t *>(ResultPtr) - &Result[0]); + return true; +} + +bool ConvertUTF8toWide(llvm::StringRef Source, std::wstring &Result) { + return ConvertUTF8toWideInternal(Source, Result); +} + +bool ConvertUTF8toWide(const char *Source, std::wstring &Result) { + if (!Source) { + Result.clear(); + return true; + } + return ConvertUTF8toWide(llvm::StringRef(Source), Result); +} + +bool convertWideToUTF8(const std::wstring &Source, std::string &Result) { + if (sizeof(wchar_t) == 1) { + const UTF8 *Start = reinterpret_cast<const UTF8 *>(Source.data()); + const UTF8 *End = + reinterpret_cast<const UTF8 *>(Source.data() + Source.size()); + if (!isLegalUTF8String(&Start, End)) + return false; + Result.resize(Source.size()); + memcpy(&Result[0], Source.data(), Source.size()); + return true; + } else if (sizeof(wchar_t) == 2) { + return convertUTF16ToUTF8String( + llvm::ArrayRef<UTF16>(reinterpret_cast<const UTF16 *>(Source.data()), + Source.size()), + Result); + } else if (sizeof(wchar_t) == 4) { + const UTF32 *Start = reinterpret_cast<const UTF32 *>(Source.data()); + const UTF32 *End = + reinterpret_cast<const UTF32 *>(Source.data() + Source.size()); + Result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * Source.size()); + UTF8 *ResultPtr = reinterpret_cast<UTF8 *>(&Result[0]); + UTF8 *ResultEnd = reinterpret_cast<UTF8 *>(&Result[0] + Result.size()); + if (ConvertUTF32toUTF8(&Start, End, &ResultPtr, ResultEnd, + strictConversion) == conversionOK) { + Result.resize(reinterpret_cast<char *>(ResultPtr) - &Result[0]); + return true; + } else { + Result.clear(); + return false; + } + } else { + llvm_unreachable( + "Control should never reach this point; see static_assert further up"); + } +} + +} // end namespace llvm + diff 
--git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
new file mode 100644
index 0000000000000..9d13fce9cc52a
--- /dev/null
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -0,0 +1,411 @@
//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ThreadLocal.h"
#include <mutex>
#include <setjmp.h>
using namespace llvm;

namespace {

struct CrashRecoveryContextImpl;

// The innermost active CrashRecoveryContextImpl for the current thread.
static ManagedStatic<
    sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext;

/// Per-RunSafely state: the jump buffer to return to on a crash plus the
/// bookkeeping needed to restore CurrentContext afterwards.
struct CrashRecoveryContextImpl {
  // When threads are disabled, this links up all active
  // CrashRecoveryContextImpls.  When threads are enabled there's one thread
  // per CrashRecoveryContext and CurrentContext is a thread-local, so only one
  // CrashRecoveryContextImpl is active per thread and this is always null.
  const CrashRecoveryContextImpl *Next;

  CrashRecoveryContext *CRC;
  ::jmp_buf JumpBuffer;
  volatile unsigned Failed : 1;
  unsigned SwitchedThread : 1;

public:
  /// Registers this object as the current context, remembering the previous
  /// one in Next.
  CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
                                                        Failed(false),
                                                        SwitchedThread(false) {
    Next = CurrentContext->get();
    CurrentContext->set(this);
  }
  /// Restores the previous context unless setSwitchedThread() recorded that
  /// there is nothing to restore.
  ~CrashRecoveryContextImpl() {
    if (!SwitchedThread)
      CurrentContext->set(Next);
  }

  /// Called when the separate crash-recovery thread was finished, to
  /// indicate that we don't need to clear the thread-local CurrentContext.
  void setSwitchedThread() {
#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
    SwitchedThread = true;
#endif
  }

  /// Marks the context as failed and longjmps back to the setjmp stored in
  /// JumpBuffer; does not return.
  void HandleCrash() {
    // Eliminate the current context entry, to avoid re-entering in case the
    // cleanup code crashes.
    CurrentContext->set(Next);

    assert(!Failed && "Crash recovery context already failed!");
    Failed = true;

    // FIXME: Stash the backtrace.

    // Jump back to the RunSafely we were called under.
    longjmp(JumpBuffer, 1);
  }
};

}

// Guards gCrashRecoveryEnabled in Enable() and Disable().
static ManagedStatic<std::mutex> gCrashRecoveryContextMutex;
static bool gCrashRecoveryEnabled = false;

// Non-null while ~CrashRecoveryContext is running its registered cleanups on
// this thread.
static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
       tlIsRecoveringFromCrash;

static void installExceptionOrSignalHandlers();
static void uninstallExceptionOrSignalHandlers();

CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}

/// Runs and deletes every cleanup still registered on this context, then
/// deletes the implementation object.
CrashRecoveryContext::~CrashRecoveryContext() {
  // Reclaim registered resources.
  CrashRecoveryContextCleanup *i = head;
  // Save the previous value so nested contexts restore it afterwards.
  const CrashRecoveryContext *PC = tlIsRecoveringFromCrash->get();
  tlIsRecoveringFromCrash->set(this);
  while (i) {
    CrashRecoveryContextCleanup *tmp = i;
    i = tmp->next;
    tmp->cleanupFired = true;
    tmp->recoverResources();
    delete tmp;
  }
  tlIsRecoveringFromCrash->set(PC);

  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
  delete CRCI;
}

/// Returns true while a CrashRecoveryContext destructor is running cleanups
/// on the current thread.
bool CrashRecoveryContext::isRecoveringFromCrash() {
  return tlIsRecoveringFromCrash->get() != nullptr;
}

/// Returns the context active on this thread, or null if crash recovery is
/// disabled or no context is active.
CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
  if (!gCrashRecoveryEnabled)
    return nullptr;

  const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
  if (!CRCI)
    return nullptr;

  return CRCI->CRC;
}

/// Turns crash recovery on and installs the platform handlers; a no-op if it
/// is already enabled.
void CrashRecoveryContext::Enable() {
  std::lock_guard<std::mutex> L(*gCrashRecoveryContextMutex);
  // FIXME: Shouldn't this be a refcount or something?
  if (gCrashRecoveryEnabled)
    return;
  gCrashRecoveryEnabled = true;
  installExceptionOrSignalHandlers();
}

/// Turns crash recovery off and removes the platform handlers; a no-op if it
/// is already disabled.
void CrashRecoveryContext::Disable() {
  std::lock_guard<std::mutex> L(*gCrashRecoveryContextMutex);
  if (!gCrashRecoveryEnabled)
    return;
  gCrashRecoveryEnabled = false;
  uninstallExceptionOrSignalHandlers();
}

/// Pushes \p cleanup onto the front of this context's doubly linked cleanup
/// list. A null \p cleanup is ignored.
void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
{
  if (!cleanup)
    return;
  if (head)
    head->prev = cleanup;
  cleanup->next = head;
  head = cleanup;
}

/// Unlinks \p cleanup from the cleanup list and deletes it without running
/// it. A null \p cleanup is ignored.
void
CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
  if (!cleanup)
    return;
  if (cleanup == head) {
    head = cleanup->next;
    if (head)
      head->prev = nullptr;
  }
  else {
    // Not the head, so cleanup->prev is expected to be a valid node.
    cleanup->prev->next = cleanup->next;
    if (cleanup->next)
      cleanup->next->prev = cleanup->prev;
  }
  delete cleanup;
}

#if defined(_MSC_VER)
// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
// better than VEH. Vectored exception handling catches all exceptions happening
// on the thread with installed exception handlers, so it can interfere with
// internal exception handling of other libraries on that thread. SEH works
// exactly as you would expect normal exception handling to work: it only
// catches exceptions if they would bubble out from the stack frame with __try /
// __except.

// Nothing to install for SEH: the __try/__except in RunSafely does the work.
static void installExceptionOrSignalHandlers() {}
static void uninstallExceptionOrSignalHandlers() {}

/// Runs \p Fn, returning false if it raised a structured exception. When
/// crash recovery is disabled \p Fn is simply called and true is returned.
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
  if (!gCrashRecoveryEnabled) {
    Fn();
    return true;
  }

  bool Result = true;
  __try {
    Fn();
  } __except (1) { // Catch any exception.
    Result = false;
  }
  return Result;
}

#else // !_MSC_VER

#if defined(_WIN32)
// This is a non-MSVC compiler, probably mingw gcc or clang without
// -fms-extensions. Use vectored exception handling (VEH).
//
// On Windows, we can make use of vectored exception handling to catch most
// crashing situations.  Note that this does mean we will be alerted of
// exceptions *before* structured exception handling has the opportunity to
// catch it. Unfortunately, this causes problems in practice with other code
// running on threads with LLVM crash recovery contexts, so we would like to
// eventually move away from VEH.
//
// Vectored works on a per-thread basis, which is an advantage over
// SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have
// any native support for chaining exception handlers, but VEH allows more than
// one.
//
// The vectored exception handler functionality was added in Windows
// XP, so if support for older versions of Windows is required,
// it will have to be added.

#include "Windows/WindowsSupport.h"

/// Vectored exception handler. Lets a few debugger-related exception codes
/// continue, otherwise hands the crash to the current thread's recovery
/// context, or disables crash recovery if there is none.
static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
{
  // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported
  // compilers and platforms, so we define it manually.
  constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL;
  switch (ExceptionInfo->ExceptionRecord->ExceptionCode)
  {
  case DBG_PRINTEXCEPTION_C:
  case DbgPrintExceptionWideC:
  case 0x406D1388: // set debugger thread name
    return EXCEPTION_CONTINUE_EXECUTION;
  }

  // Lookup the current thread local recovery object.
  const CrashRecoveryContextImpl *CRCI = CurrentContext->get();

  if (!CRCI) {
    // Something has gone horribly wrong, so let's just tell everyone
    // to keep searching
    CrashRecoveryContext::Disable();
    return EXCEPTION_CONTINUE_SEARCH;
  }

  // TODO: We can capture the stack backtrace here and store it on the
  // implementation if we so choose.

  // Handle the crash
  const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();

  // Note that we don't actually get here because HandleCrash calls
  // longjmp, which means the HandleCrash function never returns.
  llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
}

// Because the Enable and Disable calls are static, it means that
// there may not actually be an Impl available, or even a current
// CrashRecoveryContext at all.  So we make use of a thread-local
// exception table.  The handles contained in here will either be
// non-NULL, valid VEH handles, or NULL.
static sys::ThreadLocal<const void> sCurrentExceptionHandle;

/// Registers ExceptionHandler as a vectored exception handler and stores the
/// returned handle in the thread-local sCurrentExceptionHandle.
static void installExceptionOrSignalHandlers() {
  // We can set up vectored exception handling now.  We will install our
  // handler as the front of the list, though there's no assurances that
  // it will remain at the front (another call could install itself before
  // our handler).  This 1) isn't likely, and 2) shouldn't cause problems.
  PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler);
  sCurrentExceptionHandle.set(handle);
}

/// Removes the handler recorded in sCurrentExceptionHandle, if any, and
/// resets the stored handle.
static void uninstallExceptionOrSignalHandlers() {
  PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
  if (currentHandle) {
    // Now we can remove the vectored exception handler from the chain
    ::RemoveVectoredExceptionHandler(currentHandle);

    // Reset the handle in our thread-local set.
    sCurrentExceptionHandle.set(NULL);
  }
}

#else // !_WIN32

// Generic POSIX implementation.
//
// This implementation relies on synchronous signals being delivered to the
// current thread. We use a thread local object to keep track of the active
// crash recovery context, and install signal handlers to invoke HandleCrash on
// the active object.
//
// This implementation does not attempt to chain signal handlers in any
// reliable fashion -- if we get a signal outside of a crash recovery context we
// simply disable crash recovery and raise the signal again.
#include <signal.h>

// The signals that are intercepted while crash recovery is enabled.
static const int Signals[] =
    { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
static const unsigned NumSignals = array_lengthof(Signals);
// Previous dispositions, saved by install and restored by uninstall; indexed
// in parallel with Signals.
static struct sigaction PrevActions[NumSignals];

/// Signal handler installed for every entry of Signals. Dispatches to the
/// current thread's recovery context, or disables crash recovery and
/// re-raises the signal when there is none.
static void CrashRecoverySignalHandler(int Signal) {
  // Lookup the current thread local recovery object.
  const CrashRecoveryContextImpl *CRCI = CurrentContext->get();

  if (!CRCI) {
    // We didn't find a crash recovery context -- this means either we got a
    // signal on a thread we didn't expect it on, the application got a signal
    // outside of a crash recovery context, or something else went horribly
    // wrong.
    //
    // Disable crash recovery and raise the signal again. The assumption here is
    // that the enclosing application will terminate soon, and we won't want to
    // attempt crash recovery again.
    //
    // This call of Disable isn't thread safe, but it doesn't actually matter.
    CrashRecoveryContext::Disable();
    raise(Signal);

    // The signal will be thrown once the signal mask is restored.
    return;
  }

  // Unblock the signal we received.
  sigset_t SigMask;
  sigemptyset(&SigMask);
  sigaddset(&SigMask, Signal);
  sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);

  // NOTE(review): CRCI is always non-null here because of the early return
  // above, so this check looks redundant.
  if (CRCI)
    const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
}

/// Installs CrashRecoverySignalHandler for every entry of Signals, saving
/// the previous actions in PrevActions.
static void installExceptionOrSignalHandlers() {
  // Setup the signal handler.
  struct sigaction Handler;
  Handler.sa_handler = CrashRecoverySignalHandler;
  Handler.sa_flags = 0;
  sigemptyset(&Handler.sa_mask);

  for (unsigned i = 0; i != NumSignals; ++i) {
    sigaction(Signals[i], &Handler, &PrevActions[i]);
  }
}

/// Restores the signal actions saved by installExceptionOrSignalHandlers().
static void uninstallExceptionOrSignalHandlers() {
  // Restore the previous signal handlers.
  for (unsigned i = 0; i != NumSignals; ++i)
    sigaction(Signals[i], &PrevActions[i], nullptr);
}

#endif // !_WIN32

/// Runs \p Fn and returns true if it completed, or false if a crash was
/// caught and control came back through longjmp. When crash recovery is
/// disabled \p Fn is called without any protection.
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
  // If crash recovery is disabled, do nothing.
  if (gCrashRecoveryEnabled) {
    assert(!Impl && "Crash recovery context already initialized!");
    CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
    Impl = CRCI;

    // setjmp returns non-zero when HandleCrash() longjmps back here.
    if (setjmp(CRCI->JumpBuffer) != 0) {
      return false;
    }
  }

  Fn();
  return true;
}

#endif // !_MSC_VER

/// Forwards to the implementation object's HandleCrash(); asserts that
/// RunSafely has created one.
void CrashRecoveryContext::HandleCrash() {
  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
  assert(CRCI && "Crash recovery context never initialized!");
  CRCI->HandleCrash();
}

// FIXME: Portability.
/// Requests background priority for the current thread on Apple platforms;
/// does nothing elsewhere.
static void setThreadBackgroundPriority() {
#ifdef __APPLE__
  setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
#endif
}

/// Reports whether the current thread has background priority on Apple
/// platforms; always false elsewhere.
static bool hasThreadBackgroundPriority() {
#ifdef __APPLE__
  return getpriority(PRIO_DARWIN_THREAD, 0) == 1;
#else
  return false;
#endif
}

namespace {
/// Arguments and result slot passed to RunSafelyOnThread_Dispatch.
struct RunSafelyOnThreadInfo {
  function_ref<void()> Fn;
  CrashRecoveryContext *CRC;
  bool UseBackgroundPriority;
  bool Result;
};
}

/// Thread entry point: optionally lowers the thread priority, then runs the
/// callback under the crash recovery context and records the outcome in
/// Info->Result.
static void RunSafelyOnThread_Dispatch(void *UserData) {
  RunSafelyOnThreadInfo *Info =
    reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);

  if (Info->UseBackgroundPriority)
    setThreadBackgroundPriority();

  Info->Result = Info->CRC->RunSafely(Info->Fn);
}
/// Runs \p Fn under this context via llvm_execute_on_thread with the given
/// stack size, carrying over the calling thread's background priority, and
/// returns the RunSafely result.
bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
                                             unsigned RequestedStackSize) {
  bool UseBackgroundPriority = hasThreadBackgroundPriority();
  RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false };
  llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
  // Tell the implementation object not to restore CurrentContext when it is
  // destroyed.
  if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl)
    CRC->setSwitchedThread();
  return Info.Result;
}
diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
new file mode 100644
index 0000000000000..4675fe3a94010
--- /dev/null
+++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -0,0 +1,361 @@
//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
//
// Part
of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +// +// The algorithm we use attempts to exploit the dependency information by +// minimizing top-down. We start by constructing an initial root set R, and +// then iteratively: +// +// 1. Minimize the set R using the test predicate: +// P'(S) = P(S union pred*(S)) +// +// 2. Extend R to R' = R union pred(R). +// +// until a fixed point is reached. +// +// The idea is that we want to quickly prune entire portions of the graph, so we +// try to find high-level nodes that can be eliminated with all of their +// dependents. +// +// FIXME: The current algorithm doesn't actually provide a strong guarantee +// about the minimality of the result. The problem is that after adding nodes to +// the required set, we no longer consider them for elimination. For strictly +// well formed predicates, this doesn't happen, but it commonly occurs in +// practice when there are unmodelled dependencies. I believe we can resolve +// this by allowing the required set to be minimized as well, but need more test +// cases first. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DAGDeltaAlgorithm.h" +#include "llvm/ADT/DeltaAlgorithm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <map> +using namespace llvm; + +#define DEBUG_TYPE "dag-delta" + +namespace { + +class DAGDeltaAlgorithmImpl { + friend class DeltaActiveSetHelper; + +public: + typedef DAGDeltaAlgorithm::change_ty change_ty; + typedef DAGDeltaAlgorithm::changeset_ty changeset_ty; + typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty; + typedef DAGDeltaAlgorithm::edge_ty edge_ty; + +private: + typedef std::vector<change_ty>::iterator pred_iterator_ty; + typedef std::vector<change_ty>::iterator succ_iterator_ty; + typedef std::set<change_ty>::iterator pred_closure_iterator_ty; + typedef std::set<change_ty>::iterator succ_closure_iterator_ty; + + DAGDeltaAlgorithm &DDA; + + std::vector<change_ty> Roots; + + /// Cache of failed test results. Successful test results are never cached + /// since we always reduce following a success. We maintain an independent + /// cache from that used by the individual delta passes because we may get + /// hits across multiple individual delta invocations. + mutable std::set<changeset_ty> FailedTestsCache; + + // FIXME: Gross. 
+ std::map<change_ty, std::vector<change_ty> > Predecessors; + std::map<change_ty, std::vector<change_ty> > Successors; + + std::map<change_ty, std::set<change_ty> > PredClosure; + std::map<change_ty, std::set<change_ty> > SuccClosure; + +private: + pred_iterator_ty pred_begin(change_ty Node) { + assert(Predecessors.count(Node) && "Invalid node!"); + return Predecessors[Node].begin(); + } + pred_iterator_ty pred_end(change_ty Node) { + assert(Predecessors.count(Node) && "Invalid node!"); + return Predecessors[Node].end(); + } + + pred_closure_iterator_ty pred_closure_begin(change_ty Node) { + assert(PredClosure.count(Node) && "Invalid node!"); + return PredClosure[Node].begin(); + } + pred_closure_iterator_ty pred_closure_end(change_ty Node) { + assert(PredClosure.count(Node) && "Invalid node!"); + return PredClosure[Node].end(); + } + + succ_iterator_ty succ_begin(change_ty Node) { + assert(Successors.count(Node) && "Invalid node!"); + return Successors[Node].begin(); + } + succ_iterator_ty succ_end(change_ty Node) { + assert(Successors.count(Node) && "Invalid node!"); + return Successors[Node].end(); + } + + succ_closure_iterator_ty succ_closure_begin(change_ty Node) { + assert(SuccClosure.count(Node) && "Invalid node!"); + return SuccClosure[Node].begin(); + } + succ_closure_iterator_ty succ_closure_end(change_ty Node) { + assert(SuccClosure.count(Node) && "Invalid node!"); + return SuccClosure[Node].end(); + } + + void UpdatedSearchState(const changeset_ty &Changes, + const changesetlist_ty &Sets, + const changeset_ty &Required) { + DDA.UpdatedSearchState(Changes, Sets, Required); + } + + /// ExecuteOneTest - Execute a single test predicate on the change set \p S. + bool ExecuteOneTest(const changeset_ty &S) { + // Check dependencies invariant. 
+ LLVM_DEBUG({ + for (changeset_ty::const_iterator it = S.begin(), ie = S.end(); it != ie; + ++it) + for (succ_iterator_ty it2 = succ_begin(*it), ie2 = succ_end(*it); + it2 != ie2; ++it2) + assert(S.count(*it2) && "Attempt to run invalid changeset!"); + }); + + return DDA.ExecuteOneTest(S); + } + +public: + DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &DDA, const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies); + + changeset_ty Run(); + + /// GetTestResult - Get the test result for the active set \p Changes with + /// \p Required changes from the cache, executing the test if necessary. + /// + /// \param Changes - The set of active changes being minimized, which should + /// have their pred closure included in the test. + /// \param Required - The set of changes which have previously been + /// established to be required. + /// \return - The test result. + bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required); +}; + +/// Helper object for minimizing an active set of changes. +class DeltaActiveSetHelper : public DeltaAlgorithm { + DAGDeltaAlgorithmImpl &DDAI; + + const changeset_ty &Required; + +protected: + /// UpdatedSearchState - Callback used when the search state changes. 
+ void UpdatedSearchState(const changeset_ty &Changes, + const changesetlist_ty &Sets) override { + DDAI.UpdatedSearchState(Changes, Sets, Required); + } + + bool ExecuteOneTest(const changeset_ty &S) override { + return DDAI.GetTestResult(S, Required); + } + +public: + DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &DDAI, + const changeset_ty &Required) + : DDAI(DDAI), Required(Required) {} +}; + +} + +DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl( + DAGDeltaAlgorithm &DDA, const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies) + : DDA(DDA) { + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + Predecessors.insert(std::make_pair(*it, std::vector<change_ty>())); + Successors.insert(std::make_pair(*it, std::vector<change_ty>())); + } + for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(), + ie = Dependencies.end(); it != ie; ++it) { + Predecessors[it->second].push_back(it->first); + Successors[it->first].push_back(it->second); + } + + // Compute the roots. + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + if (succ_begin(*it) == succ_end(*it)) + Roots.push_back(*it); + + // Pre-compute the closure of the successor relation. + std::vector<change_ty> Worklist(Roots.begin(), Roots.end()); + while (!Worklist.empty()) { + change_ty Change = Worklist.back(); + Worklist.pop_back(); + + std::set<change_ty> &ChangeSuccs = SuccClosure[Change]; + for (pred_iterator_ty it = pred_begin(Change), + ie = pred_end(Change); it != ie; ++it) { + SuccClosure[*it].insert(Change); + SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end()); + Worklist.push_back(*it); + } + } + + // Invert to form the predecessor closure map. 
+ for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + PredClosure.insert(std::make_pair(*it, std::set<change_ty>())); + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), + ie2 = succ_closure_end(*it); it2 != ie2; ++it2) + PredClosure[*it2].insert(*it); + + // Dump useful debug info. + LLVM_DEBUG({ + llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n"; + llvm::errs() << "Changes: ["; + for (changeset_ty::const_iterator it = Changes.begin(), ie = Changes.end(); + it != ie; ++it) { + if (it != Changes.begin()) + llvm::errs() << ", "; + llvm::errs() << *it; + + if (succ_begin(*it) != succ_end(*it)) { + llvm::errs() << "("; + for (succ_iterator_ty it2 = succ_begin(*it), ie2 = succ_end(*it); + it2 != ie2; ++it2) { + if (it2 != succ_begin(*it)) + llvm::errs() << ", "; + llvm::errs() << "->" << *it2; + } + llvm::errs() << ")"; + } + } + llvm::errs() << "]\n"; + + llvm::errs() << "Roots: ["; + for (std::vector<change_ty>::const_iterator it = Roots.begin(), + ie = Roots.end(); + it != ie; ++it) { + if (it != Roots.begin()) + llvm::errs() << ", "; + llvm::errs() << *it; + } + llvm::errs() << "]\n"; + + llvm::errs() << "Predecessor Closure:\n"; + for (changeset_ty::const_iterator it = Changes.begin(), ie = Changes.end(); + it != ie; ++it) { + llvm::errs() << format(" %-4d: [", *it); + for (pred_closure_iterator_ty it2 = pred_closure_begin(*it), + ie2 = pred_closure_end(*it); + it2 != ie2; ++it2) { + if (it2 != pred_closure_begin(*it)) + llvm::errs() << ", "; + llvm::errs() << *it2; + } + llvm::errs() << "]\n"; + } + + llvm::errs() << "Successor Closure:\n"; + for (changeset_ty::const_iterator it = Changes.begin(), ie = Changes.end(); + it != ie; ++it) { + llvm::errs() << format(" %-4d: [", *it); + for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), + ie2 = succ_closure_end(*it); + it2 != ie2; ++it2) { + if (it2 != 
succ_closure_begin(*it)) + llvm::errs() << ", "; + llvm::errs() << *it2; + } + llvm::errs() << "]\n"; + } + + llvm::errs() << "\n\n"; + }); +} + +bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes, + const changeset_ty &Required) { + changeset_ty Extended(Required); + Extended.insert(Changes.begin(), Changes.end()); + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + Extended.insert(pred_closure_begin(*it), pred_closure_end(*it)); + + if (FailedTestsCache.count(Extended)) + return false; + + bool Result = ExecuteOneTest(Extended); + if (!Result) + FailedTestsCache.insert(Extended); + + return Result; +} + +DAGDeltaAlgorithm::changeset_ty +DAGDeltaAlgorithmImpl::Run() { + // The current set of changes we are minimizing, starting at the roots. + changeset_ty CurrentSet(Roots.begin(), Roots.end()); + + // The set of required changes. + changeset_ty Required; + + // Iterate until the active set of changes is empty. Convergence is guaranteed + // assuming input was a DAG. + // + // Invariant: CurrentSet intersect Required == {} + // Invariant: Required == (Required union succ*(Required)) + while (!CurrentSet.empty()) { + LLVM_DEBUG({ + llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, " + << Required.size() << " required changes\n"; + }); + + // Minimize the current set of changes. + DeltaActiveSetHelper Helper(*this, Required); + changeset_ty CurrentMinSet = Helper.Run(CurrentSet); + + // Update the set of required changes. Since + // CurrentMinSet subset CurrentSet + // and after the last iteration, + // succ(CurrentSet) subset Required + // then + // succ(CurrentMinSet) subset Required + // and our invariant on Required is maintained. + Required.insert(CurrentMinSet.begin(), CurrentMinSet.end()); + + // Replace the current set with the predecssors of the minimized set of + // active changes. 
+ CurrentSet.clear(); + for (changeset_ty::const_iterator it = CurrentMinSet.begin(), + ie = CurrentMinSet.end(); it != ie; ++it) + CurrentSet.insert(pred_begin(*it), pred_end(*it)); + + // FIXME: We could enforce CurrentSet intersect Required == {} here if we + // wanted to protect against cyclic graphs. + } + + return Required; +} + +void DAGDeltaAlgorithm::anchor() { +} + +DAGDeltaAlgorithm::changeset_ty +DAGDeltaAlgorithm::Run(const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies) { + return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run(); +} diff --git a/llvm/lib/Support/DJB.cpp b/llvm/lib/Support/DJB.cpp new file mode 100644 index 0000000000000..f06af7dfde444 --- /dev/null +++ b/llvm/lib/Support/DJB.cpp @@ -0,0 +1,82 @@ +//===-- Support/DJB.cpp ---DJB Hash -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for the DJ Bernstein hash function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DJB.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Unicode.h" + +using namespace llvm; + +static UTF32 chopOneUTF32(StringRef &Buffer) { + UTF32 C; + const UTF8 *const Begin8Const = + reinterpret_cast<const UTF8 *>(Buffer.begin()); + const UTF8 *Begin8 = Begin8Const; + UTF32 *Begin32 = &C; + + // In lenient mode we will always end up with a "reasonable" value in C for + // non-empty input. 
+ assert(!Buffer.empty()); + ConvertUTF8toUTF32(&Begin8, reinterpret_cast<const UTF8 *>(Buffer.end()), + &Begin32, &C + 1, lenientConversion); + Buffer = Buffer.drop_front(Begin8 - Begin8Const); + return C; +} + +static StringRef toUTF8(UTF32 C, MutableArrayRef<UTF8> Storage) { + const UTF32 *Begin32 = &C; + UTF8 *Begin8 = Storage.begin(); + + // The case-folded output should always be a valid unicode character, so use + // strict mode here. + ConversionResult CR = ConvertUTF32toUTF8(&Begin32, &C + 1, &Begin8, + Storage.end(), strictConversion); + assert(CR == conversionOK && "Case folding produced invalid char?"); + (void)CR; + return StringRef(reinterpret_cast<char *>(Storage.begin()), + Begin8 - Storage.begin()); +} + +static UTF32 foldCharDwarf(UTF32 C) { + // DWARF v5 addition to the unicode folding rules. + // Fold "Latin Small Letter Dotless I" and "Latin Capital Letter I With Dot + // Above" into "i". + if (C == 0x130 || C == 0x131) + return 'i'; + return sys::unicode::foldCharSimple(C); +} + +static Optional<uint32_t> fastCaseFoldingDjbHash(StringRef Buffer, uint32_t H) { + bool AllASCII = true; + for (unsigned char C : Buffer) { + H = H * 33 + ('A' <= C && C <= 'Z' ? 
C - 'A' + 'a' : C); + AllASCII &= C <= 0x7f; + } + if (AllASCII) + return H; + return None; +} + +uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) { + if (Optional<uint32_t> Result = fastCaseFoldingDjbHash(Buffer, H)) + return *Result; + + std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage; + while (!Buffer.empty()) { + UTF32 C = foldCharDwarf(chopOneUTF32(Buffer)); + StringRef Folded = toUTF8(C, Storage); + H = djbHash(Folded, H); + } + return H; +} diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp new file mode 100644 index 0000000000000..a98297cdb35f2 --- /dev/null +++ b/llvm/lib/Support/DataExtractor.cpp @@ -0,0 +1,218 @@ +//===-- DataExtractor.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/SwapByteOrder.h" + +using namespace llvm; + +static void unexpectedEndReached(Error *E) { + if (E) + *E = createStringError(errc::illegal_byte_sequence, + "unexpected end of data"); +} + +static bool isError(Error *E) { return E && *E; } + +template <typename T> +static T getU(uint64_t *offset_ptr, const DataExtractor *de, + bool isLittleEndian, const char *Data, llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + T val = 0; + if (isError(Err)) + return val; + + uint64_t offset = *offset_ptr; + if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) { + unexpectedEndReached(Err); + return val; + } + std::memcpy(&val, &Data[offset], sizeof(val)); + if (sys::IsLittleEndianHost != isLittleEndian) + 
sys::swapByteOrder(val); + + // Advance the offset + *offset_ptr += sizeof(val); + return val; +} + +template <typename T> +static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count, + const DataExtractor *de, bool isLittleEndian, const char *Data, + llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return nullptr; + + uint64_t offset = *offset_ptr; + + if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) { + unexpectedEndReached(Err); + return nullptr; + } + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data, Err); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted data as an indicator of + // success + return dst; +} + +uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); +} + +uint8_t * +DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const { + return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data(), nullptr); +} + +uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const { + return getUs<uint8_t>(&C.Offset, Dst, Count, this, IsLittleEndian, + Data.data(), &C.Err); +} + +uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); +} + +uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst, + uint32_t count) const { + return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data(), nullptr); +} + +uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const { + uint24_t ExtractedVal = + getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data(), nullptr); + // The 3 bytes are in the correct byte order for the host. 
+ return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); +} + +uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); +} + +uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst, + uint32_t count) const { + return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data(), nullptr); +} + +uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); +} + +uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst, + uint32_t count) const { + return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian, + Data.data(), nullptr); +} + +uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, + llvm::Error *Err) const { + switch (byte_size) { + case 1: + return getU8(offset_ptr, Err); + case 2: + return getU16(offset_ptr, Err); + case 4: + return getU32(offset_ptr, Err); + case 8: + return getU64(offset_ptr, Err); + } + llvm_unreachable("getUnsigned unhandled case!"); +} + +int64_t +DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const { + switch (byte_size) { + case 1: + return (int8_t)getU8(offset_ptr); + case 2: + return (int16_t)getU16(offset_ptr); + case 4: + return (int32_t)getU32(offset_ptr); + case 8: + return (int64_t)getU64(offset_ptr); + } + llvm_unreachable("getSigned unhandled case!"); +} + +const char *DataExtractor::getCStr(uint64_t *offset_ptr) const { + uint64_t offset = *offset_ptr; + StringRef::size_type pos = Data.find('\0', offset); + if (pos != StringRef::npos) { + *offset_ptr = pos + 1; + return Data.data() + offset; + } + return nullptr; +} + +StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const { + uint64_t Start = *offset_ptr; + StringRef::size_type Pos = Data.find('\0', Start); + if (Pos != StringRef::npos) { + *offset_ptr = Pos + 1; + return 
StringRef(Data.data() + Start, Pos - Start); + } + return StringRef(); +} + +uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, + llvm::Error *Err) const { + assert(*offset_ptr <= Data.size()); + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return 0; + + const char *error; + unsigned bytes_read; + uint64_t result = decodeULEB128( + reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read, + reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error); + if (error) { + if (Err) + *Err = createStringError(errc::illegal_byte_sequence, error); + return 0; + } + *offset_ptr += bytes_read; + return result; +} + +int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const { + assert(*offset_ptr <= Data.size()); + + const char *error; + unsigned bytes_read; + int64_t result = decodeSLEB128( + reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read, + reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error); + if (error) + return 0; + *offset_ptr += bytes_read; + return result; +} + +void DataExtractor::skip(Cursor &C, uint64_t Length) const { + ErrorAsOutParameter ErrAsOut(&C.Err); + if (isError(&C.Err)) + return; + + if (isValidOffsetForDataOfSize(C.Offset, Length)) + C.Offset += Length; + else + unexpectedEndReached(&C.Err); +} diff --git a/llvm/lib/Support/Debug.cpp b/llvm/lib/Support/Debug.cpp new file mode 100644 index 0000000000000..737cd576ed80f --- /dev/null +++ b/llvm/lib/Support/Debug.cpp @@ -0,0 +1,165 @@ +//===-- Debug.cpp - An easy way to add debug output to your code ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a handy way of adding debugging information to your +// code, without it being enabled all of the time, and without having to add +// command line options to enable it. +// +// In particular, just wrap your code with the LLVM_DEBUG() macro, and it will +// be enabled automatically if you specify '-debug' on the command-line. +// Alternatively, you can also use the SET_DEBUG_TYPE("foo") macro to specify +// that your debug code belongs to class "foo". Then, on the command line, you +// can specify '-debug-only=foo' to enable JUST the debug information for the +// foo class. +// +// When compiling without assertions, the -debug-* options and all code in +// LLVM_DEBUG() statements disappears, so it does not affect the runtime of the +// code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/circular_raw_ostream.h" +#include "llvm/Support/raw_ostream.h" + +#undef isCurrentDebugType +#undef setCurrentDebugType +#undef setCurrentDebugTypes + +using namespace llvm; + +// Even though LLVM might be built with NDEBUG, define symbols that the code +// built without NDEBUG can depend on via the llvm/Support/Debug.h header. +namespace llvm { +/// Exported boolean set by the -debug option. +bool DebugFlag = false; + +static ManagedStatic<std::vector<std::string>> CurrentDebugType; + +/// Return true if the specified string is the debug type +/// specified on the command line, or if none was specified on the command line +/// with the -debug-only=X option. +bool isCurrentDebugType(const char *DebugType) { + if (CurrentDebugType->empty()) + return true; + // See if DebugType is in list. 
Note: do not use find() as that forces us to + // unnecessarily create an std::string instance. + for (auto &d : *CurrentDebugType) { + if (d == DebugType) + return true; + } + return false; +} + +/// Set the current debug type, as if the -debug-only=X +/// option were specified. Note that DebugFlag also needs to be set to true for +/// debug output to be produced. +/// +void setCurrentDebugTypes(const char **Types, unsigned Count); + +void setCurrentDebugType(const char *Type) { + setCurrentDebugTypes(&Type, 1); +} + +void setCurrentDebugTypes(const char **Types, unsigned Count) { + CurrentDebugType->clear(); + for (size_t T = 0; T < Count; ++T) + CurrentDebugType->push_back(Types[T]); +} +} // namespace llvm + +// All Debug.h functionality is a no-op in NDEBUG mode. +#ifndef NDEBUG + +// -debug - Command line option to enable the DEBUG statements in the passes. +// This flag may only be enabled in debug builds. +static cl::opt<bool, true> +Debug("debug", cl::desc("Enable debug output"), cl::Hidden, + cl::location(DebugFlag)); + +// -debug-buffer-size - Buffer the last N characters of debug output +//until program termination. +static cl::opt<unsigned> +DebugBufferSize("debug-buffer-size", + cl::desc("Buffer the last N characters of debug output " + "until program termination. 
" + "[default 0 -- immediate print-out]"), + cl::Hidden, + cl::init(0)); + +namespace { + +struct DebugOnlyOpt { + void operator=(const std::string &Val) const { + if (Val.empty()) + return; + DebugFlag = true; + SmallVector<StringRef,8> dbgTypes; + StringRef(Val).split(dbgTypes, ',', -1, false); + for (auto dbgType : dbgTypes) + CurrentDebugType->push_back(dbgType); + } +}; + +} + +static DebugOnlyOpt DebugOnlyOptLoc; + +static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > +DebugOnly("debug-only", cl::desc("Enable a specific type of debug output (comma separated list of types)"), + cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"), + cl::location(DebugOnlyOptLoc), cl::ValueRequired); +// Signal handlers - dump debug output on termination. +static void debug_user_sig_handler(void *Cookie) { + // This is a bit sneaky. Since this is under #ifndef NDEBUG, we + // know that debug mode is enabled and dbgs() really is a + // circular_raw_ostream. If NDEBUG is defined, then dbgs() == + // errs() but this will never be invoked. + llvm::circular_raw_ostream &dbgout = + static_cast<circular_raw_ostream &>(llvm::dbgs()); + dbgout.flushBufferWithBanner(); +} + +/// dbgs - Return a circular-buffered debug stream. +raw_ostream &llvm::dbgs() { + // Do one-time initialization in a thread-safe way. + static struct dbgstream { + circular_raw_ostream strm; + + dbgstream() : + strm(errs(), "*** Debug Log Output ***\n", + (!EnableDebugBuffering || !DebugFlag) ? 0 : DebugBufferSize) { + if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0) + // TODO: Add a handler for SIGUSER1-type signals so the user can + // force a debug dump. + sys::AddSignalHandler(&debug_user_sig_handler, nullptr); + // Otherwise we've already set the debug stream buffer size to + // zero, disabling buffering so it will output directly to errs(). + } + } thestrm; + + return thestrm.strm; +} + +#else +// Avoid "has no symbols" warning. +namespace llvm { + /// dbgs - Return errs(). 
+ raw_ostream &dbgs() { + return errs(); + } +} + +#endif + +/// EnableDebugBuffering - Turn on signal handler installation. +/// +bool llvm::EnableDebugBuffering = false; diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp new file mode 100644 index 0000000000000..6598103658daa --- /dev/null +++ b/llvm/lib/Support/DebugCounter.cpp @@ -0,0 +1,136 @@ +#include "llvm/Support/DebugCounter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Options.h" + +using namespace llvm; + +namespace { +// This class overrides the default list implementation of printing so we +// can pretty print the list of debug counter options. This type of +// dynamic option is pretty rare (basically this and pass lists). +class DebugCounterList : public cl::list<std::string, DebugCounter> { +private: + using Base = cl::list<std::string, DebugCounter>; + +public: + template <class... Mods> + explicit DebugCounterList(Mods &&... Ms) : Base(std::forward<Mods>(Ms)...) {} + +private: + void printOptionInfo(size_t GlobalWidth) const override { + // This is a variant of from generic_parser_base::printOptionInfo. Sadly, + // it's not easy to make it more usable. We could get it to print these as + // options if we were a cl::opt and registered them, but lists don't have + // options, nor does the parser for std::string. The other mechanisms for + // options are global and would pollute the global namespace with our + // counters. Rather than go that route, we have just overridden the + // printing, which only a few things call anyway. + outs() << " -" << ArgStr; + // All of the other options in CommandLine.cpp use ArgStr.size() + 6 for + // width, so we do the same. 
+ Option::printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6); + const auto &CounterInstance = DebugCounter::instance(); + for (auto Name : CounterInstance) { + const auto Info = + CounterInstance.getCounterInfo(CounterInstance.getCounterId(Name)); + size_t NumSpaces = GlobalWidth - Info.first.size() - 8; + outs() << " =" << Info.first; + outs().indent(NumSpaces) << " - " << Info.second << '\n'; + } + } +}; +} // namespace + +// Create our command line option. +static DebugCounterList DebugCounterOption( + "debug-counter", cl::Hidden, + cl::desc("Comma separated list of debug counter skip and count"), + cl::CommaSeparated, cl::ZeroOrMore, cl::location(DebugCounter::instance())); + +static cl::opt<bool> PrintDebugCounter( + "print-debug-counter", cl::Hidden, cl::init(false), cl::Optional, + cl::desc("Print out debug counter info after all counters accumulated")); + +static ManagedStatic<DebugCounter> DC; + +// Print information when destroyed, iff command line option is specified. +DebugCounter::~DebugCounter() { + if (isCountingEnabled() && PrintDebugCounter) + print(dbgs()); +} + +DebugCounter &DebugCounter::instance() { return *DC; } + +// This is called by the command line parser when it sees a value for the +// debug-counter option defined above. +void DebugCounter::push_back(const std::string &Val) { + if (Val.empty()) + return; + // The strings should come in as counter=value + auto CounterPair = StringRef(Val).split('='); + if (CounterPair.second.empty()) { + errs() << "DebugCounter Error: " << Val << " does not have an = in it\n"; + return; + } + // Now we have counter=value. + // First, process value. + int64_t CounterVal; + if (CounterPair.second.getAsInteger(0, CounterVal)) { + errs() << "DebugCounter Error: " << CounterPair.second + << " is not a number\n"; + return; + } + // Now we need to see if this is the skip or the count, remove the suffix, and + // add it to the counter values. 
+ if (CounterPair.first.endswith("-skip")) { + auto CounterName = CounterPair.first.drop_back(5); + unsigned CounterID = getCounterId(CounterName); + if (!CounterID) { + errs() << "DebugCounter Error: " << CounterName + << " is not a registered counter\n"; + return; + } + enableAllCounters(); + + CounterInfo &Counter = Counters[CounterID]; + Counter.Skip = CounterVal; + Counter.IsSet = true; + } else if (CounterPair.first.endswith("-count")) { + auto CounterName = CounterPair.first.drop_back(6); + unsigned CounterID = getCounterId(CounterName); + if (!CounterID) { + errs() << "DebugCounter Error: " << CounterName + << " is not a registered counter\n"; + return; + } + enableAllCounters(); + + CounterInfo &Counter = Counters[CounterID]; + Counter.StopAfter = CounterVal; + Counter.IsSet = true; + } else { + errs() << "DebugCounter Error: " << CounterPair.first + << " does not end with -skip or -count\n"; + } +} + +void DebugCounter::print(raw_ostream &OS) const { + SmallVector<StringRef, 16> CounterNames(RegisteredCounters.begin(), + RegisteredCounters.end()); + sort(CounterNames.begin(), CounterNames.end()); + + auto &Us = instance(); + OS << "Counters and values:\n"; + for (auto &CounterName : CounterNames) { + unsigned CounterID = getCounterId(CounterName); + OS << left_justify(RegisteredCounters[CounterID], 32) << ": {" + << Us.Counters[CounterID].Count << "," << Us.Counters[CounterID].Skip + << "," << Us.Counters[CounterID].StopAfter << "}\n"; + } +} + +LLVM_DUMP_METHOD void DebugCounter::dump() const { + print(dbgs()); +} diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp new file mode 100644 index 0000000000000..6aee69f434054 --- /dev/null +++ b/llvm/lib/Support/DeltaAlgorithm.cpp @@ -0,0 +1,114 @@ +//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DeltaAlgorithm.h" +#include <algorithm> +#include <iterator> +#include <set> +using namespace llvm; + +DeltaAlgorithm::~DeltaAlgorithm() { +} + +bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) { + if (FailedTestsCache.count(Changes)) + return false; + + bool Result = ExecuteOneTest(Changes); + if (!Result) + FailedTestsCache.insert(Changes); + + return Result; +} + +void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) { + // FIXME: Allow clients to provide heuristics for improved splitting. + + // FIXME: This is really slow. + changeset_ty LHS, RHS; + unsigned idx = 0, N = S.size() / 2; + for (changeset_ty::const_iterator it = S.begin(), + ie = S.end(); it != ie; ++it, ++idx) + ((idx < N) ? LHS : RHS).insert(*it); + if (!LHS.empty()) + Res.push_back(LHS); + if (!RHS.empty()) + Res.push_back(RHS); +} + +DeltaAlgorithm::changeset_ty +DeltaAlgorithm::Delta(const changeset_ty &Changes, + const changesetlist_ty &Sets) { + // Invariant: union(Res) == Changes + UpdatedSearchState(Changes, Sets); + + // If there is nothing left we can remove, we are done. + if (Sets.size() <= 1) + return Changes; + + // Look for a passing subset. + changeset_ty Res; + if (Search(Changes, Sets, Res)) + return Res; + + // Otherwise, partition the sets if possible; if not we are done. + changesetlist_ty SplitSets; + for (changesetlist_ty::const_iterator it = Sets.begin(), + ie = Sets.end(); it != ie; ++it) + Split(*it, SplitSets); + if (SplitSets.size() == Sets.size()) + return Changes; + + return Delta(Changes, SplitSets); +} + +bool DeltaAlgorithm::Search(const changeset_ty &Changes, + const changesetlist_ty &Sets, + changeset_ty &Res) { + // FIXME: Parallelize. 
+ for (changesetlist_ty::const_iterator it = Sets.begin(), + ie = Sets.end(); it != ie; ++it) { + // If the test passes on this subset alone, recurse. + if (GetTestResult(*it)) { + changesetlist_ty Sets; + Split(*it, Sets); + Res = Delta(*it, Sets); + return true; + } + + // Otherwise, if we have more than two sets, see if test passes on the + // complement. + if (Sets.size() > 2) { + // FIXME: This is really slow. + changeset_ty Complement; + std::set_difference( + Changes.begin(), Changes.end(), it->begin(), it->end(), + std::insert_iterator<changeset_ty>(Complement, Complement.begin())); + if (GetTestResult(Complement)) { + changesetlist_ty ComplementSets; + ComplementSets.insert(ComplementSets.end(), Sets.begin(), it); + ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end()); + Res = Delta(Complement, ComplementSets); + return true; + } + } + } + + return false; +} + +DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) { + // Check empty set first to quickly find poor test functions. + if (GetTestResult(changeset_ty())) + return changeset_ty(); + + // Otherwise run the real delta algorithm. + changesetlist_ty Sets; + Split(Changes, Sets); + + return Delta(Changes, Sets); +} diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp new file mode 100644 index 0000000000000..d23716016fb29 --- /dev/null +++ b/llvm/lib/Support/DynamicLibrary.cpp @@ -0,0 +1,215 @@ +//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the operating system DynamicLibrary concept. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DynamicLibrary.h" +#include "llvm-c/Support.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Config/config.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include <cstdio> +#include <cstring> +#include <vector> + +using namespace llvm; +using namespace llvm::sys; + +// All methods for HandleSet should be used holding SymbolsMutex. +class DynamicLibrary::HandleSet { + typedef std::vector<void *> HandleList; + HandleList Handles; + void *Process; + +public: + static void *DLOpen(const char *Filename, std::string *Err); + static void DLClose(void *Handle); + static void *DLSym(void *Handle, const char *Symbol); + + HandleSet() : Process(nullptr) {} + ~HandleSet(); + + HandleList::iterator Find(void *Handle) { + return std::find(Handles.begin(), Handles.end(), Handle); + } + + bool Contains(void *Handle) { + return Handle == Process || Find(Handle) != Handles.end(); + } + + bool AddLibrary(void *Handle, bool IsProcess = false, bool CanClose = true) { +#ifdef _WIN32 + assert((Handle == this ? 
IsProcess : !IsProcess) && "Bad Handle."); +#endif + + if (LLVM_LIKELY(!IsProcess)) { + if (Find(Handle) != Handles.end()) { + if (CanClose) + DLClose(Handle); + return false; + } + Handles.push_back(Handle); + } else { +#ifndef _WIN32 + if (Process) { + if (CanClose) + DLClose(Process); + if (Process == Handle) + return false; + } +#endif + Process = Handle; + } + return true; + } + + void *LibLookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + if (Order & SO_LoadOrder) { + for (void *Handle : Handles) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } else { + for (void *Handle : llvm::reverse(Handles)) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } + return nullptr; + } + + void *Lookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + assert(!((Order & SO_LoadedFirst) && (Order & SO_LoadedLast)) && + "Invalid Ordering"); + + if (!Process || (Order & SO_LoadedFirst)) { + if (void *Ptr = LibLookup(Symbol, Order)) + return Ptr; + } + if (Process) { + // Use OS facilities to search the current binary and all loaded libs. + if (void *Ptr = DLSym(Process, Symbol)) + return Ptr; + + // Search any libs that might have been skipped because of RTLD_LOCAL. + if (Order & SO_LoadedLast) { + if (void *Ptr = LibLookup(Symbol, Order)) + return Ptr; + } + } + return nullptr; + } +}; + +namespace { +// Collection of symbol name/value pairs to be searched prior to any libraries. +static llvm::ManagedStatic<llvm::StringMap<void *>> ExplicitSymbols; +// Collection of known library handles. +static llvm::ManagedStatic<DynamicLibrary::HandleSet> OpenedHandles; +// Lock for ExplicitSymbols and OpenedHandles. 
+static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> SymbolsMutex; +} + +#ifdef _WIN32 + +#include "Windows/DynamicLibrary.inc" + +#else + +#include "Unix/DynamicLibrary.inc" + +#endif + +char DynamicLibrary::Invalid; +DynamicLibrary::SearchOrdering DynamicLibrary::SearchOrder = + DynamicLibrary::SO_Linker; + +namespace llvm { +void *SearchForAddressOfSpecialSymbol(const char *SymbolName) { + return DoSearch(SymbolName); // DynamicLibrary.inc +} +} + +void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) { + SmartScopedLock<true> Lock(*SymbolsMutex); + (*ExplicitSymbols)[SymbolName] = SymbolValue; +} + +DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName, + std::string *Err) { + // Force OpenedHandles to be added into the ManagedStatic list before any + // ManagedStatic can be added from static constructors in HandleSet::DLOpen. + HandleSet& HS = *OpenedHandles; + + void *Handle = HandleSet::DLOpen(FileName, Err); + if (Handle != &Invalid) { + SmartScopedLock<true> Lock(*SymbolsMutex); + HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); + } + + return DynamicLibrary(Handle); +} + +DynamicLibrary DynamicLibrary::addPermanentLibrary(void *Handle, + std::string *Err) { + SmartScopedLock<true> Lock(*SymbolsMutex); + // If we've already loaded this library, tell the caller. + if (!OpenedHandles->AddLibrary(Handle, /*IsProcess*/false, /*CanClose*/false)) + *Err = "Library already loaded"; + + return DynamicLibrary(Handle); +} + +void *DynamicLibrary::getAddressOfSymbol(const char *SymbolName) { + if (!isValid()) + return nullptr; + return HandleSet::DLSym(Data, SymbolName); +} + +void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) { + { + SmartScopedLock<true> Lock(*SymbolsMutex); + + // First check symbols added via AddSymbol(). 
+ if (ExplicitSymbols.isConstructed()) { + StringMap<void *>::iterator i = ExplicitSymbols->find(SymbolName); + + if (i != ExplicitSymbols->end()) + return i->second; + } + + // Now search the libraries. + if (OpenedHandles.isConstructed()) { + if (void *Ptr = OpenedHandles->Lookup(SymbolName, SearchOrder)) + return Ptr; + } + } + + return llvm::SearchForAddressOfSpecialSymbol(SymbolName); +} + +//===----------------------------------------------------------------------===// +// C API. +//===----------------------------------------------------------------------===// + +LLVMBool LLVMLoadLibraryPermanently(const char *Filename) { + return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); +} + +void *LLVMSearchForAddressOfSymbol(const char *symbolName) { + return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(symbolName); +} + +void LLVMAddSymbol(const char *symbolName, void *symbolValue) { + return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue); +} diff --git a/llvm/lib/Support/Errno.cpp b/llvm/lib/Support/Errno.cpp new file mode 100644 index 0000000000000..d18231c6ebf5e --- /dev/null +++ b/llvm/lib/Support/Errno.cpp @@ -0,0 +1,75 @@ +//===- Errno.cpp - errno support --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the errno wrappers. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Errno.h" +#include "llvm/Config/config.h" +#include "llvm/Support/raw_ostream.h" +#include <string.h> + +#if HAVE_ERRNO_H +#include <errno.h> +#endif + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +namespace llvm { +namespace sys { + +#if HAVE_ERRNO_H +std::string StrError() { + return StrError(errno); +} +#endif // HAVE_ERRNO_H + +std::string StrError(int errnum) { + std::string str; + if (errnum == 0) + return str; +#if defined(HAVE_STRERROR_R) || HAVE_DECL_STRERROR_S + const int MaxErrStrLen = 2000; + char buffer[MaxErrStrLen]; + buffer[0] = '\0'; +#endif + +#ifdef HAVE_STRERROR_R + // strerror_r is thread-safe. +#if defined(__GLIBC__) && defined(_GNU_SOURCE) + // glibc defines its own incompatible version of strerror_r + // which may not use the buffer supplied. + str = strerror_r(errnum, buffer, MaxErrStrLen - 1); +#else + strerror_r(errnum, buffer, MaxErrStrLen - 1); + str = buffer; +#endif +#elif HAVE_DECL_STRERROR_S // "Windows Secure API" + strerror_s(buffer, MaxErrStrLen - 1, errnum); + str = buffer; +#elif defined(HAVE_STRERROR) + // Copy the thread un-safe result of strerror into + // the buffer as fast as possible to minimize impact + // of collision of strerror in multiple threads. 
+ str = strerror(errnum); +#else + // Strange that this system doesn't even have strerror + // but, oh well, just use a generic message + raw_string_ostream stream(str); + stream << "Error #" << errnum; + stream.flush(); +#endif + return str; +} + +} // namespace sys +} // namespace llvm diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp new file mode 100644 index 0000000000000..9ea08c37478e9 --- /dev/null +++ b/llvm/lib/Support/Error.cpp @@ -0,0 +1,169 @@ +//===----- lib/Support/Error.cpp - Error and associated utilities ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include <system_error> + +using namespace llvm; + +namespace { + + enum class ErrorErrorCode : int { + MultipleErrors = 1, + FileError, + InconvertibleError + }; + + // FIXME: This class is only here to support the transition to llvm::Error. It + // will be removed once this transition is complete. Clients should prefer to + // deal with the Error value directly, rather than converting to error_code. + class ErrorErrorCategory : public std::error_category { + public: + const char *name() const noexcept override { return "Error"; } + + std::string message(int condition) const override { + switch (static_cast<ErrorErrorCode>(condition)) { + case ErrorErrorCode::MultipleErrors: + return "Multiple errors"; + case ErrorErrorCode::InconvertibleError: + return "Inconvertible error value. An error has occurred that could " + "not be converted to a known std::error_code. 
Please file a " + "bug."; + case ErrorErrorCode::FileError: + return "A file error occurred."; + } + llvm_unreachable("Unhandled error code"); + } + }; + +} + +static ManagedStatic<ErrorErrorCategory> ErrorErrorCat; + +namespace llvm { + +void ErrorInfoBase::anchor() {} +char ErrorInfoBase::ID = 0; +char ErrorList::ID = 0; +void ECError::anchor() {} +char ECError::ID = 0; +char StringError::ID = 0; +char FileError::ID = 0; + +void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) { + if (!E) + return; + OS << ErrorBanner; + handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { + EI.log(OS); + OS << "\n"; + }); +} + + +std::error_code ErrorList::convertToErrorCode() const { + return std::error_code(static_cast<int>(ErrorErrorCode::MultipleErrors), + *ErrorErrorCat); +} + +std::error_code inconvertibleErrorCode() { + return std::error_code(static_cast<int>(ErrorErrorCode::InconvertibleError), + *ErrorErrorCat); +} + +std::error_code FileError::convertToErrorCode() const { + return std::error_code(static_cast<int>(ErrorErrorCode::FileError), + *ErrorErrorCat); +} + +Error errorCodeToError(std::error_code EC) { + if (!EC) + return Error::success(); + return Error(std::make_unique<ECError>(ECError(EC))); +} + +std::error_code errorToErrorCode(Error Err) { + std::error_code EC; + handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) { + EC = EI.convertToErrorCode(); + }); + if (EC == inconvertibleErrorCode()) + report_fatal_error(EC.message()); + return EC; +} + +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +void Error::fatalUncheckedError() const { + dbgs() << "Program aborted due to an unhandled Error:\n"; + if (getPtr()) + getPtr()->log(dbgs()); + else + dbgs() << "Error value was Success. 
(Note: Success values must still be " + "checked prior to being destroyed).\n"; + abort(); +} +#endif + +StringError::StringError(std::error_code EC, const Twine &S) + : Msg(S.str()), EC(EC) {} + +StringError::StringError(const Twine &S, std::error_code EC) + : Msg(S.str()), EC(EC), PrintMsgOnly(true) {} + +void StringError::log(raw_ostream &OS) const { + if (PrintMsgOnly) { + OS << Msg; + } else { + OS << EC.message(); + if (!Msg.empty()) + OS << (" " + Msg); + } +} + +std::error_code StringError::convertToErrorCode() const { + return EC; +} + +Error createStringError(std::error_code EC, char const *Msg) { + return make_error<StringError>(Msg, EC); +} + +void report_fatal_error(Error Err, bool GenCrashDiag) { + assert(Err && "report_fatal_error called with success value"); + std::string ErrMsg; + { + raw_string_ostream ErrStream(ErrMsg); + logAllUnhandledErrors(std::move(Err), ErrStream); + } + report_fatal_error(ErrMsg); +} + +} // end namespace llvm + +LLVMErrorTypeId LLVMGetErrorTypeId(LLVMErrorRef Err) { + return reinterpret_cast<ErrorInfoBase *>(Err)->dynamicClassID(); +} + +void LLVMConsumeError(LLVMErrorRef Err) { consumeError(unwrap(Err)); } + +char *LLVMGetErrorMessage(LLVMErrorRef Err) { + std::string Tmp = toString(unwrap(Err)); + char *ErrMsg = new char[Tmp.size() + 1]; + memcpy(ErrMsg, Tmp.data(), Tmp.size()); + ErrMsg[Tmp.size()] = '\0'; + return ErrMsg; +} + +void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; } + +LLVMErrorTypeId LLVMGetStringErrorTypeId() { + return reinterpret_cast<void *>(&StringError::ID); +} diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp new file mode 100644 index 0000000000000..0f13f7a536f1d --- /dev/null +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -0,0 +1,298 @@ +//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an API used to indicate fatal error conditions. Non-fatal +// errors (most of them) should be handled through LLVMContext. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ErrorHandling.h" +#include "llvm-c/ErrorHandling.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/WindowsError.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdlib> +#include <mutex> +#include <new> + +#if defined(HAVE_UNISTD_H) +# include <unistd.h> +#endif +#if defined(_MSC_VER) +# include <io.h> +# include <fcntl.h> +#endif + +using namespace llvm; + +static fatal_error_handler_t ErrorHandler = nullptr; +static void *ErrorHandlerUserData = nullptr; + +static fatal_error_handler_t BadAllocErrorHandler = nullptr; +static void *BadAllocErrorHandlerUserData = nullptr; + +#if LLVM_ENABLE_THREADS == 1 +// Mutexes to synchronize installing error handlers and calling error handlers. +// Do not use ManagedStatic, or that may allocate memory while attempting to +// report an OOM. +// +// This usage of std::mutex has to be conditionalized behind ifdefs because +// of this script: +// compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +// That script attempts to statically link the LLVM symbolizer library with the +// STL and hide all of its symbols with 'opt -internalize'. To reduce size, it +// cuts out the threading portions of the hermetic copy of libc++ that it +// builds. 
We can remove these ifdefs if that script goes away. +static std::mutex ErrorHandlerMutex; +static std::mutex BadAllocErrorHandlerMutex; +#endif + +void llvm::install_fatal_error_handler(fatal_error_handler_t handler, + void *user_data) { +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard<std::mutex> Lock(ErrorHandlerMutex); +#endif + assert(!ErrorHandler && "Error handler already registered!\n"); + ErrorHandler = handler; + ErrorHandlerUserData = user_data; +} + +void llvm::remove_fatal_error_handler() { +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard<std::mutex> Lock(ErrorHandlerMutex); +#endif + ErrorHandler = nullptr; + ErrorHandlerUserData = nullptr; +} + +void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) { + report_fatal_error(Twine(Reason), GenCrashDiag); +} + +void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) { + report_fatal_error(Twine(Reason), GenCrashDiag); +} + +void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) { + report_fatal_error(Twine(Reason), GenCrashDiag); +} + +void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { + llvm::fatal_error_handler_t handler = nullptr; + void* handlerData = nullptr; + { + // Only acquire the mutex while reading the handler, so as not to invoke a + // user-supplied callback under a lock. +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard<std::mutex> Lock(ErrorHandlerMutex); +#endif + handler = ErrorHandler; + handlerData = ErrorHandlerUserData; + } + + if (handler) { + handler(handlerData, Reason.str(), GenCrashDiag); + } else { + // Blast the result out to stderr. We don't try hard to make sure this + // succeeds (e.g. handling EINTR) and we can't use errs() here because + // raw ostreams can call report_fatal_error. 
+ SmallVector<char, 64> Buffer; + raw_svector_ostream OS(Buffer); + OS << "LLVM ERROR: " << Reason << "\n"; + StringRef MessageStr = OS.str(); + ssize_t written = ::write(2, MessageStr.data(), MessageStr.size()); + (void)written; // If something went wrong, we deliberately just give up. + } + + // If we reached here, we are failing ungracefully. Run the interrupt handlers + // to make sure any special cleanups get done, in particular that we remove + // files registered with RemoveFileOnSignal. + sys::RunInterruptHandlers(); + + exit(1); +} + +void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, + void *user_data) { +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex); +#endif + assert(!ErrorHandler && "Bad alloc error handler already registered!\n"); + BadAllocErrorHandler = handler; + BadAllocErrorHandlerUserData = user_data; +} + +void llvm::remove_bad_alloc_error_handler() { +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex); +#endif + BadAllocErrorHandler = nullptr; + BadAllocErrorHandlerUserData = nullptr; +} + +void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { + fatal_error_handler_t Handler = nullptr; + void *HandlerData = nullptr; + { + // Only acquire the mutex while reading the handler, so as not to invoke a + // user-supplied callback under a lock. +#if LLVM_ENABLE_THREADS == 1 + std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex); +#endif + Handler = BadAllocErrorHandler; + HandlerData = BadAllocErrorHandlerUserData; + } + + if (Handler) { + Handler(HandlerData, Reason, GenCrashDiag); + llvm_unreachable("bad alloc handler should not return"); + } + +#ifdef LLVM_ENABLE_EXCEPTIONS + // If exceptions are enabled, make OOM in malloc look like OOM in new. + throw std::bad_alloc(); +#else + // Don't call the normal error handler. It may allocate memory. Directly write + // an OOM to stderr and abort. 
+ char OOMMessage[] = "LLVM ERROR: out of memory\n"; + ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage)); + (void)written; + abort(); +#endif +} + +#ifdef LLVM_ENABLE_EXCEPTIONS +// Do not set custom new handler if exceptions are enabled. In this case OOM +// errors are handled by throwing 'std::bad_alloc'. +void llvm::install_out_of_memory_new_handler() { +} +#else +// Causes crash on allocation failure. It is called prior to the handler set by +// 'install_bad_alloc_error_handler'. +static void out_of_memory_new_handler() { + llvm::report_bad_alloc_error("Allocation failed"); +} + +// Installs new handler that causes crash on allocation failure. It is called by +// InitLLVM. +void llvm::install_out_of_memory_new_handler() { + std::new_handler old = std::set_new_handler(out_of_memory_new_handler); + (void)old; + assert(old == nullptr && "new-handler already installed"); +} +#endif + +void llvm::llvm_unreachable_internal(const char *msg, const char *file, + unsigned line) { + // This code intentionally doesn't call the ErrorHandler callback, because + // llvm_unreachable is intended to be used to indicate "impossible" + // situations, and not legitimate runtime errors. + if (msg) + dbgs() << msg << "\n"; + dbgs() << "UNREACHABLE executed"; + if (file) + dbgs() << " at " << file << ":" << line; + dbgs() << "!\n"; + abort(); +#ifdef LLVM_BUILTIN_UNREACHABLE + // Windows systems and possibly others don't declare abort() to be noreturn, + // so use the unreachable builtin to avoid a Clang self-host warning. 
+ LLVM_BUILTIN_UNREACHABLE; +#endif +} + +static void bindingsErrorHandler(void *user_data, const std::string& reason, + bool gen_crash_diag) { + LLVMFatalErrorHandler handler = + LLVM_EXTENSION reinterpret_cast<LLVMFatalErrorHandler>(user_data); + handler(reason.c_str()); +} + +void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) { + install_fatal_error_handler(bindingsErrorHandler, + LLVM_EXTENSION reinterpret_cast<void *>(Handler)); +} + +void LLVMResetFatalErrorHandler() { + remove_fatal_error_handler(); +} + +#ifdef _WIN32 + +#include <winerror.h> + +// I'd rather not double the line count of the following. +#define MAP_ERR_TO_COND(x, y) \ + case x: \ + return make_error_code(errc::y) + +std::error_code llvm::mapWindowsError(unsigned EV) { + switch (EV) { + MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); + MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); + MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); + MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); + MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); + MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); + MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); + MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); + MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); + MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); + MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); + MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); + 
MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); + MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); + MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); + MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); + MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); + MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); + MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); + MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_SEEK, io_error); + MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); + MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); + MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); + MAP_ERR_TO_COND(WSAEACCES, permission_denied); + MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); + MAP_ERR_TO_COND(WSAEFAULT, bad_address); + MAP_ERR_TO_COND(WSAEINTR, interrupted); + MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); + MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); + MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); + default: + return std::error_code(EV, std::system_category()); + } +} + +#endif diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp new file mode 100644 index 0000000000000..841e406a7b694 --- /dev/null +++ b/llvm/lib/Support/FileCheck.cpp @@ -0,0 +1,1990 @@ +//===- FileCheck.cpp - Check that File's Contents match what is expected --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// FileCheck does a line-by line check of a file that validates whether it +// contains the expected content. This is useful for regression tests etc. +// +// This file implements most of the API that will be used by the FileCheck utility +// as well as various unittests. +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileCheck.h" +#include "FileCheckImpl.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/FormatVariadic.h" +#include <cstdint> +#include <list> +#include <tuple> +#include <utility> + +using namespace llvm; + +Expected<uint64_t> FileCheckNumericVariableUse::eval() const { + Optional<uint64_t> Value = NumericVariable->getValue(); + if (Value) + return *Value; + + return make_error<FileCheckUndefVarError>(Name); +} + +Expected<uint64_t> FileCheckASTBinop::eval() const { + Expected<uint64_t> LeftOp = LeftOperand->eval(); + Expected<uint64_t> RightOp = RightOperand->eval(); + + // Bubble up any error (e.g. undefined variables) in the recursive + // evaluation. + if (!LeftOp || !RightOp) { + Error Err = Error::success(); + if (!LeftOp) + Err = joinErrors(std::move(Err), LeftOp.takeError()); + if (!RightOp) + Err = joinErrors(std::move(Err), RightOp.takeError()); + return std::move(Err); + } + + return EvalBinop(*LeftOp, *RightOp); +} + +Expected<std::string> FileCheckNumericSubstitution::getResult() const { + Expected<uint64_t> EvaluatedValue = ExpressionAST->eval(); + if (!EvaluatedValue) + return EvaluatedValue.takeError(); + return utostr(*EvaluatedValue); +} + +Expected<std::string> FileCheckStringSubstitution::getResult() const { + // Look up the value and escape it so that we can put it into the regex. 
+ Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); + if (!VarVal) + return VarVal.takeError(); + return Regex::escape(*VarVal); +} + +bool FileCheckPattern::isValidVarNameStart(char C) { + return C == '_' || isalpha(C); +} + +Expected<FileCheckPattern::VariableProperties> +FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) { + if (Str.empty()) + return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name"); + + bool ParsedOneChar = false; + unsigned I = 0; + bool IsPseudo = Str[0] == '@'; + + // Global vars start with '$'. + if (Str[0] == '$' || IsPseudo) + ++I; + + for (unsigned E = Str.size(); I != E; ++I) { + if (!ParsedOneChar && !isValidVarNameStart(Str[I])) + return FileCheckErrorDiagnostic::get(SM, Str, "invalid variable name"); + + // Variable names are composed of alphanumeric characters and underscores. + if (Str[I] != '_' && !isalnum(Str[I])) + break; + ParsedOneChar = true; + } + + StringRef Name = Str.take_front(I); + Str = Str.substr(I); + return VariableProperties {Name, IsPseudo}; +} + +// StringRef holding all characters considered as horizontal whitespaces by +// FileCheck input canonicalization. +constexpr StringLiteral SpaceChars = " \t"; + +// Parsing helper function that strips the first character in S and returns it. 
+static char popFront(StringRef &S) { + char C = S.front(); + S = S.drop_front(); + return C; +} + +char FileCheckUndefVarError::ID = 0; +char FileCheckErrorDiagnostic::ID = 0; +char FileCheckNotFoundError::ID = 0; + +Expected<FileCheckNumericVariable *> +FileCheckPattern::parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional<size_t> LineNumber, const SourceMgr &SM) { + Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM); + if (!ParseVarResult) + return ParseVarResult.takeError(); + StringRef Name = ParseVarResult->Name; + + if (ParseVarResult->IsPseudo) + return FileCheckErrorDiagnostic::get( + SM, Name, "definition of pseudo numeric variable unsupported"); + + // Detect collisions between string and numeric variables when the latter + // is created later than the former. + if (Context->DefinedVariableTable.find(Name) != + Context->DefinedVariableTable.end()) + return FileCheckErrorDiagnostic::get( + SM, Name, "string variable with name '" + Name + "' already exists"); + + Expr = Expr.ltrim(SpaceChars); + if (!Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, "unexpected characters after numeric variable name"); + + FileCheckNumericVariable *DefinedNumericVariable; + auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); + if (VarTableIter != Context->GlobalNumericVariableTable.end()) + DefinedNumericVariable = VarTableIter->second; + else + DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber); + + return DefinedNumericVariable; +} + +Expected<std::unique_ptr<FileCheckNumericVariableUse>> +FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional<size_t> LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { + if (IsPseudo && !Name.equals("@LINE")) + return FileCheckErrorDiagnostic::get( + SM, Name, "invalid pseudo numeric variable '" + Name + "'"); + + // Numeric variable definitions and uses are parsed in the 
order in which + // they appear in the CHECK patterns. For each definition, the pointer to the + // class instance of the corresponding numeric variable definition is stored + // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer + // we get below is null, it means no such variable was defined before. When + // that happens, we create a dummy variable so that parsing can continue. All + // uses of undefined variables, whether string or numeric, are then diagnosed + // in printSubstitutions() after failing to match. + auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); + FileCheckNumericVariable *NumericVariable; + if (VarTableIter != Context->GlobalNumericVariableTable.end()) + NumericVariable = VarTableIter->second; + else { + NumericVariable = Context->makeNumericVariable(Name); + Context->GlobalNumericVariableTable[Name] = NumericVariable; + } + + Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber(); + if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) + return FileCheckErrorDiagnostic::get( + SM, Name, + "numeric variable '" + Name + + "' defined earlier in the same CHECK directive"); + + return std::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable); +} + +Expected<std::unique_ptr<FileCheckExpressionAST>> +FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional<size_t> LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { + if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { + // Try to parse as a numeric variable use. + Expected<FileCheckPattern::VariableProperties> ParseVarResult = + parseVariable(Expr, SM); + if (ParseVarResult) + return parseNumericVariableUse(ParseVarResult->Name, + ParseVarResult->IsPseudo, LineNumber, + Context, SM); + if (AO == AllowedOperand::LineVar) + return ParseVarResult.takeError(); + // Ignore the error and retry parsing as a literal. 
+ consumeError(ParseVarResult.takeError()); + } + + // Otherwise, parse it as a literal. + uint64_t LiteralValue; + if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue)) + return std::make_unique<FileCheckExpressionLiteral>(LiteralValue); + + return FileCheckErrorDiagnostic::get(SM, Expr, + "invalid operand format '" + Expr + "'"); +} + +static uint64_t add(uint64_t LeftOp, uint64_t RightOp) { + return LeftOp + RightOp; +} + +static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { + return LeftOp - RightOp; +} + +Expected<std::unique_ptr<FileCheckExpressionAST>> FileCheckPattern::parseBinop( + StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp, + bool IsLegacyLineExpr, Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { + Expr = Expr.ltrim(SpaceChars); + if (Expr.empty()) + return std::move(LeftOp); + + // Check if this is a supported operation and select a function to perform + // it. + SMLoc OpLoc = SMLoc::getFromPointer(Expr.data()); + char Operator = popFront(Expr); + binop_eval_t EvalBinop; + switch (Operator) { + case '+': + EvalBinop = add; + break; + case '-': + EvalBinop = sub; + break; + default: + return FileCheckErrorDiagnostic::get( + SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'"); + } + + // Parse right operand. + Expr = Expr.ltrim(SpaceChars); + if (Expr.empty()) + return FileCheckErrorDiagnostic::get(SM, Expr, + "missing operand in expression"); + // The second operand in a legacy @LINE expression is always a literal. + AllowedOperand AO = + IsLegacyLineExpr ? 
AllowedOperand::Literal : AllowedOperand::Any; + Expected<std::unique_ptr<FileCheckExpressionAST>> RightOpResult = + parseNumericOperand(Expr, AO, LineNumber, Context, SM); + if (!RightOpResult) + return RightOpResult; + + Expr = Expr.ltrim(SpaceChars); + return std::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp), + std::move(*RightOpResult)); +} + +Expected<std::unique_ptr<FileCheckExpressionAST>> +FileCheckPattern::parseNumericSubstitutionBlock( + StringRef Expr, + Optional<FileCheckNumericVariable *> &DefinedNumericVariable, + bool IsLegacyLineExpr, Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { + std::unique_ptr<FileCheckExpressionAST> ExpressionAST = nullptr; + StringRef DefExpr = StringRef(); + DefinedNumericVariable = None; + // Save variable definition expression if any. + size_t DefEnd = Expr.find(':'); + if (DefEnd != StringRef::npos) { + DefExpr = Expr.substr(0, DefEnd); + Expr = Expr.substr(DefEnd + 1); + } + + // Parse the expression itself. + Expr = Expr.ltrim(SpaceChars); + if (!Expr.empty()) { + // The first operand in a legacy @LINE expression is always the @LINE + // pseudo variable. + AllowedOperand AO = + IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; + Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult = + parseNumericOperand(Expr, AO, LineNumber, Context, SM); + while (ParseResult && !Expr.empty()) { + ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, + LineNumber, Context, SM); + // Legacy @LINE expressions only allow 2 operands. + if (ParseResult && IsLegacyLineExpr && !Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, + "unexpected characters at end of expression '" + Expr + "'"); + } + if (!ParseResult) + return ParseResult; + ExpressionAST = std::move(*ParseResult); + } + + // Parse the numeric variable definition. 
+ if (DefEnd != StringRef::npos) { + DefExpr = DefExpr.ltrim(SpaceChars); + Expected<FileCheckNumericVariable *> ParseResult = + parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM); + + if (!ParseResult) + return ParseResult.takeError(); + DefinedNumericVariable = *ParseResult; + } + + return std::move(ExpressionAST); +} + +bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, + SourceMgr &SM, + const FileCheckRequest &Req) { + bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; + IgnoreCase = Req.IgnoreCase; + + PatternLoc = SMLoc::getFromPointer(PatternStr.data()); + + if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) + // Ignore trailing whitespace. + while (!PatternStr.empty() && + (PatternStr.back() == ' ' || PatternStr.back() == '\t')) + PatternStr = PatternStr.substr(0, PatternStr.size() - 1); + + // Check that there is something on the line. + if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { + SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, + "found empty check string with prefix '" + Prefix + ":'"); + return true; + } + + if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { + SM.PrintMessage( + PatternLoc, SourceMgr::DK_Error, + "found non-empty check string for empty check with prefix '" + Prefix + + ":'"); + return true; + } + + if (CheckTy == Check::CheckEmpty) { + RegExStr = "(\n$)"; + return false; + } + + // Check to see if this is a fixed string, or if it has regex pieces. + if (!MatchFullLinesHere && + (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && + PatternStr.find("[[") == StringRef::npos))) { + FixedStr = PatternStr; + return false; + } + + if (MatchFullLinesHere) { + RegExStr += '^'; + if (!Req.NoCanonicalizeWhiteSpace) + RegExStr += " *"; + } + + // Paren value #0 is for the fully matched string. Any new parenthesized + // values add from there. + unsigned CurParen = 1; + + // Otherwise, there is at least one regex piece. 
Build up the regex pattern + // by escaping scary characters in fixed strings, building up one big regex. + while (!PatternStr.empty()) { + // RegEx matches. + if (PatternStr.startswith("{{")) { + // This is the start of a regex match. Scan for the }}. + size_t End = PatternStr.find("}}"); + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + SourceMgr::DK_Error, + "found start of regex string with no end '}}'"); + return true; + } + + // Enclose {{}} patterns in parens just like [[]] even though we're not + // capturing the result for any purpose. This is required in case the + // expression contains an alternation like: CHECK: abc{{x|z}}def. We + // want this to turn into: "abc(x|z)def" not "abcx|zdef". + RegExStr += '('; + ++CurParen; + + if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) + return true; + RegExStr += ')'; + + PatternStr = PatternStr.substr(End + 2); + continue; + } + + // String and numeric substitution blocks. Pattern substitution blocks come + // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some + // other regex) and assigns it to the string variable 'foo'. The latter + // substitutes foo's value. Numeric substitution blocks recognize the same + // form as string ones, but start with a '#' sign after the double + // brackets. They also accept a combined form which sets a numeric variable + // to the evaluation of an expression. Both string and numeric variable + // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be + // valid, as this helps catch some common errors. + if (PatternStr.startswith("[[")) { + StringRef UnparsedPatternStr = PatternStr.substr(2); + // Find the closing bracket pair ending the match. End is going to be an + // offset relative to the beginning of the match string. 
+ size_t End = FindRegexVarEnd(UnparsedPatternStr, SM); + StringRef MatchStr = UnparsedPatternStr.substr(0, End); + bool IsNumBlock = MatchStr.consume_front("#"); + + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + SourceMgr::DK_Error, + "Invalid substitution block, no ]] found"); + return true; + } + // Strip the substitution block we are parsing. End points to the start + // of the "]]" closing the expression so account for it in computing the + // index of the first unparsed character. + PatternStr = UnparsedPatternStr.substr(End + 2); + + bool IsDefinition = false; + bool SubstNeeded = false; + // Whether the substitution block is a legacy use of @LINE with string + // substitution block syntax. + bool IsLegacyLineExpr = false; + StringRef DefName; + StringRef SubstStr; + StringRef MatchRegexp; + size_t SubstInsertIdx = RegExStr.size(); + + // Parse string variable or legacy @LINE expression. + if (!IsNumBlock) { + size_t VarEndIdx = MatchStr.find(":"); + size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); + if (SpacePos != StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos), + SourceMgr::DK_Error, "unexpected whitespace"); + return true; + } + + // Get the name (e.g. "foo") and verify it is well formed. 
+ StringRef OrigMatchStr = MatchStr; + Expected<FileCheckPattern::VariableProperties> ParseVarResult = + parseVariable(MatchStr, SM); + if (!ParseVarResult) { + logAllUnhandledErrors(ParseVarResult.takeError(), errs()); + return true; + } + StringRef Name = ParseVarResult->Name; + bool IsPseudo = ParseVarResult->IsPseudo; + + IsDefinition = (VarEndIdx != StringRef::npos); + SubstNeeded = !IsDefinition; + if (IsDefinition) { + if ((IsPseudo || !MatchStr.consume_front(":"))) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + SourceMgr::DK_Error, + "invalid name in string variable definition"); + return true; + } + + // Detect collisions between string and numeric variables when the + // former is created later than the latter. + if (Context->GlobalNumericVariableTable.find(Name) != + Context->GlobalNumericVariableTable.end()) { + SM.PrintMessage( + SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, + "numeric variable with name '" + Name + "' already exists"); + return true; + } + DefName = Name; + MatchRegexp = MatchStr; + } else { + if (IsPseudo) { + MatchStr = OrigMatchStr; + IsLegacyLineExpr = IsNumBlock = true; + } else + SubstStr = Name; + } + } + + // Parse numeric substitution block. + std::unique_ptr<FileCheckExpressionAST> ExpressionAST; + Optional<FileCheckNumericVariable *> DefinedNumericVariable; + if (IsNumBlock) { + Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult = + parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, + IsLegacyLineExpr, LineNumber, Context, + SM); + if (!ParseResult) { + logAllUnhandledErrors(ParseResult.takeError(), errs()); + return true; + } + ExpressionAST = std::move(*ParseResult); + SubstNeeded = ExpressionAST != nullptr; + if (DefinedNumericVariable) { + IsDefinition = true; + DefName = (*DefinedNumericVariable)->getName(); + } + if (SubstNeeded) + SubstStr = MatchStr; + else + MatchRegexp = "[0-9]+"; + } + + // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]]. 
+ if (IsDefinition) { + RegExStr += '('; + ++SubstInsertIdx; + + if (IsNumBlock) { + FileCheckNumericVariableMatch NumericVariableDefinition = { + *DefinedNumericVariable, CurParen}; + NumericVariableDefs[DefName] = NumericVariableDefinition; + // This store is done here rather than in match() to allow + // parseNumericVariableUse() to get the pointer to the class instance + // of the right variable definition corresponding to a given numeric + // variable use. + Context->GlobalNumericVariableTable[DefName] = + *DefinedNumericVariable; + } else { + VariableDefs[DefName] = CurParen; + // Mark string variable as defined to detect collisions between + // string and numeric variables in parseNumericVariableUse() and + // defineCmdlineVariables() when the latter is created later than the + // former. We cannot reuse GlobalVariableTable for this by populating + // it with an empty string since we would then lose the ability to + // detect the use of an undefined variable in match(). + Context->DefinedVariableTable[DefName] = true; + } + + ++CurParen; + } + + if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) + return true; + + if (IsDefinition) + RegExStr += ')'; + + // Handle substitutions: [[foo]] and [[#<foo expr>]]. + if (SubstNeeded) { + // Handle substitution of string variables that were defined earlier on + // the same line by emitting a backreference. Expressions do not + // support substituting a numeric variable defined on the same line. 
+ if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) { + unsigned CaptureParenGroup = VariableDefs[SubstStr]; + if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { + SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), + SourceMgr::DK_Error, + "Can't back-reference more than 9 variables"); + return true; + } + AddBackrefToRegEx(CaptureParenGroup); + } else { + // Handle substitution of string variables ([[<var>]]) defined in + // previous CHECK patterns, and substitution of expressions. + FileCheckSubstitution *Substitution = + IsNumBlock + ? Context->makeNumericSubstitution( + SubstStr, std::move(ExpressionAST), SubstInsertIdx) + : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); + Substitutions.push_back(Substitution); + } + } + } + + // Handle fixed string matches. + // Find the end, which is the start of the next regex. + size_t FixedMatchEnd = PatternStr.find("{{"); + FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); + RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); + PatternStr = PatternStr.substr(FixedMatchEnd); + } + + if (MatchFullLinesHere) { + if (!Req.NoCanonicalizeWhiteSpace) + RegExStr += " *"; + RegExStr += '$'; + } + + return false; +} + +bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { + Regex R(RS); + std::string Error; + if (!R.isValid(Error)) { + SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, + "invalid regex: " + Error); + return true; + } + + RegExStr += RS.str(); + CurParen += R.getNumMatches(); + return false; +} + +void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) { + assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); + std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); + RegExStr += Backref; +} + +Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const { + // If this is the EOF pattern, match it 
immediately. + if (CheckTy == Check::CheckEOF) { + MatchLen = 0; + return Buffer.size(); + } + + // If this is a fixed string pattern, just match it now. + if (!FixedStr.empty()) { + MatchLen = FixedStr.size(); + size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr) + : Buffer.find(FixedStr); + if (Pos == StringRef::npos) + return make_error<FileCheckNotFoundError>(); + return Pos; + } + + // Regex match. + + // If there are substitutions, we need to create a temporary string with the + // actual value. + StringRef RegExToMatch = RegExStr; + std::string TmpStr; + if (!Substitutions.empty()) { + TmpStr = RegExStr; + if (LineNumber) + Context->LineVariable->setValue(*LineNumber); + + size_t InsertOffset = 0; + // Substitute all string variables and expressions whose values are only + // now known. Use of string variables defined on the same line are handled + // by back-references. + for (const auto &Substitution : Substitutions) { + // Substitute and check for failure (e.g. use of undefined variable). + Expected<std::string> Value = Substitution->getResult(); + if (!Value) + return Value.takeError(); + + // Plop it into the regex at the adjusted offset. + TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, + Value->begin(), Value->end()); + InsertOffset += Value->size(); + } + + // Match the newly constructed regex. + RegExToMatch = TmpStr; + } + + SmallVector<StringRef, 4> MatchInfo; + unsigned int Flags = Regex::Newline; + if (IgnoreCase) + Flags |= Regex::IgnoreCase; + if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) + return make_error<FileCheckNotFoundError>(); + + // Successful regex match. + assert(!MatchInfo.empty() && "Didn't get any match"); + StringRef FullMatch = MatchInfo[0]; + + // If this defines any string variables, remember their values. 
+ for (const auto &VariableDef : VariableDefs) { + assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); + Context->GlobalVariableTable[VariableDef.first] = + MatchInfo[VariableDef.second]; + } + + // If this defines any numeric variables, remember their values. + for (const auto &NumericVariableDef : NumericVariableDefs) { + const FileCheckNumericVariableMatch &NumericVariableMatch = + NumericVariableDef.getValue(); + unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; + assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); + FileCheckNumericVariable *DefinedNumericVariable = + NumericVariableMatch.DefinedNumericVariable; + + StringRef MatchedValue = MatchInfo[CaptureParenGroup]; + uint64_t Val; + if (MatchedValue.getAsInteger(10, Val)) + return FileCheckErrorDiagnostic::get(SM, MatchedValue, + "Unable to represent numeric value"); + DefinedNumericVariable->setValue(Val); + } + + // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after + // the required preceding newline, which is consumed by the pattern in the + // case of CHECK-EMPTY but not CHECK-NEXT. + size_t MatchStartSkip = CheckTy == Check::CheckEmpty; + MatchLen = FullMatch.size() - MatchStartSkip; + return FullMatch.data() - Buffer.data() + MatchStartSkip; +} + +unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const { + // Just compute the number of matching characters. For regular expressions, we + // just compare against the regex itself and hope for the best. + // + // FIXME: One easy improvement here is have the regex lib generate a single + // example regular expression which matches, and use that as the example + // string. + StringRef ExampleString(FixedStr); + if (ExampleString.empty()) + ExampleString = RegExStr; + + // Only compare up to the first line in the buffer, or the string size. 
+ StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); + BufferPrefix = BufferPrefix.split('\n').first; + return BufferPrefix.edit_distance(ExampleString); +} + +void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange) const { + // Print what we know about substitutions. + if (!Substitutions.empty()) { + for (const auto &Substitution : Substitutions) { + SmallString<256> Msg; + raw_svector_ostream OS(Msg); + Expected<std::string> MatchedValue = Substitution->getResult(); + + // Substitution failed or is not known at match time, print the undefined + // variables it uses. + if (!MatchedValue) { + bool UndefSeen = false; + handleAllErrors(MatchedValue.takeError(), + [](const FileCheckNotFoundError &E) {}, + // Handled in PrintNoMatch(). + [](const FileCheckErrorDiagnostic &E) {}, + [&](const FileCheckUndefVarError &E) { + if (!UndefSeen) { + OS << "uses undefined variable(s):"; + UndefSeen = true; + } + OS << " "; + E.log(OS); + }); + } else { + // Substitution succeeded. Print substituted value. 
+ OS << "with \""; + OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; + OS.write_escaped(*MatchedValue) << "\""; + } + + if (MatchRange.isValid()) + SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(), + {MatchRange}); + else + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), + SourceMgr::DK_Note, OS.str()); + } + } +} + +static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, + const SourceMgr &SM, SMLoc Loc, + Check::FileCheckType CheckTy, + StringRef Buffer, size_t Pos, size_t Len, + std::vector<FileCheckDiag> *Diags, + bool AdjustPrevDiag = false) { + SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); + SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); + SMRange Range(Start, End); + if (Diags) { + if (AdjustPrevDiag) + Diags->rbegin()->MatchTy = MatchTy; + else + Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); + } + return Range; +} + +void FileCheckPattern::printFuzzyMatch( + const SourceMgr &SM, StringRef Buffer, + std::vector<FileCheckDiag> *Diags) const { + // Attempt to find the closest/best fuzzy match. Usually an error happens + // because some string in the output didn't exactly match. In these cases, we + // would like to show the user a best guess at what "should have" matched, to + // save them having to actually check the input manually. + size_t NumLinesForward = 0; + size_t Best = StringRef::npos; + double BestQuality = 0; + + // Use an arbitrary 4k limit on how far we will search. + for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { + if (Buffer[i] == '\n') + ++NumLinesForward; + + // Patterns have leading whitespace stripped, so skip whitespace when + // looking for something which looks like a pattern. + if (Buffer[i] == ' ' || Buffer[i] == '\t') + continue; + + // Compute the "quality" of this match as an arbitrary combination of the + // match distance and the number of lines skipped to get to this match. 
+ unsigned Distance = computeMatchDistance(Buffer.substr(i)); + double Quality = Distance + (NumLinesForward / 100.); + + if (Quality < BestQuality || Best == StringRef::npos) { + Best = i; + BestQuality = Quality; + } + } + + // Print the "possible intended match here" line if we found something + // reasonable and not equal to what we showed in the "scanning from here" + // line. + if (Best && Best != StringRef::npos && BestQuality < 50) { + SMRange MatchRange = + ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), + getCheckTy(), Buffer, Best, 0, Diags); + SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, + "possible intended match here"); + + // FIXME: If we wanted to be really friendly we would show why the match + // failed, as it can be hard to spot simple one character differences. + } +} + +Expected<StringRef> +FileCheckPatternContext::getPatternVarValue(StringRef VarName) { + auto VarIter = GlobalVariableTable.find(VarName); + if (VarIter == GlobalVariableTable.end()) + return make_error<FileCheckUndefVarError>(VarName); + + return VarIter->second; +} + +template <class... Types> +FileCheckNumericVariable * +FileCheckPatternContext::makeNumericVariable(Types... 
args) { + NumericVariables.push_back( + std::make_unique<FileCheckNumericVariable>(args...)); + return NumericVariables.back().get(); +} + +FileCheckSubstitution * +FileCheckPatternContext::makeStringSubstitution(StringRef VarName, + size_t InsertIdx) { + Substitutions.push_back( + std::make_unique<FileCheckStringSubstitution>(this, VarName, InsertIdx)); + return Substitutions.back().get(); +} + +FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution( + StringRef ExpressionStr, + std::unique_ptr<FileCheckExpressionAST> ExpressionAST, size_t InsertIdx) { + Substitutions.push_back(std::make_unique<FileCheckNumericSubstitution>( + this, ExpressionStr, std::move(ExpressionAST), InsertIdx)); + return Substitutions.back().get(); +} + +size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { + // Offset keeps track of the current offset within the input Str + size_t Offset = 0; + // [...] Nesting depth + size_t BracketDepth = 0; + + while (!Str.empty()) { + if (Str.startswith("]]") && BracketDepth == 0) + return Offset; + if (Str[0] == '\\') { + // Backslash escapes the next char within regexes, so skip them both. + Str = Str.substr(2); + Offset += 2; + } else { + switch (Str[0]) { + default: + break; + case '[': + BracketDepth++; + break; + case ']': + if (BracketDepth == 0) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), + SourceMgr::DK_Error, + "missing closing \"]\" for regex variable"); + exit(1); + } + BracketDepth--; + break; + } + Str = Str.substr(1); + Offset++; + } + } + + return StringRef::npos; +} + +StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, + SmallVectorImpl<char> &OutputBuffer) { + OutputBuffer.reserve(MB.getBufferSize()); + + for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); + Ptr != End; ++Ptr) { + // Eliminate trailing dosish \r. 
/// Returns true if \p c may appear inside a check-prefix-like word, i.e. it
/// is alphanumeric (per isalnum), a '-' or a '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require an argument representable as unsigned
  // char (or EOF). Where plain char is signed, bytes >= 0x80 would be passed
  // as negative values, which is undefined behavior, so convert first.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
case Check::CheckLabel: + return Prefix.str() + "-LABEL"; + case Check::CheckEmpty: + return Prefix.str() + "-EMPTY"; + case Check::CheckEOF: + return "implicit EOF"; + case Check::CheckBadNot: + return "bad NOT"; + case Check::CheckBadCount: + return "bad COUNT"; + } + llvm_unreachable("unknown FileCheckType"); +} + +static std::pair<Check::FileCheckType, StringRef> +FindCheckType(StringRef Buffer, StringRef Prefix) { + if (Buffer.size() <= Prefix.size()) + return {Check::CheckNone, StringRef()}; + + char NextChar = Buffer[Prefix.size()]; + + StringRef Rest = Buffer.drop_front(Prefix.size() + 1); + // Verify that the : is present after the prefix. + if (NextChar == ':') + return {Check::CheckPlain, Rest}; + + if (NextChar != '-') + return {Check::CheckNone, StringRef()}; + + if (Rest.consume_front("COUNT-")) { + int64_t Count; + if (Rest.consumeInteger(10, Count)) + // Error happened in parsing integer. + return {Check::CheckBadCount, Rest}; + if (Count <= 0 || Count > INT32_MAX) + return {Check::CheckBadCount, Rest}; + if (!Rest.consume_front(":")) + return {Check::CheckBadCount, Rest}; + return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest}; + } + + if (Rest.consume_front("NEXT:")) + return {Check::CheckNext, Rest}; + + if (Rest.consume_front("SAME:")) + return {Check::CheckSame, Rest}; + + if (Rest.consume_front("NOT:")) + return {Check::CheckNot, Rest}; + + if (Rest.consume_front("DAG:")) + return {Check::CheckDAG, Rest}; + + if (Rest.consume_front("LABEL:")) + return {Check::CheckLabel, Rest}; + + if (Rest.consume_front("EMPTY:")) + return {Check::CheckEmpty, Rest}; + + // You can't combine -NOT with another suffix. 
+ if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || + Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || + Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || + Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) + return {Check::CheckBadNot, Rest}; + + return {Check::CheckNone, Rest}; +} + +// From the given position, find the next character after the word. +static size_t SkipWord(StringRef Str, size_t Loc) { + while (Loc < Str.size() && IsPartOfWord(Str[Loc])) + ++Loc; + return Loc; +} + +/// Searches the buffer for the first prefix in the prefix regular expression. +/// +/// This searches the buffer using the provided regular expression, however it +/// enforces constraints beyond that: +/// 1) The found prefix must not be a suffix of something that looks like +/// a valid prefix. +/// 2) The found prefix must be followed by a valid check type suffix using \c +/// FindCheckType above. +/// +/// \returns a pair of StringRefs into the Buffer, which combines: +/// - the first match of the regular expression to satisfy these two is +/// returned, +/// otherwise an empty StringRef is returned to indicate failure. +/// - buffer rewound to the location right after parsed suffix, for parsing +/// to continue from +/// +/// If this routine returns a valid prefix, it will also shrink \p Buffer to +/// start at the beginning of the returned prefix, increment \p LineNumber for +/// each new line consumed from \p Buffer, and set \p CheckTy to the type of +/// check found by examining the suffix. +/// +/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy +/// is unspecified. +static std::pair<StringRef, StringRef> +FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, + unsigned &LineNumber, Check::FileCheckType &CheckTy) { + SmallVector<StringRef, 2> Matches; + + while (!Buffer.empty()) { + // Find the first (longest) match using the RE. 
+ if (!PrefixRE.match(Buffer, &Matches)) + // No match at all, bail. + return {StringRef(), StringRef()}; + + StringRef Prefix = Matches[0]; + Matches.clear(); + + assert(Prefix.data() >= Buffer.data() && + Prefix.data() < Buffer.data() + Buffer.size() && + "Prefix doesn't start inside of buffer!"); + size_t Loc = Prefix.data() - Buffer.data(); + StringRef Skipped = Buffer.substr(0, Loc); + Buffer = Buffer.drop_front(Loc); + LineNumber += Skipped.count('\n'); + + // Check that the matched prefix isn't a suffix of some other check-like + // word. + // FIXME: This is a very ad-hoc check. it would be better handled in some + // other way. Among other things it seems hard to distinguish between + // intentional and unintentional uses of this feature. + if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { + // Now extract the type. + StringRef AfterSuffix; + std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix); + + // If we've found a valid check type for this prefix, we're done. + if (CheckTy != Check::CheckNone) + return {Prefix, AfterSuffix}; + } + + // If we didn't successfully find a prefix, we need to skip this invalid + // prefix and continue scanning. We directly skip the prefix that was + // matched and any additional parts of that check-like word. + Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); + } + + // We ran out of buffer while skipping partial matches so give up. 
+ return {StringRef(), StringRef()}; +} + +void FileCheckPatternContext::createLineVariable() { + assert(!LineVariable && "@LINE pseudo numeric variable already created"); + StringRef LineName = "@LINE"; + LineVariable = makeNumericVariable(LineName); + GlobalNumericVariableTable[LineName] = LineVariable; +} + +FileCheck::FileCheck(FileCheckRequest Req) + : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()), + CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {} + +FileCheck::~FileCheck() = default; + +bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, + Regex &PrefixRE) { + Error DefineError = + PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); + if (DefineError) { + logAllUnhandledErrors(std::move(DefineError), errs()); + return true; + } + + PatternContext->createLineVariable(); + + std::vector<FileCheckPattern> ImplicitNegativeChecks; + for (const auto &PatternString : Req.ImplicitCheckNot) { + // Create a buffer with fake command line content in order to display the + // command line option responsible for the specific implicit CHECK-NOT. + std::string Prefix = "-implicit-check-not='"; + std::string Suffix = "'"; + std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( + Prefix + PatternString + Suffix, "command line"); + + StringRef PatternInBuffer = + CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); + SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); + + ImplicitNegativeChecks.push_back( + FileCheckPattern(Check::CheckNot, PatternContext.get())); + ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, + "IMPLICIT-CHECK", SM, Req); + } + + std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks; + + // LineNumber keeps track of the line on which CheckPrefix instances are + // found. + unsigned LineNumber = 1; + + while (1) { + Check::FileCheckType CheckTy; + + // See if a prefix occurs in the memory buffer. 
+ StringRef UsedPrefix; + StringRef AfterSuffix; + std::tie(UsedPrefix, AfterSuffix) = + FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy); + if (UsedPrefix.empty()) + break; + assert(UsedPrefix.data() == Buffer.data() && + "Failed to move Buffer's start forward, or pointed prefix outside " + "of the buffer!"); + assert(AfterSuffix.data() >= Buffer.data() && + AfterSuffix.data() < Buffer.data() + Buffer.size() && + "Parsing after suffix doesn't start inside of buffer!"); + + // Location to use for error messages. + const char *UsedPrefixStart = UsedPrefix.data(); + + // Skip the buffer to the end of parsed suffix (or just prefix, if no good + // suffix was processed). + Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) + : AfterSuffix; + + // Complain about useful-looking but unsupported suffixes. + if (CheckTy == Check::CheckBadNot) { + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, + "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); + return true; + } + + // Complain about invalid count specification. + if (CheckTy == Check::CheckBadCount) { + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, + "invalid count in -COUNT specification on prefix '" + + UsedPrefix + "'"); + return true; + } + + // Okay, we found the prefix, yay. Remember the rest of the line, but ignore + // leading whitespace. + if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) + Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); + + // Scan ahead to the end of line. + size_t EOL = Buffer.find_first_of("\n\r"); + + // Remember the location of the start of the pattern, for diagnostics. + SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); + + // Parse the pattern. 
+ FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber); + if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req)) + return true; + + // Verify that CHECK-LABEL lines do not define or use variables + if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { + SM.PrintMessage( + SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, + "found '" + UsedPrefix + "-LABEL:'" + " with variable definition or use"); + return true; + } + + Buffer = Buffer.substr(EOL); + + // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. + if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || + CheckTy == Check::CheckEmpty) && + CheckStrings->empty()) { + StringRef Type = CheckTy == Check::CheckNext + ? "NEXT" + : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; + SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), + SourceMgr::DK_Error, + "found '" + UsedPrefix + "-" + Type + + "' without previous '" + UsedPrefix + ": line"); + return true; + } + + // Handle CHECK-DAG/-NOT. + if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { + DagNotMatches.push_back(P); + continue; + } + + // Okay, add the string we captured to the output vector and move on. + CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); + DagNotMatches = ImplicitNegativeChecks; + } + + // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first + // prefix as a filler for the error message. + if (!DagNotMatches.empty()) { + CheckStrings->emplace_back( + FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), + *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); + } + + if (CheckStrings->empty()) { + errs() << "error: no check strings found with prefix" + << (Req.CheckPrefixes.size() > 1 ? 
"es " : " "); + auto I = Req.CheckPrefixes.begin(); + auto E = Req.CheckPrefixes.end(); + if (I != E) { + errs() << "\'" << *I << ":'"; + ++I; + } + for (; I != E; ++I) + errs() << ", \'" << *I << ":'"; + + errs() << '\n'; + return true; + } + + return false; +} + +static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, + StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat, + int MatchedCount, StringRef Buffer, size_t MatchPos, + size_t MatchLen, const FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) { + bool PrintDiag = true; + if (ExpectedMatch) { + if (!Req.Verbose) + return; + if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) + return; + // Due to their verbosity, we don't print verbose diagnostics here if we're + // gathering them for a different rendering, but we always print other + // diagnostics. + PrintDiag = !Diags; + } + SMRange MatchRange = ProcessMatchResult( + ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected + : FileCheckDiag::MatchFoundButExcluded, + SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags); + if (!PrintDiag) + return; + + std::string Message = formatv("{0}: {1} string found in input", + Pat.getCheckTy().getDescription(Prefix), + (ExpectedMatch ? "expected" : "excluded")) + .str(); + if (Pat.getCount() > 1) + Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); + + SM.PrintMessage( + Loc, ExpectedMatch ? 
/// Reports that pattern \p Pat did not match in \p Buffer.
///
/// \param ExpectedMatch true when a match was required (the failure is then
///        reported as an error), false when the pattern was an excluded one
///        (the absence is then reported as a remark, and only when
///        \p VerboseVerbose is set).
/// \param SM source manager used to print the diagnostics.
/// \param Prefix check prefix used to build the directive description.
/// \param Loc location of the directive the diagnostic is attached to.
/// \param Pat the pattern that failed to match.
/// \param MatchedCount iteration number shown as "(N out of M)" when the
///        pattern's count is greater than 1.
/// \param Buffer the searched input; leading spaces, tabs and newlines are
///        skipped before the search range is reported.
/// \param VerboseVerbose whether unmatched excluded patterns are reported.
/// \param Diags optional list that receives a diagnostic for the search range.
/// \param MatchErrors the error(s) returned by the failed match; must be in
///        the failure state. It is always consumed before returning.
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                         StringRef Prefix, SMLoc Loc,
                         const FileCheckPattern &Pat, int MatchedCount,
                         StringRef Buffer, bool VerboseVerbose,
                         std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
  assert(MatchErrors && "Called on successful match");
  bool PrintDiag = true;
  if (!ExpectedMatch) {
    if (!VerboseVerbose) {
      // An Error must be consumed before it is dropped.
      consumeError(std::move(MatchErrors));
      return;
    }
    // Due to their verbosity, we don't print verbose diagnostics here if we're
    // gathering them for a different rendering, but we always print other
    // diagnostics.
    PrintDiag = !Diags;
  }

  // If the current position is at the end of a line, advance to the start of
  // the next line.
  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
  // Record (in Diags, if given) that the whole remaining buffer was searched.
  SMRange SearchRange = ProcessMatchResult(
      ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
                    : FileCheckDiag::MatchNoneAndExcluded,
      SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
  if (!PrintDiag) {
    consumeError(std::move(MatchErrors));
    return;
  }

  // Log any FileCheckErrorDiagnostic now; other error kinds are handed back.
  MatchErrors =
      handleErrors(std::move(MatchErrors),
                   [](const FileCheckErrorDiagnostic &E) { E.log(errs()); });

  // No problem matching the string per se: every error was a diagnostic that
  // has just been logged, so there is no "not found" message to print.
  if (!MatchErrors)
    return;
  consumeError(std::move(MatchErrors));

  // Print "not found" diagnostic.
  std::string Message = formatv("{0}: {1} string not found in input",
                                Pat.getCheckTy().getDescription(Prefix),
                                (ExpectedMatch ? "expected" : "excluded"))
                            .str();
  if (Pat.getCount() > 1)
    Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
  SM.PrintMessage(
      Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);

  // Print the "scanning from here" line.
  SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");

  // Allow the pattern to print additional information if desired.
  Pat.printSubstitutions(SM, Buffer);

  // A best-guess candidate is only suggested when a match was required.
  if (ExpectedMatch)
    Pat.printFuzzyMatch(SM, Buffer, Diags);
}

/// Convenience overload that takes the prefix, location and pattern from
/// \p CheckStr and forwards everything else unchanged.
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                         const FileCheckString &CheckStr, int MatchedCount,
                         StringRef Buffer, bool VerboseVerbose,
                         std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
  PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
               MatchedCount, Buffer, VerboseVerbose, Diags,
               std::move(MatchErrors));
}
/// Matches this check string, and the CHECK-DAG / CHECK-NOT patterns attached
/// to it, against \p Buffer.
///
/// \param SM source manager used for diagnostics.
/// \param Buffer the input region to search.
/// \param IsLabelScanMode when true, only the pattern itself is matched; the
///        attached DAG/NOT patterns and the NEXT/SAME line constraints are
///        skipped.
/// \param[out] MatchLen distance from the start of the first match to the end
///        of the last match (the pattern is matched Pat.getCount() times).
/// \param Req the FileCheck request (verbosity settings are read from it).
/// \param Diags optional list that receives match diagnostics.
/// \returns the offset in \p Buffer of the first match, or StringRef::npos if
/// any step failed.
size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
                              bool IsLabelScanMode, size_t &MatchLen,
                              FileCheckRequest &Req,
                              std::vector<FileCheckDiag> *Diags) const {
  // Offset in Buffer from which the pattern itself is searched; CheckDag()
  // moves it past the attached CHECK-DAG matches.
  size_t LastPos = 0;
  std::vector<const FileCheckPattern *> NotStrings;

  // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
  // bounds; we have not processed variable definitions within the bounded block
  // yet so cannot handle any final CHECK-DAG yet; this is handled when going
  // over the block again (including the last CHECK-LABEL) in normal mode.
  if (!IsLabelScanMode) {
    // Match "dag strings" (with mixed "not strings" if any).
    LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
    if (LastPos == StringRef::npos)
      return StringRef::npos;
  }

  // Match itself from the last position after matching CHECK-DAG.
  size_t LastMatchEnd = LastPos;
  size_t FirstMatchPos = 0;
  // Go match the pattern Count times. Majority of patterns only match with
  // count 1 though.
  assert(Pat.getCount() != 0 && "pattern count can not be zero");
  for (int i = 1; i <= Pat.getCount(); i++) {
    StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
    size_t CurrentMatchLen;
    // Get a match at the current start point. The returned position is
    // relative to MatchBuffer, not to Buffer.
    Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM);

    // Report the outcome of this iteration.
    if (!MatchResult) {
      PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags,
                   MatchResult.takeError());
      return StringRef::npos;
    }
    size_t MatchPos = *MatchResult;
    PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
               Diags);
    // On the first iteration MatchBuffer starts at LastPos, so this converts
    // the position back to an offset in Buffer.
    if (i == 1)
      FirstMatchPos = LastPos + MatchPos;

    // Move the start point after the match.
    LastMatchEnd += MatchPos + CurrentMatchLen;
  }
  // Full match len counts from first match pos.
  MatchLen = LastMatchEnd - FirstMatchPos;

  // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
  // or CHECK-NOT
  if (!IsLabelScanMode) {
    // Position of the first match relative to LastPos, and the input that
    // lies between LastPos and that match.
    size_t MatchPos = FirstMatchPos - LastPos;
    StringRef MatchBuffer = Buffer.substr(LastPos);
    StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);

    // If this check is a "CHECK-NEXT", verify that the previous match was on
    // the previous line (i.e. that there is one newline between them).
    if (CheckNext(SM, SkippedRegion)) {
      ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
                         Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
                         Diags, Req.Verbose);
      return StringRef::npos;
    }

    // If this check is a "CHECK-SAME", verify that the previous match was on
    // the same line (i.e. that there is no newline between them).
    if (CheckSame(SM, SkippedRegion)) {
      ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
                         Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
                         Diags, Req.Verbose);
      return StringRef::npos;
    }

    // If this match had "not strings", verify that they don't exist in the
    // skipped region.
    if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
      return StringRef::npos;
  }

  return FirstMatchPos;
}
+ const char *FirstNewLine = nullptr; + unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); + + if (NumNewLines != 0) { + SM.PrintMessage(Loc, SourceMgr::DK_Error, + Prefix + + "-SAME: is not on the same line as the previous match"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, + "'next' match was here"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, + "previous match ended here"); + return true; + } + + return false; +} + +bool FileCheckString::CheckNot( + const SourceMgr &SM, StringRef Buffer, + const std::vector<const FileCheckPattern *> &NotStrings, + const FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const { + for (const FileCheckPattern *Pat : NotStrings) { + assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); + + size_t MatchLen = 0; + Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM); + + if (!MatchResult) { + PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, + Req.VerboseVerbose, Diags, MatchResult.takeError()); + continue; + } + size_t Pos = *MatchResult; + + PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen, + Req, Diags); + + return true; + } + + return false; +} + +size_t +FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector<const FileCheckPattern *> &NotStrings, + const FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const { + if (DagNotStrings.empty()) + return 0; + + // The start of the search range. + size_t StartPos = 0; + + struct MatchRange { + size_t Pos; + size_t End; + }; + // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match + // ranges are erased from this list once they are no longer in the search + // range. + std::list<MatchRange> MatchRanges; + + // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG + // group, so we don't use a range-based for loop here. 
+ for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); + PatItr != PatEnd; ++PatItr) { + const FileCheckPattern &Pat = *PatItr; + assert((Pat.getCheckTy() == Check::CheckDAG || + Pat.getCheckTy() == Check::CheckNot) && + "Invalid CHECK-DAG or CHECK-NOT!"); + + if (Pat.getCheckTy() == Check::CheckNot) { + NotStrings.push_back(&Pat); + continue; + } + + assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); + + // CHECK-DAG always matches from the start. + size_t MatchLen = 0, MatchPos = StartPos; + + // Search for a match that doesn't overlap a previous match in this + // CHECK-DAG group. + for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { + StringRef MatchBuffer = Buffer.substr(MatchPos); + Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM); + // With a group of CHECK-DAGs, a single mismatching means the match on + // that group of CHECK-DAGs fails immediately. + if (!MatchResult) { + PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer, + Req.VerboseVerbose, Diags, MatchResult.takeError()); + return StringRef::npos; + } + size_t MatchPosBuf = *MatchResult; + // Re-calc it as the offset relative to the start of the original string. + MatchPos += MatchPosBuf; + if (Req.VerboseVerbose) + PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, + MatchLen, Req, Diags); + MatchRange M{MatchPos, MatchPos + MatchLen}; + if (Req.AllowDeprecatedDagOverlap) { + // We don't need to track all matches in this mode, so we just maintain + // one match range that encompasses the current CHECK-DAG group's + // matches. + if (MatchRanges.empty()) + MatchRanges.insert(MatchRanges.end(), M); + else { + auto Block = MatchRanges.begin(); + Block->Pos = std::min(Block->Pos, M.Pos); + Block->End = std::max(Block->End, M.End); + } + break; + } + // Iterate previous matches until overlapping match or insertion point. 
+ bool Overlap = false; + for (; MI != ME; ++MI) { + if (M.Pos < MI->End) { + // !Overlap => New match has no overlap and is before this old match. + // Overlap => New match overlaps this old match. + Overlap = MI->Pos < M.End; + break; + } + } + if (!Overlap) { + // Insert non-overlapping match into list. + MatchRanges.insert(MI, M); + break; + } + if (Req.VerboseVerbose) { + // Due to their verbosity, we don't print verbose diagnostics here if + // we're gathering them for a different rendering, but we always print + // other diagnostics. + if (!Diags) { + SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); + SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); + SMRange OldRange(OldStart, OldEnd); + SM.PrintMessage(OldStart, SourceMgr::DK_Note, + "match discarded, overlaps earlier DAG match here", + {OldRange}); + } else + Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded; + } + MatchPos = MI->End; + } + if (!Req.VerboseVerbose) + PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, + MatchLen, Req, Diags); + + // Handle the end of a CHECK-DAG group. + if (std::next(PatItr) == PatEnd || + std::next(PatItr)->getCheckTy() == Check::CheckNot) { + if (!NotStrings.empty()) { + // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to + // CHECK-DAG, verify that there are no 'not' strings occurred in that + // region. + StringRef SkippedRegion = + Buffer.slice(StartPos, MatchRanges.begin()->Pos); + if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) + return StringRef::npos; + // Clear "not strings". + NotStrings.clear(); + } + // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the + // end of this CHECK-DAG group's match range. + StartPos = MatchRanges.rbegin()->End; + // Don't waste time checking for (impossible) overlaps before that. + MatchRanges.clear(); + } + } + + return StartPos; +} + +// A check prefix must contain only alphanumeric, hyphens and underscores. 
+static bool ValidateCheckPrefix(StringRef CheckPrefix) { + static const Regex Validator("^[a-zA-Z0-9_-]*$"); + return Validator.match(CheckPrefix); +} + +bool FileCheck::ValidateCheckPrefixes() { + StringSet<> PrefixSet; + + for (StringRef Prefix : Req.CheckPrefixes) { + // Reject empty prefixes. + if (Prefix == "") + return false; + + if (!PrefixSet.insert(Prefix).second) + return false; + + if (!ValidateCheckPrefix(Prefix)) + return false; + } + + return true; +} + +Regex FileCheck::buildCheckPrefixRegex() { + // I don't think there's a way to specify an initial value for cl::list, + // so if nothing was specified, add the default + if (Req.CheckPrefixes.empty()) + Req.CheckPrefixes.push_back("CHECK"); + + // We already validated the contents of CheckPrefixes so just concatenate + // them as alternatives. + SmallString<32> PrefixRegexStr; + for (StringRef Prefix : Req.CheckPrefixes) { + if (Prefix != Req.CheckPrefixes.front()) + PrefixRegexStr.push_back('|'); + + PrefixRegexStr.append(Prefix); + } + + return Regex(PrefixRegexStr); +} + +Error FileCheckPatternContext::defineCmdlineVariables( + std::vector<std::string> &CmdlineDefines, SourceMgr &SM) { + assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && + "Overriding defined variable with command-line variable definitions"); + + if (CmdlineDefines.empty()) + return Error::success(); + + // Create a string representing the vector of command-line definitions. Each + // definition is on its own line and prefixed with a definition number to + // clarify which definition a given diagnostic corresponds to. 
+ unsigned I = 0; + Error Errs = Error::success(); + std::string CmdlineDefsDiag; + SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices; + for (StringRef CmdlineDef : CmdlineDefines) { + std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); + size_t EqIdx = CmdlineDef.find('='); + if (EqIdx == StringRef::npos) { + CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); + continue; + } + // Numeric variable definition. + if (CmdlineDef[0] == '#') { + // Append a copy of the command-line definition adapted to use the same + // format as in the input file to be able to reuse + // parseNumericSubstitutionBlock. + CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); + std::string SubstitutionStr = CmdlineDef; + SubstitutionStr[EqIdx] = ':'; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); + CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); + } else { + CmdlineDefsDiag += DefPrefix; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); + CmdlineDefsDiag += (CmdlineDef + "\n").str(); + } + } + + // Create a buffer with fake command line content in order to display + // parsing diagnostic with location information and point to the + // global definition with invalid syntax. 
+ std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer = + MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); + StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); + SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); + + for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) { + StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, + CmdlineDefIndices.second); + if (CmdlineDef.empty()) { + Errs = joinErrors( + std::move(Errs), + FileCheckErrorDiagnostic::get( + SM, CmdlineDef, "missing equal sign in global definition")); + continue; + } + + // Numeric variable definition. + if (CmdlineDef[0] == '#') { + // Now parse the definition both to check that the syntax is correct and + // to create the necessary class instance. + StringRef CmdlineDefExpr = CmdlineDef.substr(1); + Optional<FileCheckNumericVariable *> DefinedNumericVariable; + Expected<std::unique_ptr<FileCheckExpressionAST>> ExpressionASTResult = + FileCheckPattern::parseNumericSubstitutionBlock( + CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); + if (!ExpressionASTResult) { + Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError()); + continue; + } + std::unique_ptr<FileCheckExpressionAST> ExpressionAST = + std::move(*ExpressionASTResult); + // Now evaluate the expression whose value this variable should be set + // to, since the expression of a command-line variable definition should + // only use variables defined earlier on the command-line. If not, this + // is an error and we report it. + Expected<uint64_t> Value = ExpressionAST->eval(); + if (!Value) { + Errs = joinErrors(std::move(Errs), Value.takeError()); + continue; + } + + assert(DefinedNumericVariable && "No variable defined"); + (*DefinedNumericVariable)->setValue(*Value); + + // Record this variable definition. 
+ GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = + *DefinedNumericVariable; + } else { + // String variable definition. + std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('='); + StringRef CmdlineName = CmdlineNameVal.first; + StringRef OrigCmdlineName = CmdlineName; + Expected<FileCheckPattern::VariableProperties> ParseVarResult = + FileCheckPattern::parseVariable(CmdlineName, SM); + if (!ParseVarResult) { + Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); + continue; + } + // Check that CmdlineName does not denote a pseudo variable is only + // composed of the parsed numeric variable. This catches cases like + // "FOO+2" in a "FOO+2=10" definition. + if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { + Errs = joinErrors(std::move(Errs), + FileCheckErrorDiagnostic::get( + SM, OrigCmdlineName, + "invalid name in string variable definition '" + + OrigCmdlineName + "'")); + continue; + } + StringRef Name = ParseVarResult->Name; + + // Detect collisions between string and numeric variables when the former + // is created later than the latter. + if (GlobalNumericVariableTable.find(Name) != + GlobalNumericVariableTable.end()) { + Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get( + SM, Name, + "numeric variable with name '" + + Name + "' already exists")); + continue; + } + GlobalVariableTable.insert(CmdlineNameVal); + // Mark the string variable as defined to detect collisions between + // string and numeric variables in defineCmdlineVariables when the latter + // is created later than the former. We cannot reuse GlobalVariableTable + // for this by populating it with an empty string since we would then + // lose the ability to detect the use of an undefined variable in + // match(). 
+ DefinedVariableTable[Name] = true; + } + } + + return Errs; +} + +void FileCheckPatternContext::clearLocalVars() { + SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; + for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) + if (Var.first()[0] != '$') + LocalPatternVars.push_back(Var.first()); + + // Numeric substitution reads the value of a variable directly, not via + // GlobalNumericVariableTable. Therefore, we clear local variables by + // clearing their value which will lead to a numeric substitution failure. We + // also mark the variable for removal from GlobalNumericVariableTable since + // this is what defineCmdlineVariables checks to decide that no global + // variable has been defined. + for (const auto &Var : GlobalNumericVariableTable) + if (Var.first()[0] != '$') { + Var.getValue()->clearValue(); + LocalNumericVars.push_back(Var.first()); + } + + for (const auto &Var : LocalPatternVars) + GlobalVariableTable.erase(Var); + for (const auto &Var : LocalNumericVars) + GlobalNumericVariableTable.erase(Var); +} + +bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, + std::vector<FileCheckDiag> *Diags) { + bool ChecksFailed = false; + + unsigned i = 0, j = 0, e = CheckStrings->size(); + while (true) { + StringRef CheckRegion; + if (j == e) { + CheckRegion = Buffer; + } else { + const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; + if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { + ++j; + continue; + } + + // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG + size_t MatchLabelLen = 0; + size_t MatchLabelPos = + CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); + if (MatchLabelPos == StringRef::npos) + // Immediately bail if CHECK-LABEL fails, nothing else we can do. 
+ return false; + + CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); + Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); + ++j; + } + + // Do not clear the first region as it's the one before the first + // CHECK-LABEL and it would clear variables defined on the command-line + // before they get used. + if (i != 0 && Req.EnableVarScope) + PatternContext->clearLocalVars(); + + for (; i != j; ++i) { + const FileCheckString &CheckStr = (*CheckStrings)[i]; + + // Check each string within the scanned region, including a second check + // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) + size_t MatchLen = 0; + size_t MatchPos = + CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); + + if (MatchPos == StringRef::npos) { + ChecksFailed = true; + i = j; + break; + } + + CheckRegion = CheckRegion.substr(MatchPos + MatchLen); + } + + if (j == e) + break; + } + + // Success if no checks failed. + return !ChecksFailed; +} diff --git a/llvm/lib/Support/FileCheckImpl.h b/llvm/lib/Support/FileCheckImpl.h new file mode 100644 index 0000000000000..06ce8301cec4b --- /dev/null +++ b/llvm/lib/Support/FileCheckImpl.h @@ -0,0 +1,624 @@ +//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the private interfaces of FileCheck. Its purpose is to +// allow unit testing of FileCheck and to separate the interface from the +// implementation. It is only meant to be used by FileCheck. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H +#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SourceMgr.h" +#include <map> +#include <string> +#include <vector> + +namespace llvm { + +//===----------------------------------------------------------------------===// +// Numeric substitution handling code. +//===----------------------------------------------------------------------===// + +/// Base class representing the AST of a given expression. +class FileCheckExpressionAST { +public: + virtual ~FileCheckExpressionAST() = default; + + /// Evaluates and \returns the value of the expression represented by this + /// AST or an error if evaluation fails. + virtual Expected<uint64_t> eval() const = 0; +}; + +/// Class representing an unsigned literal in the AST of an expression. +class FileCheckExpressionLiteral : public FileCheckExpressionAST { +private: + /// Actual value of the literal. + uint64_t Value; + +public: + /// Constructs a literal with the specified value. + FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} + + /// \returns the literal's value. + Expected<uint64_t> eval() const { return Value; } +}; + +/// Class to represent an undefined variable error, which quotes that +/// variable's name when printed. +class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> { +private: + StringRef VarName; + +public: + static char ID; + + FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} + + StringRef getVarName() const { return VarName; } + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print name of variable associated with this error. 
+ void log(raw_ostream &OS) const override { + OS << "\""; + OS.write_escaped(VarName) << "\""; + } +}; + +/// Class representing a numeric variable and its associated current value. +class FileCheckNumericVariable { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Value of numeric variable, if defined, or None otherwise. + Optional<uint64_t> Value; + + /// Line number where this variable is defined, or None if defined before + /// input is parsed. Used to determine whether a variable is defined on the + /// same line as a given use. + Optional<size_t> DefLineNumber; + +public: + /// Constructor for a variable \p Name defined at line \p DefLineNumber or + /// defined before input is parsed if \p DefLineNumber is None. + explicit FileCheckNumericVariable(StringRef Name, + Optional<size_t> DefLineNumber = None) + : Name(Name), DefLineNumber(DefLineNumber) {} + + /// \returns name of this numeric variable. + StringRef getName() const { return Name; } + + /// \returns this variable's value. + Optional<uint64_t> getValue() const { return Value; } + + /// Sets value of this numeric variable to \p NewValue. + void setValue(uint64_t NewValue) { Value = NewValue; } + + /// Clears value of this numeric variable, regardless of whether it is + /// currently defined or not. + void clearValue() { Value = None; } + + /// \returns the line number where this variable is defined, if any, or None + /// if defined before input is parsed. + Optional<size_t> getDefLineNumber() { return DefLineNumber; } +}; + +/// Class representing the use of a numeric variable in the AST of an +/// expression. +class FileCheckNumericVariableUse : public FileCheckExpressionAST { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Pointer to the class instance for the variable this use is about. 
+ FileCheckNumericVariable *NumericVariable; + +public: + FileCheckNumericVariableUse(StringRef Name, + FileCheckNumericVariable *NumericVariable) + : Name(Name), NumericVariable(NumericVariable) {} + + /// \returns the value of the variable referenced by this instance. + Expected<uint64_t> eval() const; +}; + +/// Type of functions evaluating a given binary operation. +using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); + +/// Class representing a single binary operation in the AST of an expression. +class FileCheckASTBinop : public FileCheckExpressionAST { +private: + /// Left operand. + std::unique_ptr<FileCheckExpressionAST> LeftOperand; + + /// Right operand. + std::unique_ptr<FileCheckExpressionAST> RightOperand; + + /// Pointer to function that can evaluate this binary operation. + binop_eval_t EvalBinop; + +public: + FileCheckASTBinop(binop_eval_t EvalBinop, + std::unique_ptr<FileCheckExpressionAST> LeftOp, + std::unique_ptr<FileCheckExpressionAST> RightOp) + : EvalBinop(EvalBinop) { + LeftOperand = std::move(LeftOp); + RightOperand = std::move(RightOp); + } + + /// Evaluates the value of the binary operation represented by this AST, + /// using EvalBinop on the result of recursively evaluating the operands. + /// \returns the expression value or an error if an undefined numeric + /// variable is used in one of the operands. + Expected<uint64_t> eval() const; +}; + +class FileCheckPatternContext; + +/// Class representing a substitution to perform in the RegExStr string. +class FileCheckSubstitution { +protected: + /// Pointer to a class instance holding, among other things, the table with + /// the values of live string variables at the start of any given CHECK line. + /// Used for substituting string variables with the text they were defined + /// as. Expressions are linked to the numeric variables they use at + /// parse time and directly access the value of the numeric variable to + /// evaluate their value. 
+ FileCheckPatternContext *Context; + + /// The string that needs to be substituted for something else. For a + /// string variable this is its name, otherwise this is the whole expression. + StringRef FromStr; + + // Index in RegExStr of where to do the substitution. + size_t InsertIdx; + +public: + FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, + size_t InsertIdx) + : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} + + virtual ~FileCheckSubstitution() = default; + + /// \returns the string to be substituted for something else. + StringRef getFromString() const { return FromStr; } + + /// \returns the index where the substitution is to be performed in RegExStr. + size_t getIndex() const { return InsertIdx; } + + /// \returns a string containing the result of the substitution represented + /// by this class instance or an error if substitution failed. + virtual Expected<std::string> getResult() const = 0; +}; + +class FileCheckStringSubstitution : public FileCheckSubstitution { +public: + FileCheckStringSubstitution(FileCheckPatternContext *Context, + StringRef VarName, size_t InsertIdx) + : FileCheckSubstitution(Context, VarName, InsertIdx) {} + + /// \returns the text that the string variable in this substitution matched + /// when defined, or an error if the variable is undefined. + Expected<std::string> getResult() const override; +}; + +class FileCheckNumericSubstitution : public FileCheckSubstitution { +private: + /// Pointer to the class representing the expression whose value is to be + /// substituted. 
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST; + +public: + FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, + std::unique_ptr<FileCheckExpressionAST> ExprAST, + size_t InsertIdx) + : FileCheckSubstitution(Context, Expr, InsertIdx) { + ExpressionAST = std::move(ExprAST); + } + + /// \returns a string containing the result of evaluating the expression in + /// this substitution, or an error if evaluation failed. + Expected<std::string> getResult() const override; +}; + +//===----------------------------------------------------------------------===// +// Pattern handling code. +//===----------------------------------------------------------------------===// + +struct FileCheckDiag; + +/// Class holding the FileCheckPattern global state, shared by all patterns: +/// tables holding values of variables and whether they are defined or not at +/// any given time in the matching process. +class FileCheckPatternContext { + friend class FileCheckPattern; + +private: + /// When matching a given pattern, this holds the value of all the string + /// variables defined in previous patterns. In a pattern, only the last + /// definition for a given variable is recorded in this table. + /// Back-references are used for uses after any the other definition. + StringMap<StringRef> GlobalVariableTable; + + /// Map of all string variables defined so far. Used at parse time to detect + /// a name conflict between a numeric variable and a string variable when + /// the former is defined on a later line than the latter. + StringMap<bool> DefinedVariableTable; + + /// When matching a given pattern, this holds the pointers to the classes + /// representing the numeric variables defined in previous patterns. When + /// matching a pattern all definitions for that pattern are recorded in the + /// NumericVariableDefs table in the FileCheckPattern instance of that + /// pattern. 
+ StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable; + + /// Pointer to the class instance representing the @LINE pseudo variable for + /// easily updating its value. + FileCheckNumericVariable *LineVariable = nullptr; + + /// Vector holding pointers to all parsed numeric variables. Used to + /// automatically free them once they are guaranteed to no longer be used. + std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables; + + /// Vector holding pointers to all substitutions. Used to automatically free + /// them once they are guaranteed to no longer be used. + std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions; + +public: + /// \returns the value of string variable \p VarName or an error if no such + /// variable has been defined. + Expected<StringRef> getPatternVarValue(StringRef VarName); + + /// Defines string and numeric variables from definitions given on the + /// command line, passed as a vector of [#]VAR=VAL strings in + /// \p CmdlineDefines. \returns an error list containing diagnostics against + /// \p SM for all definition parsing failures, if any, or Success otherwise. + Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines, + SourceMgr &SM); + + /// Create @LINE pseudo variable. Value is set when pattern are being + /// matched. + void createLineVariable(); + + /// Undefines local variables (variables whose name does not start with a '$' + /// sign), i.e. removes them from GlobalVariableTable and from + /// GlobalNumericVariableTable and also clears the value of numeric + /// variables. + void clearLocalVars(); + +private: + /// Makes a new numeric variable and registers it for destruction when the + /// context is destroyed. + template <class... Types> + FileCheckNumericVariable *makeNumericVariable(Types... args); + + /// Makes a new string substitution and registers it for destruction when the + /// context is destroyed. 
+ FileCheckSubstitution *makeStringSubstitution(StringRef VarName, + size_t InsertIdx); + + /// Makes a new numeric substitution and registers it for destruction when + /// the context is destroyed. + FileCheckSubstitution * + makeNumericSubstitution(StringRef ExpressionStr, + std::unique_ptr<FileCheckExpressionAST> ExpressionAST, + size_t InsertIdx); +}; + +/// Class to represent an error holding a diagnostic with location information +/// used when printing it. +class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> { +private: + SMDiagnostic Diagnostic; + +public: + static char ID; + + FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } + + static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { + return make_error<FileCheckErrorDiagnostic>( + SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); + } + + static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { + return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); + } +}; + +class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> { +public: + static char ID; + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { + OS << "String not found in input"; + } +}; + +class FileCheckPattern { + SMLoc PatternLoc; + + /// A fixed string to match as the pattern or empty if this pattern requires + /// a regex match. + StringRef FixedStr; + + /// A regex string to match as the pattern or empty if this pattern requires + /// a fixed string to match. 
+ std::string RegExStr; + + /// Entries in this vector represent a substitution of a string variable or + /// an expression in the RegExStr regex at match time. For example, in the + /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", + /// RegExStr will contain "foobaz" and we'll get two entries in this vector + /// that tells us to insert the value of string variable "bar" at offset 3 + /// and the value of expression "N+1" at offset 6. + std::vector<FileCheckSubstitution *> Substitutions; + + /// Maps names of string variables defined in a pattern to the number of + /// their parenthesis group in RegExStr capturing their last definition. + /// + /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", + /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is + /// the value captured for QUUX on the earlier line where it was defined, and + /// VariableDefs will map "bar" to the third parenthesis group which captures + /// the second definition of "bar". + /// + /// Note: uses std::map rather than StringMap to be able to get the key when + /// iterating over values. + std::map<StringRef, unsigned> VariableDefs; + + /// Structure representing the definition of a numeric variable in a pattern. + /// It holds the pointer to the class representing the numeric variable whose + /// value is being defined and the number of the parenthesis group in + /// RegExStr to capture that value. + struct FileCheckNumericVariableMatch { + /// Pointer to class representing the numeric variable whose value is being + /// defined. + FileCheckNumericVariable *DefinedNumericVariable; + + /// Number of the parenthesis group in RegExStr that captures the value of + /// this numeric variable definition. + unsigned CaptureParenGroup; + }; + + /// Holds the number of the parenthesis group in RegExStr and pointer to the + /// corresponding FileCheckNumericVariable class instance of all numeric + /// variable definitions. 
Used to set the matched value of all those + /// variables. + StringMap<FileCheckNumericVariableMatch> NumericVariableDefs; + + /// Pointer to a class instance holding the global state shared by all + /// patterns: + /// - separate tables with the values of live string and numeric variables + /// respectively at the start of any given CHECK line; + /// - table holding whether a string variable has been defined at any given + /// point during the parsing phase. + FileCheckPatternContext *Context; + + Check::FileCheckType CheckTy; + + /// Line number for this CHECK pattern or None if it is an implicit pattern. + /// Used to determine whether a variable definition is made on an earlier + /// line to the one with this CHECK. + Optional<size_t> LineNumber; + + /// Ignore case while matching if set to true. + bool IgnoreCase = false; + +public: + FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, + Optional<size_t> Line = None) + : Context(Context), CheckTy(Ty), LineNumber(Line) {} + + /// \returns the location in source code. + SMLoc getLoc() const { return PatternLoc; } + + /// \returns the pointer to the global state for all patterns in this + /// FileCheck instance. + FileCheckPatternContext *getContext() const { return Context; } + + /// \returns whether \p C is a valid first character for a variable name. + static bool isValidVarNameStart(char C); + + /// Parsing information about a variable. + struct VariableProperties { + StringRef Name; + bool IsPseudo; + }; + + /// Parses the string at the start of \p Str for a variable name. \returns + /// a VariableProperties structure holding the variable name and whether it + /// is the name of a pseudo variable, or an error holding a diagnostic + /// against \p SM if parsing fail. If parsing was successful, also strips + /// \p Str from the variable name. 
+ static Expected<VariableProperties> parseVariable(StringRef &Str, + const SourceMgr &SM); + /// Parses \p Expr for a numeric substitution block at line \p LineNumber, + /// or before input is parsed if \p LineNumber is None. Parameter + /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE + /// expression and \p Context points to the class instance holding the live + /// string and numeric variables. \returns a pointer to the class instance + /// representing the AST of the expression whose value must be substitued, or + /// an error holding a diagnostic against \p SM if parsing fails. If + /// substitution was successful, sets \p DefinedNumericVariable to point to + /// the class representing the numeric variable defined in this numeric + /// substitution block, or None if this block does not define any variable. + static Expected<std::unique_ptr<FileCheckExpressionAST>> + parseNumericSubstitutionBlock( + StringRef Expr, + Optional<FileCheckNumericVariable *> &DefinedNumericVariable, + bool IsLegacyLineExpr, Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern + /// instance accordingly. + /// + /// \p Prefix provides which prefix is being matched, \p Req describes the + /// global options that influence the parsing such as whitespace + /// canonicalization, \p SM provides the SourceMgr used for error reports. + /// \returns true in case of an error, false otherwise. + bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, + const FileCheckRequest &Req); + /// Matches the pattern string against the input buffer \p Buffer + /// + /// \returns the position that is matched or an error indicating why matching + /// failed. If there is a match, updates \p MatchLen with the size of the + /// matched string. 
+ /// + /// The GlobalVariableTable StringMap in the FileCheckPatternContext class + /// instance provides the current values of FileCheck string variables and + /// is updated if this match defines new values. Likewise, the + /// GlobalNumericVariableTable StringMap in the same class provides the + /// current values of FileCheck numeric variables and is updated if this + /// match defines new numeric values. + Expected<size_t> match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const; + /// Prints the value of successful substitutions or the name of the undefined + /// string or numeric variables preventing a successful substitution. + void printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange = None) const; + void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, + std::vector<FileCheckDiag> *Diags) const; + + bool hasVariable() const { + return !(Substitutions.empty() && VariableDefs.empty()); + } + + Check::FileCheckType getCheckTy() const { return CheckTy; } + + int getCount() const { return CheckTy.getCount(); } + +private: + bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); + void AddBackrefToRegEx(unsigned BackrefNum); + /// Computes an arbitrary estimate for the quality of matching this pattern + /// at the start of \p Buffer; a distance of zero should correspond to a + /// perfect match. + unsigned computeMatchDistance(StringRef Buffer) const; + /// Finds the closing sequence of a regex variable usage or definition. + /// + /// \p Str has to point in the beginning of the definition (right after the + /// opening sequence). \p SM holds the SourceMgr used for error repporting. + /// \returns the offset of the closing sequence within Str, or npos if it + /// was not found. + size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); + + /// Parses \p Expr for the name of a numeric variable to be defined at line + /// \p LineNumber, or before input is parsed if \p LineNumber is None. 
+ /// \returns a pointer to the class instance representing that variable, + /// creating it if needed, or an error holding a diagnostic against \p SM + /// should defining such a variable be invalid. + static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional<size_t> LineNumber, const SourceMgr &SM); + /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use + /// at line \p LineNumber, or before input is parsed if \p LineNumber is + /// None. Parameter \p Context points to the class instance holding the live + /// string and numeric variables. \returns the pointer to the class instance + /// representing that variable if successful, or an error holding a + /// diagnostic against \p SM otherwise. + static Expected<std::unique_ptr<FileCheckNumericVariableUse>> + parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional<size_t> LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM); + enum class AllowedOperand { LineVar, Literal, Any }; + /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or + /// before input is parsed if \p LineNumber is None. Accepts both literal + /// values and numeric variables, depending on the value of \p AO. Parameter + /// \p Context points to the class instance holding the live string and + /// numeric variables. \returns the class representing that operand in the + /// AST of the expression or an error holding a diagnostic against \p SM + /// otherwise. + static Expected<std::unique_ptr<FileCheckExpressionAST>> + parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses \p Expr for a binary operation at line \p LineNumber, or before + /// input is parsed if \p LineNumber is None. 
The left operand of this binary + /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether + /// we are parsing a legacy @LINE expression. Parameter \p Context points to + /// the class instance holding the live string and numeric variables. + /// \returns the class representing the binary operation in the AST of the + /// expression, or an error holding a diagnostic against \p SM otherwise. + static Expected<std::unique_ptr<FileCheckExpressionAST>> + parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp, + bool IsLegacyLineExpr, Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); +}; + +//===----------------------------------------------------------------------===// +// Check Strings. +//===----------------------------------------------------------------------===// + +/// A check that we found in the input file. +struct FileCheckString { + /// The pattern to match. + FileCheckPattern Pat; + + /// Which prefix name this check matched. + StringRef Prefix; + + /// The location in the match file that the check string was specified. + SMLoc Loc; + + /// All of the strings that are disallowed from occurring between this match + /// string and the previous one (or start of file). + std::vector<FileCheckPattern> DagNotStrings; + + FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) + : Pat(P), Prefix(S), Loc(L) {} + + /// Matches check string and its "not strings" and/or "dag strings". + size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, + size_t &MatchLen, FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const; + + /// Verifies that there is a single line in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that there is no newline in the given \p Buffer. Errors are + /// reported against \p SM. 
+ bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that none of the strings in \p NotStrings are found in the given + /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in + /// \p Diags according to the verbosity level set in \p Req. + bool CheckNot(const SourceMgr &SM, StringRef Buffer, + const std::vector<const FileCheckPattern *> &NotStrings, + const FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const; + /// Matches "dag strings" and their mixed "not strings". + size_t CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector<const FileCheckPattern *> &NotStrings, + const FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const; +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Support/FileCollector.cpp b/llvm/lib/Support/FileCollector.cpp new file mode 100644 index 0000000000000..47fca64137223 --- /dev/null +++ b/llvm/lib/Support/FileCollector.cpp @@ -0,0 +1,268 @@ +//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileCollector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" + +using namespace llvm; + +static bool isCaseSensitivePath(StringRef Path) { + SmallString<256> TmpDest = Path, UpperDest, RealDest; + + // Remove component traversals, links, etc. + if (!sys::fs::real_path(Path, TmpDest)) + return true; // Current default value in vfs.yaml + Path = TmpDest; + + // Change path to all upper case and ask for its real path, if the latter + // exists and is equal to path, it's not case sensitive. 
Default to case + // sensitive in the absence of real_path, since this is the YAMLVFSWriter + // default. + UpperDest = Path.upper(); + if (sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest)) + return false; + return true; +} + +FileCollector::FileCollector(std::string Root, std::string OverlayRoot) + : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) { + sys::fs::create_directories(this->Root, true); +} + +bool FileCollector::getRealPath(StringRef SrcPath, + SmallVectorImpl<char> &Result) { + SmallString<256> RealPath; + StringRef FileName = sys::path::filename(SrcPath); + std::string Directory = sys::path::parent_path(SrcPath).str(); + auto DirWithSymlink = SymlinkMap.find(Directory); + + // Use real_path to fix any symbolic link component present in a path. + // Computing the real path is expensive, cache the search through the parent + // path Directory. + if (DirWithSymlink == SymlinkMap.end()) { + auto EC = sys::fs::real_path(Directory, RealPath); + if (EC) + return false; + SymlinkMap[Directory] = RealPath.str(); + } else { + RealPath = DirWithSymlink->second; + } + + sys::path::append(RealPath, FileName); + Result.swap(RealPath); + return true; +} + +void FileCollector::addFile(const Twine &file) { + std::lock_guard<std::mutex> lock(Mutex); + std::string FileStr = file.str(); + if (markAsSeen(FileStr)) + addFileImpl(FileStr); +} + +void FileCollector::addFileImpl(StringRef SrcPath) { + // We need an absolute src path to append to the root. + SmallString<256> AbsoluteSrc = SrcPath; + sys::fs::make_absolute(AbsoluteSrc); + + // Canonicalize src to a native path to avoid mixed separator styles. + sys::path::native(AbsoluteSrc); + + // Remove redundant leading "./" pieces and consecutive separators. + AbsoluteSrc = sys::path::remove_leading_dotslash(AbsoluteSrc); + + // Canonicalize the source path by removing "..", "." components. 
+ SmallString<256> VirtualPath = AbsoluteSrc; + sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true); + + // If a ".." component is present after a symlink component, remove_dots may + // lead to the wrong real destination path. Let the source be canonicalized + // like that but make sure we always use the real path for the destination. + SmallString<256> CopyFrom; + if (!getRealPath(AbsoluteSrc, CopyFrom)) + CopyFrom = VirtualPath; + + SmallString<256> DstPath = StringRef(Root); + sys::path::append(DstPath, sys::path::relative_path(CopyFrom)); + + // Always map a canonical src path to its real path into the YAML, by doing + // this we map different virtual src paths to the same entry in the VFS + // overlay, which is a way to emulate symlink inside the VFS; this is also + // needed for correctness, not doing that can lead to module redefinition + // errors. + addFileToMapping(VirtualPath, DstPath); +} + +/// Set the access and modification time for the given file from the given +/// status object. +static std::error_code +copyAccessAndModificationTime(StringRef Filename, + const sys::fs::file_status &Stat) { + int FD; + + if (auto EC = + sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting)) + return EC; + + if (auto EC = sys::fs::setLastAccessAndModificationTime( + FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime())) + return EC; + + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + return EC; + + return {}; +} + +std::error_code FileCollector::copyFiles(bool StopOnError) { + for (auto &entry : VFSWriter.getMappings()) { + // Create directory tree. + if (std::error_code EC = + sys::fs::create_directories(sys::path::parent_path(entry.RPath), + /*IgnoreExisting=*/true)) { + if (StopOnError) + return EC; + } + + // Get the status of the original file/directory. 
+ sys::fs::file_status Stat; + if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) { + if (StopOnError) + return EC; + continue; + } + + if (Stat.type() == sys::fs::file_type::directory_file) { + // Construct a directory when it's just a directory entry. + if (std::error_code EC = + sys::fs::create_directories(entry.RPath, + /*IgnoreExisting=*/true)) { + if (StopOnError) + return EC; + } + continue; + } + + // Copy file over. + if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) { + if (StopOnError) + return EC; + } + + // Copy over permissions. + if (auto perms = sys::fs::getPermissions(entry.VPath)) { + if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) { + if (StopOnError) + return EC; + } + } + + // Copy over modification time. + copyAccessAndModificationTime(entry.RPath, Stat); + } + return {}; +} + +std::error_code FileCollector::writeMapping(StringRef mapping_file) { + std::lock_guard<std::mutex> lock(Mutex); + + VFSWriter.setOverlayDir(OverlayRoot); + VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot)); + VFSWriter.setUseExternalNames(false); + + std::error_code EC; + raw_fd_ostream os(mapping_file, EC, sys::fs::OF_Text); + if (EC) + return EC; + + VFSWriter.write(os); + + return {}; +} + +namespace { + +class FileCollectorFileSystem : public vfs::FileSystem { +public: + explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS, + std::shared_ptr<FileCollector> Collector) + : FS(std::move(FS)), Collector(std::move(Collector)) {} + + llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override { + auto Result = FS->status(Path); + if (Result && Result->exists()) + Collector->addFile(Path); + return Result; + } + + llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> + openFileForRead(const Twine &Path) override { + auto Result = FS->openFileForRead(Path); + if (Result && *Result) + Collector->addFile(Path); + return Result; + } + + llvm::vfs::directory_iterator dir_begin(const 
llvm::Twine &Dir, + std::error_code &EC) override { + auto It = FS->dir_begin(Dir, EC); + if (EC) + return It; + // Collect everything that's listed in case the user needs it. + Collector->addFile(Dir); + for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) { + if (It->type() == sys::fs::file_type::regular_file || + It->type() == sys::fs::file_type::directory_file || + It->type() == sys::fs::file_type::symlink_file) { + Collector->addFile(It->path()); + } + } + if (EC) + return It; + // Return a new iterator. + return FS->dir_begin(Dir, EC); + } + + std::error_code getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const override { + auto EC = FS->getRealPath(Path, Output); + if (!EC) { + Collector->addFile(Path); + if (Output.size() > 0) + Collector->addFile(Output); + } + return EC; + } + + std::error_code isLocal(const Twine &Path, bool &Result) override { + return FS->isLocal(Path, Result); + } + + llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { + return FS->getCurrentWorkingDirectory(); + } + + std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override { + return FS->setCurrentWorkingDirectory(Path); + } + +private: + IntrusiveRefCntPtr<vfs::FileSystem> FS; + std::shared_ptr<FileCollector> Collector; +}; + +} // end anonymous namespace + +IntrusiveRefCntPtr<vfs::FileSystem> +FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS, + std::shared_ptr<FileCollector> Collector) { + return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector)); +} diff --git a/llvm/lib/Support/FileOutputBuffer.cpp b/llvm/lib/Support/FileOutputBuffer.cpp new file mode 100644 index 0000000000000..024dd3e57a407 --- /dev/null +++ b/llvm/lib/Support/FileOutputBuffer.cpp @@ -0,0 +1,196 @@ +//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility for creating a in-memory buffer that will be written to a file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/Path.h" +#include <system_error> + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; +using namespace llvm::sys; + +namespace { +// A FileOutputBuffer which creates a temporary file in the same directory +// as the final output file. The final output file is atomically replaced +// with the temporary file on commit(). +class OnDiskBuffer : public FileOutputBuffer { +public: + OnDiskBuffer(StringRef Path, fs::TempFile Temp, + std::unique_ptr<fs::mapped_file_region> Buf) + : FileOutputBuffer(Path), Buffer(std::move(Buf)), Temp(std::move(Temp)) {} + + uint8_t *getBufferStart() const override { return (uint8_t *)Buffer->data(); } + + uint8_t *getBufferEnd() const override { + return (uint8_t *)Buffer->data() + Buffer->size(); + } + + size_t getBufferSize() const override { return Buffer->size(); } + + Error commit() override { + // Unmap buffer, letting OS flush dirty pages to file on disk. + Buffer.reset(); + + // Atomically replace the existing file with the new one. + return Temp.keep(FinalPath); + } + + ~OnDiskBuffer() override { + // Close the mapping before deleting the temp file, so that the removal + // succeeds. + Buffer.reset(); + consumeError(Temp.discard()); + } + + void discard() override { + // Delete the temp file if it still was open, but keeping the mapping + // active. 
+ consumeError(Temp.discard()); + } + +private: + std::unique_ptr<fs::mapped_file_region> Buffer; + fs::TempFile Temp; +}; + +// A FileOutputBuffer which keeps data in memory and writes to the final +// output file on commit(). This is used only when we cannot use OnDiskBuffer. +class InMemoryBuffer : public FileOutputBuffer { +public: + InMemoryBuffer(StringRef Path, MemoryBlock Buf, std::size_t BufSize, + unsigned Mode) + : FileOutputBuffer(Path), Buffer(Buf), BufferSize(BufSize), + Mode(Mode) {} + + uint8_t *getBufferStart() const override { return (uint8_t *)Buffer.base(); } + + uint8_t *getBufferEnd() const override { + return (uint8_t *)Buffer.base() + BufferSize; + } + + size_t getBufferSize() const override { return BufferSize; } + + Error commit() override { + if (FinalPath == "-") { + llvm::outs() << StringRef((const char *)Buffer.base(), BufferSize); + llvm::outs().flush(); + return Error::success(); + } + + using namespace sys::fs; + int FD; + std::error_code EC; + if (auto EC = + openFileForWrite(FinalPath, FD, CD_CreateAlways, OF_None, Mode)) + return errorCodeToError(EC); + raw_fd_ostream OS(FD, /*shouldClose=*/true, /*unbuffered=*/true); + OS << StringRef((const char *)Buffer.base(), BufferSize); + return Error::success(); + } + +private: + // Buffer may actually contain a larger memory block than BufferSize + OwningMemoryBlock Buffer; + size_t BufferSize; + unsigned Mode; +}; +} // namespace + +static Expected<std::unique_ptr<InMemoryBuffer>> +createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) { + std::error_code EC; + MemoryBlock MB = Memory::allocateMappedMemory( + Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); + if (EC) + return errorCodeToError(EC); + return std::make_unique<InMemoryBuffer>(Path, MB, Size, Mode); +} + +static Expected<std::unique_ptr<FileOutputBuffer>> +createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) { + Expected<fs::TempFile> FileOrErr = + fs::TempFile::create(Path + 
".tmp%%%%%%%", Mode); + if (!FileOrErr) + return FileOrErr.takeError(); + fs::TempFile File = std::move(*FileOrErr); + +#ifndef _WIN32 + // On Windows, CreateFileMapping (the mmap function on Windows) + // automatically extends the underlying file. We don't need to + // extend the file beforehand. _chsize (ftruncate on Windows) is + // pretty slow just like it writes specified amount of bytes, + // so we should avoid calling that function. + if (auto EC = fs::resize_file(File.FD, Size)) { + consumeError(File.discard()); + return errorCodeToError(EC); + } +#endif + + // Mmap it. + std::error_code EC; + auto MappedFile = std::make_unique<fs::mapped_file_region>( + fs::convertFDToNativeFile(File.FD), fs::mapped_file_region::readwrite, + Size, 0, EC); + + // mmap(2) can fail if the underlying filesystem does not support it. + // If that happens, we fall back to in-memory buffer as the last resort. + if (EC) { + consumeError(File.discard()); + return createInMemoryBuffer(Path, Size, Mode); + } + + return std::make_unique<OnDiskBuffer>(Path, std::move(File), + std::move(MappedFile)); +} + +// Create an instance of FileOutputBuffer. +Expected<std::unique_ptr<FileOutputBuffer>> +FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) { + // Handle "-" as stdout just like llvm::raw_ostream does. + if (Path == "-") + return createInMemoryBuffer("-", Size, /*Mode=*/0); + + unsigned Mode = fs::all_read | fs::all_write; + if (Flags & F_executable) + Mode |= fs::all_exe; + + fs::file_status Stat; + fs::status(Path, Stat); + + // Usually, we want to create OnDiskBuffer to create a temporary file in + // the same directory as the destination file and atomically replaces it + // by rename(2). + // + // However, if the destination file is a special file, we don't want to + // use rename (e.g. we don't want to replace /dev/null with a regular + // file.) If that's the case, we create an in-memory buffer, open the + // destination file and write to it on commit(). 
+ switch (Stat.type()) { + case fs::file_type::directory_file: + return errorCodeToError(errc::is_a_directory); + case fs::file_type::regular_file: + case fs::file_type::file_not_found: + case fs::file_type::status_error: + return createOnDiskBuffer(Path, Size, Mode); + default: + return createInMemoryBuffer(Path, Size, Mode); + } +} diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp new file mode 100644 index 0000000000000..d11fbb54dc0d8 --- /dev/null +++ b/llvm/lib/Support/FileUtilities.cpp @@ -0,0 +1,332 @@ +//===- Support/FileUtilities.cpp - File System Utilities ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a family of utility functions which are useful for doing +// various things with files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileUtilities.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> +#include <cmath> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <memory> +#include <system_error> + +using namespace llvm; + +static bool isSignedChar(char C) { + return (C == '+' || C == '-'); +} + +static bool isExponentChar(char C) { + switch (C) { + case 'D': // Strange exponential notation. + case 'd': // Strange exponential notation. 
+ case 'e': + case 'E': return true; + default: return false; + } +} + +static bool isNumberChar(char C) { + switch (C) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '.': return true; + default: return isSignedChar(C) || isExponentChar(C); + } +} + +static const char *BackupNumber(const char *Pos, const char *FirstChar) { + // If we didn't stop in the middle of a number, don't backup. + if (!isNumberChar(*Pos)) return Pos; + + // Otherwise, return to the start of the number. + bool HasPeriod = false; + while (Pos > FirstChar && isNumberChar(Pos[-1])) { + // Backup over at most one period. + if (Pos[-1] == '.') { + if (HasPeriod) + break; + HasPeriod = true; + } + + --Pos; + if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1])) + break; + } + return Pos; +} + +/// EndOfNumber - Return the first character that is not part of the specified +/// number. This assumes that the buffer is null terminated, so it won't fall +/// off the end. +static const char *EndOfNumber(const char *Pos) { + while (isNumberChar(*Pos)) + ++Pos; + return Pos; +} + +/// CompareNumbers - compare two numbers, returning true if they are different. +static bool CompareNumbers(const char *&F1P, const char *&F2P, + const char *F1End, const char *F2End, + double AbsTolerance, double RelTolerance, + std::string *ErrorMsg) { + const char *F1NumEnd, *F2NumEnd; + double V1 = 0.0, V2 = 0.0; + + // If one of the positions is at a space and the other isn't, chomp up 'til + // the end of the space. + while (isspace(static_cast<unsigned char>(*F1P)) && F1P != F1End) + ++F1P; + while (isspace(static_cast<unsigned char>(*F2P)) && F2P != F2End) + ++F2P; + + // If we stop on numbers, compare their difference. + if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) { + // The diff failed. 
+ F1NumEnd = F1P; + F2NumEnd = F2P; + } else { + // Note that some ugliness is built into this to permit support for numbers + // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This + // occurs in 200.sixtrack in spec2k. + V1 = strtod(F1P, const_cast<char**>(&F1NumEnd)); + V2 = strtod(F2P, const_cast<char**>(&F2NumEnd)); + + if (*F1NumEnd == 'D' || *F1NumEnd == 'd') { + // Copy string into tmp buffer to replace the 'D' with an 'e'. + SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1); + // Strange exponential notation! + StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e'; + + V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd)); + F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]); + } + + if (*F2NumEnd == 'D' || *F2NumEnd == 'd') { + // Copy string into tmp buffer to replace the 'D' with an 'e'. + SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1); + // Strange exponential notation! + StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e'; + + V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd)); + F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]); + } + } + + if (F1NumEnd == F1P || F2NumEnd == F2P) { + if (ErrorMsg) { + *ErrorMsg = "FP Comparison failed, not a numeric difference between '"; + *ErrorMsg += F1P[0]; + *ErrorMsg += "' and '"; + *ErrorMsg += F2P[0]; + *ErrorMsg += "'"; + } + return true; + } + + // Check to see if these are inside the absolute tolerance + if (AbsTolerance < std::abs(V1-V2)) { + // Nope, check the relative tolerance... + double Diff; + if (V2) + Diff = std::abs(V1/V2 - 1.0); + else if (V1) + Diff = std::abs(V2/V1 - 1.0); + else + Diff = 0; // Both zero. + if (Diff > RelTolerance) { + if (ErrorMsg) { + raw_string_ostream(*ErrorMsg) + << "Compared: " << V1 << " and " << V2 << '\n' + << "abs. diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n' + << "Out of tolerance: rel/abs: " << RelTolerance << '/' + << AbsTolerance; + } + return true; + } + } + + // Otherwise, advance our read pointers to the end of the numbers. 
+ F1P = F1NumEnd; F2P = F2NumEnd; + return false; +} + +/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the +/// files match, 1 if they are different, and 2 if there is a file error. This +/// function differs from DiffFiles in that you can specify an absolete and +/// relative FP error that is allowed to exist. If you specify a string to fill +/// in for the error option, it will set the string to an error message if an +/// error occurs, allowing the caller to distinguish between a failed diff and a +/// file system error. +/// +int llvm::DiffFilesWithTolerance(StringRef NameA, + StringRef NameB, + double AbsTol, double RelTol, + std::string *Error) { + // Now its safe to mmap the files into memory because both files + // have a non-zero size. + ErrorOr<std::unique_ptr<MemoryBuffer>> F1OrErr = MemoryBuffer::getFile(NameA); + if (std::error_code EC = F1OrErr.getError()) { + if (Error) + *Error = EC.message(); + return 2; + } + MemoryBuffer &F1 = *F1OrErr.get(); + + ErrorOr<std::unique_ptr<MemoryBuffer>> F2OrErr = MemoryBuffer::getFile(NameB); + if (std::error_code EC = F2OrErr.getError()) { + if (Error) + *Error = EC.message(); + return 2; + } + MemoryBuffer &F2 = *F2OrErr.get(); + + // Okay, now that we opened the files, scan them for the first difference. + const char *File1Start = F1.getBufferStart(); + const char *File2Start = F2.getBufferStart(); + const char *File1End = F1.getBufferEnd(); + const char *File2End = F2.getBufferEnd(); + const char *F1P = File1Start; + const char *F2P = File2Start; + uint64_t A_size = F1.getBufferSize(); + uint64_t B_size = F2.getBufferSize(); + + // Are the buffers identical? Common case: Handle this efficiently. + if (A_size == B_size && + std::memcmp(File1Start, File2Start, A_size) == 0) + return 0; + + // Otherwise, we are done a tolerances are set. + if (AbsTol == 0 && RelTol == 0) { + if (Error) + *Error = "Files differ without tolerance allowance"; + return 1; // Files different! 
+ } + + bool CompareFailed = false; + while (true) { + // Scan for the end of file or next difference. + while (F1P < File1End && F2P < File2End && *F1P == *F2P) { + ++F1P; + ++F2P; + } + + if (F1P >= File1End || F2P >= File2End) break; + + // Okay, we must have found a difference. Backup to the start of the + // current number each stream is at so that we can compare from the + // beginning. + F1P = BackupNumber(F1P, File1Start); + F2P = BackupNumber(F2P, File2Start); + + // Now that we are at the start of the numbers, compare them, exiting if + // they don't match. + if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) { + CompareFailed = true; + break; + } + } + + // Okay, we reached the end of file. If both files are at the end, we + // succeeded. + bool F1AtEnd = F1P >= File1End; + bool F2AtEnd = F2P >= File2End; + if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) { + // Else, we might have run off the end due to a number: backup and retry. + if (F1AtEnd && isNumberChar(F1P[-1])) --F1P; + if (F2AtEnd && isNumberChar(F2P[-1])) --F2P; + F1P = BackupNumber(F1P, File1Start); + F2P = BackupNumber(F2P, File2Start); + + // Now that we are at the start of the numbers, compare them, exiting if + // they don't match. + if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) + CompareFailed = true; + + // If we found the end, we succeeded. 
+ if (F1P < File1End || F2P < File2End) + CompareFailed = true; + } + + return CompareFailed; +} + +void llvm::AtomicFileWriteError::log(raw_ostream &OS) const { + OS << "atomic_write_error: "; + switch (Error) { + case atomic_write_error::failed_to_create_uniq_file: + OS << "failed_to_create_uniq_file"; + return; + case atomic_write_error::output_stream_error: + OS << "output_stream_error"; + return; + case atomic_write_error::failed_to_rename_temp_file: + OS << "failed_to_rename_temp_file"; + return; + } + llvm_unreachable("unknown atomic_write_error value in " + "failed_to_rename_temp_file::log()"); +} + +llvm::Error llvm::writeFileAtomically(StringRef TempPathModel, + StringRef FinalPath, StringRef Buffer) { + return writeFileAtomically(TempPathModel, FinalPath, + [&Buffer](llvm::raw_ostream &OS) { + OS.write(Buffer.data(), Buffer.size()); + return llvm::Error::success(); + }); +} + +llvm::Error llvm::writeFileAtomically( + StringRef TempPathModel, StringRef FinalPath, + std::function<llvm::Error(llvm::raw_ostream &)> Writer) { + SmallString<128> GeneratedUniqPath; + int TempFD; + if (sys::fs::createUniqueFile(TempPathModel.str(), TempFD, + GeneratedUniqPath)) { + return llvm::make_error<AtomicFileWriteError>( + atomic_write_error::failed_to_create_uniq_file); + } + llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath); + + raw_fd_ostream OS(TempFD, /*shouldClose=*/true); + if (llvm::Error Err = Writer(OS)) { + return Err; + } + + OS.close(); + if (OS.has_error()) { + OS.clear_error(); + return llvm::make_error<AtomicFileWriteError>( + atomic_write_error::output_stream_error); + } + + if (const std::error_code Error = + sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(), + /*to=*/FinalPath.str().c_str())) { + return llvm::make_error<AtomicFileWriteError>( + atomic_write_error::failed_to_rename_temp_file); + } + + RemoveTmpFileOnFail.releaseFile(); + return Error::success(); +} + +char llvm::AtomicFileWriteError::ID; diff --git 
a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp new file mode 100644 index 0000000000000..ce6f196e1060f --- /dev/null +++ b/llvm/lib/Support/FoldingSet.cpp @@ -0,0 +1,463 @@ +//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a hash set that can be used to remove duplication of +// nodes in a graph. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstring> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// FoldingSetNodeIDRef Implementation + +/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, +/// used to lookup the node in the FoldingSetBase. +unsigned FoldingSetNodeIDRef::ComputeHash() const { + return static_cast<unsigned>(hash_combine_range(Data, Data+Size)); +} + +bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { + if (Size != RHS.Size) return false; + return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; +} + +/// Used to compare the "ordering" of two nodes as defined by the +/// profiled bits and their ordering defined by memcmp(). 
+bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { + if (Size != RHS.Size) + return Size < RHS.Size; + return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0; +} + +//===----------------------------------------------------------------------===// +// FoldingSetNodeID Implementation + +/// Add* - Add various data types to Bit data. +/// +void FoldingSetNodeID::AddPointer(const void *Ptr) { + // Note: this adds pointers to the hash using sizes and endianness that + // depend on the host. It doesn't matter, however, because hashing on + // pointer values is inherently unstable. Nothing should depend on the + // ordering of nodes in the folding set. + static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long), + "unexpected pointer size"); + AddInteger(reinterpret_cast<uintptr_t>(Ptr)); +} +void FoldingSetNodeID::AddInteger(signed I) { + Bits.push_back(I); +} +void FoldingSetNodeID::AddInteger(unsigned I) { + Bits.push_back(I); +} +void FoldingSetNodeID::AddInteger(long I) { + AddInteger((unsigned long)I); +} +void FoldingSetNodeID::AddInteger(unsigned long I) { + if (sizeof(long) == sizeof(int)) + AddInteger(unsigned(I)); + else if (sizeof(long) == sizeof(long long)) { + AddInteger((unsigned long long)I); + } else { + llvm_unreachable("unexpected sizeof(long)"); + } +} +void FoldingSetNodeID::AddInteger(long long I) { + AddInteger((unsigned long long)I); +} +void FoldingSetNodeID::AddInteger(unsigned long long I) { + AddInteger(unsigned(I)); + AddInteger(unsigned(I >> 32)); +} + +void FoldingSetNodeID::AddString(StringRef String) { + unsigned Size = String.size(); + Bits.push_back(Size); + if (!Size) return; + + unsigned Units = Size / 4; + unsigned Pos = 0; + const unsigned *Base = (const unsigned*) String.data(); + + // If the string is aligned do a bulk transfer. + if (!((intptr_t)Base & 3)) { + Bits.append(Base, Base + Units); + Pos = (Units + 1) * 4; + } else { + // Otherwise do it the hard way. 
+ // To be compatible with above bulk transfer, we need to take endianness + // into account. + static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost, + "Unexpected host endianness"); + if (sys::IsBigEndianHost) { + for (Pos += 4; Pos <= Size; Pos += 4) { + unsigned V = ((unsigned char)String[Pos - 4] << 24) | + ((unsigned char)String[Pos - 3] << 16) | + ((unsigned char)String[Pos - 2] << 8) | + (unsigned char)String[Pos - 1]; + Bits.push_back(V); + } + } else { // Little-endian host + for (Pos += 4; Pos <= Size; Pos += 4) { + unsigned V = ((unsigned char)String[Pos - 1] << 24) | + ((unsigned char)String[Pos - 2] << 16) | + ((unsigned char)String[Pos - 3] << 8) | + (unsigned char)String[Pos - 4]; + Bits.push_back(V); + } + } + } + + // With the leftover bits. + unsigned V = 0; + // Pos will have overshot size by 4 - #bytes left over. + // No need to take endianness into account here - this is always executed. + switch (Pos - Size) { + case 1: V = (V << 8) | (unsigned char)String[Size - 3]; LLVM_FALLTHROUGH; + case 2: V = (V << 8) | (unsigned char)String[Size - 2]; LLVM_FALLTHROUGH; + case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; + default: return; // Nothing left. + } + + Bits.push_back(V); +} + +// AddNodeID - Adds the Bit data of another ID to *this. +void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { + Bits.append(ID.Bits.begin(), ID.Bits.end()); +} + +/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to +/// lookup the node in the FoldingSetBase. +unsigned FoldingSetNodeID::ComputeHash() const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); +} + +/// operator== - Used to compare two nodes to each other. +/// +bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { + return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); +} + +/// operator== - Used to compare two nodes to each other. 
+/// +bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; +} + +/// Used to compare the "ordering" of two nodes as defined by the +/// profiled bits and their ordering defined by memcmp(). +bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { + return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); +} + +bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; +} + +/// Intern - Copy this node's data to a memory region allocated from the +/// given allocator and return a FoldingSetNodeIDRef describing the +/// interned data. +FoldingSetNodeIDRef +FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { + unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); + std::uninitialized_copy(Bits.begin(), Bits.end(), New); + return FoldingSetNodeIDRef(New, Bits.size()); +} + +//===----------------------------------------------------------------------===// +/// Helper functions for FoldingSetBase. + +/// GetNextPtr - In order to save space, each bucket is a +/// singly-linked-list. In order to make deletion more efficient, we make +/// the list circular, so we can delete a node without computing its hash. +/// The problem with this is that the start of the hash buckets are not +/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: +/// use GetBucketPtr when this happens. +static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { + // The low bit is set if this is the pointer back to the bucket. + if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) + return nullptr; + + return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); +} + + +/// testing. 
+static void **GetBucketPtr(void *NextInBucketPtr) { + intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); + assert((Ptr & 1) && "Not a bucket pointer"); + return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); +} + +/// GetBucketFor - Hash the specified node ID and return the hash bucket for +/// the specified ID. +static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { + // NumBuckets is always a power of 2. + unsigned BucketNum = Hash & (NumBuckets-1); + return Buckets + BucketNum; +} + +/// AllocateBuckets - Allocated initialized bucket memory. +static void **AllocateBuckets(unsigned NumBuckets) { + void **Buckets = static_cast<void**>(safe_calloc(NumBuckets + 1, + sizeof(void*))); + // Set the very last bucket to be a non-null "pointer". + Buckets[NumBuckets] = reinterpret_cast<void*>(-1); + return Buckets; +} + +//===----------------------------------------------------------------------===// +// FoldingSetBase Implementation + +void FoldingSetBase::anchor() {} + +FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { + assert(5 < Log2InitSize && Log2InitSize < 32 && + "Initial hash table size out of range"); + NumBuckets = 1 << Log2InitSize; + Buckets = AllocateBuckets(NumBuckets); + NumNodes = 0; +} + +FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg) + : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { + Arg.Buckets = nullptr; + Arg.NumBuckets = 0; + Arg.NumNodes = 0; +} + +FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { + free(Buckets); // This may be null if the set is in a moved-from state. + Buckets = RHS.Buckets; + NumBuckets = RHS.NumBuckets; + NumNodes = RHS.NumNodes; + RHS.Buckets = nullptr; + RHS.NumBuckets = 0; + RHS.NumNodes = 0; + return *this; +} + +FoldingSetBase::~FoldingSetBase() { + free(Buckets); +} + +void FoldingSetBase::clear() { + // Set all but the last bucket to null pointers. 
+ memset(Buckets, 0, NumBuckets*sizeof(void*)); + + // Set the very last bucket to be a non-null "pointer". + Buckets[NumBuckets] = reinterpret_cast<void*>(-1); + + // Reset the node count to zero. + NumNodes = 0; +} + +void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) { + assert((NewBucketCount > NumBuckets) && "Can't shrink a folding set with GrowBucketCount"); + assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!"); + void **OldBuckets = Buckets; + unsigned OldNumBuckets = NumBuckets; + + // Clear out new buckets. + Buckets = AllocateBuckets(NewBucketCount); + // Set NumBuckets only if allocation of new buckets was successful. + NumBuckets = NewBucketCount; + NumNodes = 0; + + // Walk the old buckets, rehashing nodes into their new place. + FoldingSetNodeID TempID; + for (unsigned i = 0; i != OldNumBuckets; ++i) { + void *Probe = OldBuckets[i]; + if (!Probe) continue; + while (Node *NodeInBucket = GetNextPtr(Probe)) { + // Figure out the next link, remove NodeInBucket from the old link. + Probe = NodeInBucket->getNextInBucket(); + NodeInBucket->SetNextInBucket(nullptr); + + // Insert the node into the new bucket, after recomputing the hash. + InsertNode(NodeInBucket, + GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), + Buckets, NumBuckets)); + TempID.clear(); + } + } + + free(OldBuckets); +} + +/// GrowHashTable - Double the size of the hash table and rehash everything. +/// +void FoldingSetBase::GrowHashTable() { + GrowBucketCount(NumBuckets * 2); +} + +void FoldingSetBase::reserve(unsigned EltCount) { + // This will give us somewhere between EltCount / 2 and + // EltCount buckets. This puts us in the load factor + // range of 1.0 - 2.0. + if(EltCount < capacity()) + return; + GrowBucketCount(PowerOf2Floor(EltCount)); +} + +/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, +/// return it. If not, return the insertion token that will make insertion +/// faster. 
+FoldingSetBase::Node * +FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + unsigned IDHash = ID.ComputeHash(); + void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); + void *Probe = *Bucket; + + InsertPos = nullptr; + + FoldingSetNodeID TempID; + while (Node *NodeInBucket = GetNextPtr(Probe)) { + if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) + return NodeInBucket; + TempID.clear(); + + Probe = NodeInBucket->getNextInBucket(); + } + + // Didn't find the node, return null with the bucket as the InsertPos. + InsertPos = Bucket; + return nullptr; +} + +/// InsertNode - Insert the specified node into the folding set, knowing that it +/// is not already in the map. InsertPos must be obtained from +/// FindNodeOrInsertPos. +void FoldingSetBase::InsertNode(Node *N, void *InsertPos) { + assert(!N->getNextInBucket()); + // Do we need to grow the hashtable? + if (NumNodes+1 > capacity()) { + GrowHashTable(); + FoldingSetNodeID TempID; + InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); + } + + ++NumNodes; + + /// The insert position is actually a bucket pointer. + void **Bucket = static_cast<void**>(InsertPos); + + void *Next = *Bucket; + + // If this is the first insertion into this bucket, its next pointer will be + // null. Pretend as if it pointed to itself, setting the low bit to indicate + // that it is a pointer to the bucket. + if (!Next) + Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); + + // Set the node's next pointer, and make the bucket point to the node. + N->SetNextInBucket(Next); + *Bucket = N; +} + +/// RemoveNode - Remove a node from the folding set, returning true if one was +/// removed or false if the node was not in the folding set. +bool FoldingSetBase::RemoveNode(Node *N) { + // Because each bucket is a circular list, we don't need to compute N's hash + // to remove it. + void *Ptr = N->getNextInBucket(); + if (!Ptr) return false; // Not in folding set. 
+ + --NumNodes; + N->SetNextInBucket(nullptr); + + // Remember what N originally pointed to, either a bucket or another node. + void *NodeNextPtr = Ptr; + + // Chase around the list until we find the node (or bucket) which points to N. + while (true) { + if (Node *NodeInBucket = GetNextPtr(Ptr)) { + // Advance pointer. + Ptr = NodeInBucket->getNextInBucket(); + + // We found a node that points to N, change it to point to N's next node, + // removing N from the list. + if (Ptr == N) { + NodeInBucket->SetNextInBucket(NodeNextPtr); + return true; + } + } else { + void **Bucket = GetBucketPtr(Ptr); + Ptr = *Bucket; + + // If we found that the bucket points to N, update the bucket to point to + // whatever is next. + if (Ptr == N) { + *Bucket = NodeNextPtr; + return true; + } + } + } +} + +/// GetOrInsertNode - If there is an existing simple Node exactly +/// equal to the specified node, return it. Otherwise, insert 'N' and it +/// instead. +FoldingSetBase::Node *FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N) { + FoldingSetNodeID ID; + GetNodeProfile(N, ID); + void *IP; + if (Node *E = FindNodeOrInsertPos(ID, IP)) + return E; + InsertNode(N, IP); + return N; +} + +//===----------------------------------------------------------------------===// +// FoldingSetIteratorImpl Implementation + +FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { + // Skip to the first non-null non-self-cycle bucket. + while (*Bucket != reinterpret_cast<void*>(-1) && + (!*Bucket || !GetNextPtr(*Bucket))) + ++Bucket; + + NodePtr = static_cast<FoldingSetNode*>(*Bucket); +} + +void FoldingSetIteratorImpl::advance() { + // If there is another link within this bucket, go to it. + void *Probe = NodePtr->getNextInBucket(); + + if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) + NodePtr = NextNodeInBucket; + else { + // Otherwise, this is the last link in this bucket. + void **Bucket = GetBucketPtr(Probe); + + // Skip to the next non-null non-self-cycle bucket. 
+ do { + ++Bucket; + } while (*Bucket != reinterpret_cast<void*>(-1) && + (!*Bucket || !GetNextPtr(*Bucket))); + + NodePtr = static_cast<FoldingSetNode*>(*Bucket); + } +} + +//===----------------------------------------------------------------------===// +// FoldingSetBucketIteratorImpl Implementation + +FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { + Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket; +} diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp new file mode 100644 index 0000000000000..f9e89f69b528c --- /dev/null +++ b/llvm/lib/Support/FormatVariadic.cpp @@ -0,0 +1,155 @@ +//===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; + +static Optional<AlignStyle> translateLocChar(char C) { + switch (C) { + case '-': + return AlignStyle::Left; + case '=': + return AlignStyle::Center; + case '+': + return AlignStyle::Right; + default: + return None; + } + LLVM_BUILTIN_UNREACHABLE; +} + +bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where, + size_t &Align, char &Pad) { + Where = AlignStyle::Right; + Align = 0; + Pad = ' '; + if (Spec.empty()) + return true; + + if (Spec.size() > 1) { + // A maximum of 2 characters at the beginning can be used for something + // other + // than the width. + // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...] + // contains the width. + // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width. + // Otherwise, Spec[0:...] contains the width. 
+ if (auto Loc = translateLocChar(Spec[1])) { + Pad = Spec[0]; + Where = *Loc; + Spec = Spec.drop_front(2); + } else if (auto Loc = translateLocChar(Spec[0])) { + Where = *Loc; + Spec = Spec.drop_front(1); + } + } + + bool Failed = Spec.consumeInteger(0, Align); + return !Failed; +} + +Optional<ReplacementItem> +formatv_object_base::parseReplacementItem(StringRef Spec) { + StringRef RepString = Spec.trim("{}"); + + // If the replacement sequence does not start with a non-negative integer, + // this is an error. + char Pad = ' '; + std::size_t Align = 0; + AlignStyle Where = AlignStyle::Right; + StringRef Options; + size_t Index = 0; + RepString = RepString.trim(); + if (RepString.consumeInteger(0, Index)) { + assert(false && "Invalid replacement sequence index!"); + return ReplacementItem{}; + } + RepString = RepString.trim(); + if (!RepString.empty() && RepString.front() == ',') { + RepString = RepString.drop_front(); + if (!consumeFieldLayout(RepString, Where, Align, Pad)) + assert(false && "Invalid replacement field layout specification!"); + } + RepString = RepString.trim(); + if (!RepString.empty() && RepString.front() == ':') { + Options = RepString.drop_front().trim(); + RepString = StringRef(); + } + RepString = RepString.trim(); + if (!RepString.empty()) { + assert(false && "Unexpected characters found in replacement string!"); + } + + return ReplacementItem{Spec, Index, Align, Where, Pad, Options}; +} + +std::pair<ReplacementItem, StringRef> +formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) { + std::size_t From = 0; + while (From < Fmt.size() && From != StringRef::npos) { + std::size_t BO = Fmt.find_first_of('{', From); + // Everything up until the first brace is a literal. + if (BO != 0) + return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO)); + + StringRef Braces = + Fmt.drop_front(BO).take_while([](char C) { return C == '{'; }); + // If there is more than one brace, then some of them are escaped. 
Treat + // these as replacements. + if (Braces.size() > 1) { + size_t NumEscapedBraces = Braces.size() / 2; + StringRef Middle = Fmt.substr(BO, NumEscapedBraces); + StringRef Right = Fmt.drop_front(BO + NumEscapedBraces * 2); + return std::make_pair(ReplacementItem{Middle}, Right); + } + // An unterminated open brace is undefined. We treat the rest of the string + // as a literal replacement, but we assert to indicate that this is + // undefined and that we consider it an error. + std::size_t BC = Fmt.find_first_of('}', BO); + if (BC == StringRef::npos) { + assert( + false && + "Unterminated brace sequence. Escape with {{ for a literal brace."); + return std::make_pair(ReplacementItem{Fmt}, StringRef()); + } + + // Even if there is a closing brace, if there is another open brace before + // this closing brace, treat this portion as literal, and try again with the + // next one. + std::size_t BO2 = Fmt.find_first_of('{', BO + 1); + if (BO2 < BC) + return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)}, + Fmt.substr(BO2)); + + StringRef Spec = Fmt.slice(BO + 1, BC); + StringRef Right = Fmt.substr(BC + 1); + + auto RI = parseReplacementItem(Spec); + if (RI.hasValue()) + return std::make_pair(*RI, Right); + + // If there was an error parsing the replacement item, treat it as an + // invalid replacement spec, and just continue. 
+ From = BC + 1; + } + return std::make_pair(ReplacementItem{Fmt}, StringRef()); +} + +std::vector<ReplacementItem> +formatv_object_base::parseFormatString(StringRef Fmt) { + std::vector<ReplacementItem> Replacements; + ReplacementItem I; + while (!Fmt.empty()) { + std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt); + if (I.Type != ReplacementType::Empty) + Replacements.push_back(I); + } + return Replacements; +} + +void detail::format_adapter::anchor() { } diff --git a/llvm/lib/Support/FormattedStream.cpp b/llvm/lib/Support/FormattedStream.cpp new file mode 100644 index 0000000000000..4eb747038bb9e --- /dev/null +++ b/llvm/lib/Support/FormattedStream.cpp @@ -0,0 +1,107 @@ +//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of formatted_raw_ostream. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +/// UpdatePosition - Examine the given char sequence and figure out which +/// column we end up in after output, and how many line breaks are contained. 
+/// +static void UpdatePosition(std::pair<unsigned, unsigned> &Position, const char *Ptr, size_t Size) { + unsigned &Column = Position.first; + unsigned &Line = Position.second; + + // Keep track of the current column and line by scanning the string for + // special characters + for (const char *End = Ptr + Size; Ptr != End; ++Ptr) { + ++Column; + switch (*Ptr) { + case '\n': + Line += 1; + LLVM_FALLTHROUGH; + case '\r': + Column = 0; + break; + case '\t': + // Assumes tab stop = 8 characters. + Column += (8 - (Column & 0x7)) & 0x7; + break; + } + } +} + +/// ComputePosition - Examine the current output and update line and column +/// counts. +void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { + // If our previous scan pointer is inside the buffer, assume we already + // scanned those bytes. This depends on raw_ostream to not change our buffer + // in unexpected ways. + if (Ptr <= Scanned && Scanned <= Ptr + Size) + // Scan all characters added since our last scan to determine the new + // column. + UpdatePosition(Position, Scanned, Size - (Scanned - Ptr)); + else + UpdatePosition(Position, Ptr, Size); + + // Update the scanning pointer. + Scanned = Ptr + Size; +} + +/// PadToColumn - Align the output to some column number. +/// +/// \param NewCol - The column to move to. +/// +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { + // Figure out what's in the buffer and add it to the column count. + ComputePosition(getBufferStart(), GetNumBytesInBuffer()); + + // Output spaces until we reach the desired column. + indent(std::max(int(NewCol - getColumn()), 1)); + return *this; +} + +void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { + // Figure out what's in the buffer and add it to the column count. + ComputePosition(Ptr, Size); + + // Write the data to the underlying stream (which is unbuffered, so + // the data will be immediately written out). 
+ TheStream->write(Ptr, Size); + + // Reset the scanning pointer. + Scanned = nullptr; +} + +/// fouts() - This returns a reference to a formatted_raw_ostream for +/// standard output. Use it like: fouts() << "foo" << "bar"; +formatted_raw_ostream &llvm::fouts() { + static formatted_raw_ostream S(outs()); + return S; +} + +/// ferrs() - This returns a reference to a formatted_raw_ostream for +/// standard error. Use it like: ferrs() << "foo" << "bar"; +formatted_raw_ostream &llvm::ferrs() { + static formatted_raw_ostream S(errs()); + return S; +} + +/// fdbgs() - This returns a reference to a formatted_raw_ostream for +/// the debug stream. Use it like: fdbgs() << "foo" << "bar"; +formatted_raw_ostream &llvm::fdbgs() { + static formatted_raw_ostream S(dbgs()); + return S; +} diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp new file mode 100644 index 0000000000000..8dae6941ec770 --- /dev/null +++ b/llvm/lib/Support/GlobPattern.cpp @@ -0,0 +1,178 @@ +//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a glob pattern matcher. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GlobPattern.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" + +using namespace llvm; + +static bool hasWildcard(StringRef S) { + return S.find_first_of("?*[\\") != StringRef::npos; +} + +// Expands character ranges and returns a bitmap. +// For example, "a-cf-hz" is expanded to "abcfghz". 
+static Expected<BitVector> expand(StringRef S, StringRef Original) { + BitVector BV(256, false); + + // Expand X-Y. + for (;;) { + if (S.size() < 3) + break; + + uint8_t Start = S[0]; + uint8_t End = S[2]; + + // If it doesn't start with something like X-Y, + // consume the first character and proceed. + if (S[1] != '-') { + BV[Start] = true; + S = S.substr(1); + continue; + } + + // It must be in the form of X-Y. + // Validate it and then interpret the range. + if (Start > End) + return make_error<StringError>("invalid glob pattern: " + Original, + errc::invalid_argument); + + for (int C = Start; C <= End; ++C) + BV[(uint8_t)C] = true; + S = S.substr(3); + } + + for (char C : S) + BV[(uint8_t)C] = true; + return BV; +} + +// This is a scanner for the glob pattern. +// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]" +// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is +// equivalent to "[^<chars>]"), or a non-meta character. +// This function returns the first token in S. +static Expected<BitVector> scan(StringRef &S, StringRef Original) { + switch (S[0]) { + case '*': + S = S.substr(1); + // '*' is represented by an empty bitvector. + // All other bitvectors are 256-bit long. + return BitVector(); + case '?': + S = S.substr(1); + return BitVector(256, true); + case '[': { + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. 
+ size_t End = S.find(']', 2); + if (End == StringRef::npos) + return make_error<StringError>("invalid glob pattern: " + Original, + errc::invalid_argument); + + StringRef Chars = S.substr(1, End - 1); + S = S.substr(End + 1); + if (Chars.startswith("^") || Chars.startswith("!")) { + Expected<BitVector> BV = expand(Chars.substr(1), Original); + if (!BV) + return BV.takeError(); + return BV->flip(); + } + return expand(Chars, Original); + } + case '\\': + // Eat this character and fall through below to treat it like a non-meta + // character. + S = S.substr(1); + LLVM_FALLTHROUGH; + default: + BitVector BV(256, false); + BV[(uint8_t)S[0]] = true; + S = S.substr(1); + return BV; + } +} + +Expected<GlobPattern> GlobPattern::create(StringRef S) { + GlobPattern Pat; + + // S doesn't contain any metacharacter, + // so the regular string comparison should work. + if (!hasWildcard(S)) { + Pat.Exact = S; + return Pat; + } + + // S is something like "foo*", and the "* is not escaped. We can use + // startswith(). + if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { + Pat.Prefix = S.drop_back(); + return Pat; + } + + // S is something like "*foo". We can use endswith(). + if (S.startswith("*") && !hasWildcard(S.drop_front())) { + Pat.Suffix = S.drop_front(); + return Pat; + } + + // Otherwise, we need to do real glob pattern matching. + // Parse the pattern now. + StringRef Original = S; + while (!S.empty()) { + Expected<BitVector> BV = scan(S, Original); + if (!BV) + return BV.takeError(); + Pat.Tokens.push_back(*BV); + } + return Pat; +} + +bool GlobPattern::match(StringRef S) const { + if (Exact) + return S == *Exact; + if (Prefix) + return S.startswith(*Prefix); + if (Suffix) + return S.endswith(*Suffix); + return matchOne(Tokens, S); +} + +// Runs glob pattern Pats against string S. 
+bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { + for (;;) { + if (Pats.empty()) + return S.empty(); + + // If Pats[0] is '*', try to match Pats[1..] against all possible + // tail strings of S to see at least one pattern succeeds. + if (Pats[0].size() == 0) { + Pats = Pats.slice(1); + if (Pats.empty()) + // Fast path. If a pattern is '*', it matches anything. + return true; + for (size_t I = 0, E = S.size(); I < E; ++I) + if (matchOne(Pats, S.substr(I))) + return true; + return false; + } + + // If Pats[0] is not '*', it must consume one character. + if (S.empty() || !Pats[0][(uint8_t)S[0]]) + return false; + Pats = Pats.slice(1); + S = S.substr(1); + } +} diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp new file mode 100644 index 0000000000000..c689a81925d4c --- /dev/null +++ b/llvm/lib/Support/GraphWriter.cpp @@ -0,0 +1,298 @@ +//===- GraphWriter.cpp - Implements GraphWriter support routines ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements misc. GraphWriter support routines. 
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/GraphWriter.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <system_error>
#include <string>
#include <vector>

using namespace llvm;

// Hidden command-line flag. In this file it is only read on Apple hosts,
// where setting it makes DisplayGraph() not wait for the viewer.
static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
  cl::desc("Execute graph viewer in the background. Creates tmp file litter."));

/// Returns a copy of Label rewritten character by character:
///  - a newline becomes the two characters backslash + 'n';
///  - a tab becomes two spaces;
///  - a backslash followed by 'l' is left untouched;
///  - a backslash followed by '|', '{' or '}' has the backslash removed and
///    the following character is kept as is;
///  - any other backslash, and each of { } < > | ", gets a backslash inserted
///    in front of it.
/// The string is edited in place while it is being scanned, so the index
/// adjustments below are order-sensitive.
std::string llvm::DOT::EscapeString(const std::string &Label) {
  std::string Str(Label);
  for (unsigned i = 0; i != Str.length(); ++i)
  switch (Str[i]) {
    case '\n':
      Str.insert(Str.begin()+i, '\\');  // Escape character...
      ++i;
      Str[i] = 'n';
      break;
    case '\t':
      Str.insert(Str.begin()+i, ' ');  // Convert to two spaces
      ++i;
      Str[i] = ' ';
      break;
    case '\\':
      if (i+1 != Str.length())
        // Note: 'continue' inside this nested switch resumes the enclosing
        // for loop, so the character after the backslash is skipped as well.
        switch (Str[i+1]) {
          case 'l': continue; // don't disturb \l
          case '|': case '{': case '}':
            Str.erase(Str.begin()+i); continue;
          default: break;
        }
      LLVM_FALLTHROUGH;
    case '{': case '}':
    case '<': case '>':
    case '|': case '"':
      Str.insert(Str.begin()+i, '\\');  // Escape character...
      ++i;  // don't infinite loop
      break;
  }
  return Str;
}

/// Get a color string for this node number. Simply round-robin selects
/// from a reasonable number of colors.
// The table below holds NumColors six-character strings; ColorNumber is
// reduced modulo NumColors, so any value is accepted.
StringRef llvm::DOT::getColorString(unsigned ColorNumber) {
  static const int NumColors = 20;
  static const char* Colors[NumColors] = {
    "aaaaaa", "aa0000", "00aa00", "aa5500", "0055ff", "aa00aa", "00aaaa",
    "555555", "ff5555", "55ff55", "ffff55", "5555ff", "ff55ff", "55ffff",
    "ffaaaa", "aaffaa", "ffffaa", "aaaaff", "ffaaff", "aaffff"};
  return Colors[ColorNumber % NumColors];
}

// Creates a temporary file with prefix Name and suffix "dot".
// On success, FD receives the open file descriptor and the file name is
// returned (a progress message is printed to errs()). On failure, the error
// is printed to errs(), FD is left at -1 and an empty string is returned.
std::string llvm::createGraphFilename(const Twine &Name, int &FD) {
  FD = -1;
  SmallString<128> Filename;
  std::error_code EC = sys::fs::createTemporaryFile(Name, "dot", FD, Filename);
  if (EC) {
    errs() << "Error: " << EC.message() << "\n";
    return "";
  }

  errs() << "Writing '" << Filename << "'... ";
  return Filename.str();
}

// Execute the graph viewer.  Return true if there were errors.
// When wait is true the program is run to completion and, on success,
// Filename is removed afterwards. When wait is false the program is launched
// without waiting, Filename is left on disk, and the result of the launch is
// not checked (the function returns false).
static bool ExecGraphViewer(StringRef ExecPath, std::vector<StringRef> &args,
                            StringRef Filename, bool wait,
                            std::string &ErrMsg) {
  if (wait) {
    if (sys::ExecuteAndWait(ExecPath, args, None, {}, 0, 0, &ErrMsg)) {
      errs() << "Error: " << ErrMsg << "\n";
      return true;
    }
    sys::fs::remove(Filename);
    errs() << " done. \n";
  } else {
    sys::ExecuteNoWait(ExecPath, args, None, {}, 0, &ErrMsg);
    errs() << "Remember to erase graph file: " << Filename << "\n";
  }
  return false;
}

namespace {

// Collects, across several lookups, the names of programs that could not be
// found, so that DisplayGraph() can list them if no viewer is usable.
struct GraphSession {
  std::string LogBuffer;

  // Names is a '|'-separated list of candidate program names. The first one
  // that sys::findProgramByName() resolves is stored in ProgramPath and true
  // is returned. Each candidate that is not found is appended to LogBuffer.
  bool TryFindProgram(StringRef Names, std::string &ProgramPath) {
    raw_string_ostream Log(LogBuffer);
    SmallVector<StringRef, 8> parts;
    Names.split(parts, '|');
    for (auto Name : parts) {
      if (ErrorOr<std::string> P = sys::findProgramByName(Name)) {
        ProgramPath = *P;
        return true;
      }
      Log << " Tried '" << Name << "'\n";
    }
    return false;
  }
};

} // end anonymous namespace

// Maps a GraphProgram::Name enumerator to the corresponding executable name.
static const char *getProgramName(GraphProgram::Name program) {
  switch (program) {
  case GraphProgram::DOT:
    return "dot";
  case GraphProgram::FDP:
    return "fdp";
  case GraphProgram::NEATO:
    return "neato";
  case GraphProgram::TWOPI:
    return "twopi";
  case GraphProgram::CIRCO:
    return "circo";
  }
  llvm_unreachable("bad kind");
}

// Tries a sequence of external programs to display the graph file
// FilenameRef and returns true if there were errors (including the case where
// no usable program is found), false otherwise.
//
// The order tried is: 'open' (Apple hosts only), 'xdg-open', 'Graphviz',
// 'xdot'/'xdot.py', then a generator (the requested layout program or any of
// dot/fdp/neato/twopi/circo) producing PostScript or PDF that is handed to a
// separate viewer, and finally 'dotty'. For 'open' and 'xdg-open' a failed
// launch falls through to the next candidate; for the later candidates the
// result of the launch is returned directly.
bool llvm::DisplayGraph(StringRef FilenameRef, bool wait,
                        GraphProgram::Name program) {
  std::string Filename = FilenameRef;
  std::string ErrMsg;
  std::string ViewerPath;
  GraphSession S;

#ifdef __APPLE__
  wait &= !ViewBackground;
  if (S.TryFindProgram("open", ViewerPath)) {
    std::vector<StringRef> args;
    args.push_back(ViewerPath);
    if (wait)
      args.push_back("-W");
    args.push_back(Filename);
    errs() << "Trying 'open' program... ";
    if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg))
      return false;
  }
#endif
  if (S.TryFindProgram("xdg-open", ViewerPath)) {
    std::vector<StringRef> args;
    args.push_back(ViewerPath);
    args.push_back(Filename);
    errs() << "Trying 'xdg-open' program... ";
    if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg))
      return false;
  }

  // Graphviz
  if (S.TryFindProgram("Graphviz", ViewerPath)) {
    std::vector<StringRef> args;
    args.push_back(ViewerPath);
    args.push_back(Filename);

    errs() << "Running 'Graphviz' program... ";
    return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
  }

  // xdot
  if (S.TryFindProgram("xdot|xdot.py", ViewerPath)) {
    std::vector<StringRef> args;
    args.push_back(ViewerPath);
    args.push_back(Filename);

    args.push_back("-f");
    args.push_back(getProgramName(program));

    errs() << "Running 'xdot.py' program... ";
    return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
  }

  // Which program will display the generated PostScript/PDF file.
  // VK_None is zero, so "!Viewer" below means "nothing selected yet".
  enum ViewerKind {
    VK_None,
    VK_OSXOpen,
    VK_XDGOpen,
    VK_Ghostview,
    VK_CmdStart
  };
  ViewerKind Viewer = VK_None;
#ifdef __APPLE__
  if (!Viewer && S.TryFindProgram("open", ViewerPath))
    Viewer = VK_OSXOpen;
#endif
  if (!Viewer && S.TryFindProgram("gv", ViewerPath))
    Viewer = VK_Ghostview;
  if (!Viewer && S.TryFindProgram("xdg-open", ViewerPath))
    Viewer = VK_XDGOpen;
#ifdef _WIN32
  if (!Viewer && S.TryFindProgram("cmd", ViewerPath)) {
    Viewer = VK_CmdStart;
  }
#endif

  // PostScript or PDF graph generator + PostScript/PDF viewer
  std::string GeneratorPath;
  if (Viewer &&
      (S.TryFindProgram(getProgramName(program), GeneratorPath) ||
       S.TryFindProgram("dot|fdp|neato|twopi|circo", GeneratorPath))) {
    // PDF is produced only for the cmd/start viewer; PostScript otherwise.
    std::string OutputFilename =
        Filename + (Viewer == VK_CmdStart ? ".pdf" : ".ps");

    std::vector<StringRef> args;
    args.push_back(GeneratorPath);
    if (Viewer == VK_CmdStart)
      args.push_back("-Tpdf");
    else
      args.push_back("-Tps");
    args.push_back("-Nfontname=Courier");
    args.push_back("-Gsize=7.5,10");
    args.push_back(Filename);
    args.push_back("-o");
    args.push_back(OutputFilename);

    errs() << "Running '" << GeneratorPath << "' program... ";

    // The generator is always run synchronously (wait == true), which also
    // removes the input graph file once it has succeeded.
    if (ExecGraphViewer(GeneratorPath, args, Filename, true, ErrMsg))
      return true;

    // The lifetime of StartArg must include the call of ExecGraphViewer
    // because the args are passed as vector of char*.
    std::string StartArg;

    args.clear();
    args.push_back(ViewerPath);
    switch (Viewer) {
    case VK_OSXOpen:
      args.push_back("-W");
      args.push_back(OutputFilename);
      break;
    case VK_XDGOpen:
      wait = false;
      args.push_back(OutputFilename);
      break;
    case VK_Ghostview:
      args.push_back("--spartan");
      args.push_back(OutputFilename);
      break;
    case VK_CmdStart:
      args.push_back("/S");
      args.push_back("/C");
      StartArg =
          (StringRef("start ") + (wait ? "/WAIT " : "") + OutputFilename).str();
      args.push_back(StartArg);
      break;
    case VK_None:
      llvm_unreachable("Invalid viewer");
    }

    ErrMsg.clear();
    return ExecGraphViewer(ViewerPath, args, OutputFilename, wait, ErrMsg);
  }

  // dotty
  if (S.TryFindProgram("dotty", ViewerPath)) {
    std::vector<StringRef> args;
    args.push_back(ViewerPath);
    args.push_back(Filename);

// Dotty spawns another app and doesn't wait until it returns
#ifdef _WIN32
    wait = false;
#endif
    errs() << "Running 'dotty' program... ";
    return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
  }

  // Nothing worked: report every program name that was looked up and missed.
  errs() << "Error: Couldn't find a usable graph viewer program:\n";
  errs() << S.LogBuffer << "\n";
  return true;
}
diff --git a/llvm/lib/Support/Hashing.cpp b/llvm/lib/Support/Hashing.cpp
new file mode 100644
index 0000000000000..1b20a670434f1
--- /dev/null
+++ b/llvm/lib/Support/Hashing.cpp
@@ -0,0 +1,28 @@
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides implementation bits for the LLVM common hashing
// infrastructure. Documentation and most of the other information is in the
// header file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Hashing.h"

using namespace llvm;

// Provide a definition and static initializer for the fixed seed. This
// initializer should always be zero to ensure its value can never appear to be
// non-zero, even during dynamic initialization.
uint64_t llvm::hashing::detail::fixed_seed_override = 0;

// Implement the function for forced setting of the fixed seed.
// Stores fixed_value into hashing::detail::fixed_seed_override with a plain,
// unsynchronized write.
// FIXME: Use atomic operations here so that there is no data race.
void llvm::set_fixed_execution_hash_seed(uint64_t fixed_value) {
  hashing::detail::fixed_seed_override = fixed_value;
}
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
new file mode 100644
index 0000000000000..2a473a1994c2b
--- /dev/null
+++ b/llvm/lib/Support/Host.cpp
@@ -0,0 +1,1540 @@
//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements the operating system Host concept.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Host.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <assert.h>
#include <string.h>

// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#endif
#ifdef _WIN32
#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
#endif

#define DEBUG_TYPE "host-detection"

//===----------------------------------------------------------------------===//
//
//  Implementations of the CPU detection routines
//
//===----------------------------------------------------------------------===//

using namespace llvm;

// Reads /proc/cpuinfo into a memory buffer. On failure the error is printed
// to errs() and nullptr is returned.
static std::unique_ptr<llvm::MemoryBuffer>
    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*Text);
}

// Maps the value of the first "cpu<blanks>:<blanks><value>" line found in
// ProcCpuinfoContent to an LLVM CPU name via the table at the end of the
// function. Returns "generic" if no such line exists or the value is not in
// the table.
StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
  // and so we must use an operating-system interface to determine the current
  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
  const char *generic = "generic";

  // The cpu line is second (after the 'processor: 0' line), so if this
  // buffer is too small then something has changed (or is wrong).
  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();

  StringRef::const_iterator CIP = CPUInfoStart;

  // CPUStart stays null until the value of a cpu line has been located.
  StringRef::const_iterator CPUStart = 0;
  size_t CPULen = 0;

  // We need to find the first line which starts with cpu, spaces, and a colon.
  // After the colon, there may be some additional spaces and then the cpu type.
  while (CIP < CPUInfoEnd && CPUStart == 0) {
    if (CIP < CPUInfoEnd && *CIP == '\n')
      ++CIP;

    if (CIP < CPUInfoEnd && *CIP == 'c') {
      ++CIP;
      if (CIP < CPUInfoEnd && *CIP == 'p') {
        ++CIP;
        if (CIP < CPUInfoEnd && *CIP == 'u') {
          ++CIP;
          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
            ++CIP;

          if (CIP < CPUInfoEnd && *CIP == ':') {
            ++CIP;
            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
              ++CIP;

            if (CIP < CPUInfoEnd) {
              // The value runs up to the next blank, comma or newline.
              CPUStart = CIP;
              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
                                          *CIP != ',' && *CIP != '\n'))
                ++CIP;
              CPULen = CIP - CPUStart;
            }
          }
        }
      }
    }

    // Not the line we want: skip to the end of the current line.
    if (CPUStart == 0)
      while (CIP < CPUInfoEnd && *CIP != '\n')
        ++CIP;
  }

  if (CPUStart == 0)
    return generic;

  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
      .Case("604e", "604e")
      .Case("604", "604")
      .Case("7400", "7400")
      .Case("7410", "7400")
      .Case("7447", "7400")
      .Case("7455", "7450")
      .Case("G4", "g4")
      .Case("POWER4", "970")
      .Case("PPC970FX", "970")
      .Case("PPC970MP", "970")
      .Case("G5", "g5")
      .Case("POWER5", "g5")
      .Case("A2", "a2")
      .Case("POWER6", "pwr6")
      .Case("POWER7", "pwr7")
      .Case("POWER8", "pwr8")
      .Case("POWER8E", "pwr8")
      .Case("POWER8NVL", "pwr8")
      .Case("POWER9", "pwr9")
      .Default(generic);
}

// Derives an LLVM CPU name from the "CPU implementer", "Hardware",
// "CPU part" and (for Samsung) "CPU variant" lines of ProcCpuinfoContent.
// The implementer code selects which part-number table is consulted.
// Returns "generic" when the implementer is not handled here or, for the
// table-driven vendors, when the part number is not listed.
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
  // The cpuid register on arm is not accessible from user space. On Linux,
  // it is exposed through the /proc/cpuinfo file.

  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
  // in all cases.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU implementer line.
  // If a key occurs on several lines, the last occurrence wins.
  StringRef Implementer;
  StringRef Hardware;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].startswith("CPU implementer"))
      Implementer = Lines[I].substr(15).ltrim("\t :");
    if (Lines[I].startswith("Hardware"))
      Hardware = Lines[I].substr(8).ltrim("\t :");
  }

  if (Implementer == "0x41") { // ARM Ltd.
    // MSM8992/8994 may give cpu part for the core that the kernel is running on,
    // which is nondeterministic and wrong. Always return cortex-a53 for these SoC.
    // NOTE(review): the comment names MSM8992/8994 but the code tests for
    // MSM8994 and MSM8996 - confirm which pair is intended.
    if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
      return "cortex-a53";


    // Look for the CPU part line.
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
      if (Lines[I].startswith("CPU part"))
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
        // values correspond to the "Part number" in the CP15/c0 register. The
        // contents are specified in the various processor manuals.
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
            .Case("0x926", "arm926ej-s")
            .Case("0xb02", "mpcore")
            .Case("0xb36", "arm1136j-s")
            .Case("0xb56", "arm1156t2-s")
            .Case("0xb76", "arm1176jz-s")
            .Case("0xc08", "cortex-a8")
            .Case("0xc09", "cortex-a9")
            .Case("0xc0f", "cortex-a15")
            .Case("0xc20", "cortex-m0")
            .Case("0xc23", "cortex-m3")
            .Case("0xc24", "cortex-m4")
            .Case("0xd04", "cortex-a35")
            .Case("0xd03", "cortex-a53")
            .Case("0xd07", "cortex-a57")
            .Case("0xd08", "cortex-a72")
            .Case("0xd09", "cortex-a73")
            .Case("0xd0a", "cortex-a75")
            .Case("0xd0b", "cortex-a76")
            .Default("generic");
  }

  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
    for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
      if (Lines[I].startswith("CPU part")) {
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
          .Case("0x516", "thunderx2t99")
          .Case("0x0516", "thunderx2t99")
          .Case("0xaf", "thunderx2t99")
          .Case("0x0af", "thunderx2t99")
          .Case("0xa1", "thunderxt88")
          .Case("0x0a1", "thunderxt88")
          .Default("generic");
      }
    }
  }

  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
    // Look for the CPU part line.
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
      if (Lines[I].startswith("CPU part"))
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
        // values correspond to the "Part number" in the CP15/c0 register. The
        // contents are specified in the various processor manuals.
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
          .Case("0xd01", "tsv110")
          .Default("generic");

  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
    // Look for the CPU part line.
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
      if (Lines[I].startswith("CPU part"))
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
        // values correspond to the "Part number" in the CP15/c0 register. The
        // contents are specified in the various processor manuals.
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
            .Case("0x06f", "krait") // APQ8064
            .Case("0x201", "kryo")
            .Case("0x205", "kryo")
            .Case("0x211", "kryo")
            .Case("0x800", "cortex-a73")
            .Case("0x801", "cortex-a73")
            .Case("0x802", "cortex-a73")
            .Case("0x803", "cortex-a73")
            .Case("0x804", "cortex-a73")
            .Case("0x805", "cortex-a73")
            .Case("0xc00", "falkor")
            .Case("0xc01", "saphira")
            .Default("generic");

  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
    // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
    // any predictive pattern across variants and parts.
    unsigned Variant = 0, Part = 0;

    // Look for the CPU variant line, whose value is a 1 digit hexadecimal
    // number, corresponding to the Variant bits in the CP15/C0 register.
    // (I is a copy of each line, so consume_front() does not modify Lines.
    // A value that fails to parse is ignored.)
    for (auto I : Lines)
      if (I.consume_front("CPU variant"))
        I.ltrim("\t :").getAsInteger(0, Variant);

    // Look for the CPU part line, whose value is a 3 digit hexadecimal
    // number, corresponding to the PartNum bits in the CP15/C0 register.
    for (auto I : Lines)
      if (I.consume_front("CPU part"))
        I.ltrim("\t :").getAsInteger(0, Part);

    // Combine both fields into one key: variant above bit 12, part below.
    unsigned Exynos = (Variant << 12) | Part;
    switch (Exynos) {
    default:
      // Default by falling through to Exynos M1.
      LLVM_FALLTHROUGH;

    case 0x1001:
      return "exynos-m1";

    case 0x4001:
      return "exynos-m2";
    }
  }

  return "generic";
}

// Derives an LLVM CPU name from the "features" line and the first
// "processor " line of ProcCpuinfoContent. The machine number on the
// processor line is compared against the thresholds below; the three newest
// names are only returned when the "vx" feature is listed. Returns "generic"
// if nothing matches.
StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
  // STIDP is a privileged operation, so use /proc/cpuinfo instead.

  // The "processor 0:" line comes after a fair amount of other information,
  // including a cache breakdown, but this should be plenty.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU features.
  SmallVector<StringRef, 32> CPUFeatures;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    if (Lines[I].startswith("features")) {
      size_t Pos = Lines[I].find(":");
      if (Pos != StringRef::npos) {
        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
        break;
      }
    }

  // We need to check for the presence of vector support independently of
  // the machine type, since we may only use the vector register set when
  // supported by the kernel (and hypervisor).
  bool HaveVectorSupport = false;
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    if (CPUFeatures[I] == "vx")
      HaveVectorSupport = true;
  }

  // Now check the processor machine type.
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].startswith("processor ")) {
      size_t Pos = Lines[I].find("machine = ");
      if (Pos != StringRef::npos) {
        Pos += sizeof("machine = ") - 1;
        unsigned int Id;
        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
          if (Id >= 8561 && HaveVectorSupport)
            return "z15";
          if (Id >= 3906 && HaveVectorSupport)
            return "z14";
          if (Id >= 2964 && HaveVectorSupport)
            return "z13";
          if (Id >= 2827)
            return "zEC12";
          if (Id >= 2817)
            return "z196";
        }
      }
      // Only the first "processor " line is examined.
      break;
    }
  }

  return "generic";
}

// Returns "generic" unless built for Linux on x86-64. Otherwise probes the
// running kernel by trying to load two five-instruction socket-filter
// programs through the bpf syscall: the first uses a BPF_JMP32 instruction,
// the second a BPF_JLT jump. Returns "v3" if the first load succeeds, "v2" if
// only the second does, and "v1" if both are rejected.
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
  return "generic";
#else
  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Local description of the argument block passed to the bpf syscall.
  // NOTE(review): presumably mirrors the leading fields of the kernel's
  // union bpf_attr for BPF_PROG_LOAD - confirm against the kernel headers.
  struct bpf_prog_load_attr {
    uint32_t prog_type;
    uint32_t insn_cnt;
    uint64_t insns;
    uint64_t license;
    uint32_t log_level;
    uint32_t log_size;
    uint64_t log_buf;
    uint32_t kern_version;
    uint32_t prog_flags;
  } attr = {};
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v3_insns;
  attr.license = (uint64_t)"DUMMY";

  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
                   sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v3";
  }

  /* Clear the whole attr in case its content changed by syscall. */
  memset(&attr, 0, sizeof(attr));
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v2_insns;
  attr.license = (uint64_t)"DUMMY";
  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v2";
  }
  return "v1";
#endif
}

#if defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64__) || defined(_M_X64)

// Vendor identification constants; each value is the four ASCII characters
// noted beside it, packed little-endian into 32 bits.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547 /* Genu */,
  SIG_AMD = 0x68747541 /* Auth */
};

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
//
// Returns false only on i386 GCC/Clang builds where toggling bit 0x00200000
// of EFLAGS has no effect; in every other configuration it returns true.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  return true;
}

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
// "value" is loaded into EAX before cpuid is executed; the resulting
// EAX/EBX/ECX/EDX are stored through the four pointers. Note the inverted
// convention: false means success.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
/// Identical to getX86CpuIDAndInfo except that "subleaf" is additionally
/// loaded into ECX before cpuid is executed.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

// Read control register 0 (XCR0). Used to detect features such as AVX.
// The low 32 bits are stored in *rEAX and the high 32 bits in *rEDX.
// Returns false on success and true when no implementation is available for
// the compiler in use.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}

// Decodes the family and model numbers from the EAX value passed in (bit
// positions are noted inline). The base family is bits 8-11 and the base
// model bits 4-7. For family 0xF the extended family (bits 20-27) is added
// to the family; for family 6 or 0xF the extended model (bits 16-19) is
// placed above the base model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}

static void
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                unsigned Brand_id, unsigned Features,
                                unsigned Features2, unsigned Features3,
                                unsigned *Type, unsigned *Subtype) {
  if (Brand_id != 0)
    return;
  switch (Family) {
  case 3:
    *Type = X86::INTEL_i386;
    break;
  case 4:
    *Type = X86::INTEL_i486;
    break;
  case 5:
    if (Features & (1 << X86::FEATURE_MMX)) {
      *Type = X86::INTEL_PENTIUM_MMX;
      break;
    }
    *Type = X86::INTEL_PENTIUM;
    break;
  case 6:
    switch (Model) {
    case 0x01: // Pentium Pro processor
      *Type = X86::INTEL_PENTIUM_PRO;
      break;
    case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
               // model 03
    case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
               // model 05, and Intel Celeron processor, model 05
    case 0x06: // Celeron processor, model 06
      *Type = X86::INTEL_PENTIUM_II;
      break;
    case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
               // processor, model 07
    case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
               // model 08, and Celeron processor, model 08
    case 0x0a: // Pentium III Xeon processor, model 0Ah
    case 0x0b: // Pentium III processor, model 0Bh
      *Type = X86::INTEL_PENTIUM_III;
      break;
    case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
    case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
               // 0Dh. All processors are manufactured using the 90 nm process.
    case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
               // Integrated Processor with Intel QuickAssist Technology
      *Type = X86::INTEL_PENTIUM_M;
      break;
    case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
               // 0Eh. All processors are manufactured using the 65 nm process.
+ *Type = X86::INTEL_CORE_DUO; + break; // yonah + case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile + // processor, Intel Core 2 Quad processor, Intel Core 2 Quad + // mobile processor, Intel Core 2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm process. + case 0x16: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + *Type = X86::INTEL_CORE2; // "core2" + *Subtype = X86::INTEL_CORE2_65; + break; + case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model + // 17h. All processors are manufactured using the 45 nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + case 0x1d: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + *Type = X86::INTEL_CORE2; // "penryn" + *Subtype = X86::INTEL_CORE2_45; + break; + case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. + // As found in a Summer 2010 model iMac. + case 0x1f: + case 0x2e: // Nehalem EX + *Type = X86::INTEL_COREI7; // "nehalem" + *Subtype = X86::INTEL_COREI7_NEHALEM; + break; + case 0x25: // Intel Core i7, laptop version. + case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. + case 0x2f: // Westmere EX + *Type = X86::INTEL_COREI7; // "westmere" + *Subtype = X86::INTEL_COREI7_WESTMERE; + break; + case 0x2a: // Intel Core i7 processor. All processors are manufactured + // using the 32 nm process. 
+ case 0x2d: + *Type = X86::INTEL_COREI7; //"sandybridge" + *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; + break; + case 0x3a: + case 0x3e: // Ivy Bridge EP + *Type = X86::INTEL_COREI7; // "ivybridge" + *Subtype = X86::INTEL_COREI7_IVYBRIDGE; + break; + + // Haswell: + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + *Type = X86::INTEL_COREI7; // "haswell" + *Subtype = X86::INTEL_COREI7_HASWELL; + break; + + // Broadwell: + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: + *Type = X86::INTEL_COREI7; // "broadwell" + *Subtype = X86::INTEL_COREI7_BROADWELL; + break; + + // Skylake: + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop + *Type = X86::INTEL_COREI7; // "skylake" + *Subtype = X86::INTEL_COREI7_SKYLAKE; + break; + + // Skylake Xeon: + case 0x55: + *Type = X86::INTEL_COREI7; + if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) + *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" + else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) + *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" + else + *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + break; + + // Cannonlake: + case 0x66: + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" + break; + + // Icelake: + case 0x7d: + case 0x7e: + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" + break; + + // Icelake Xeon: + case 0x6a: + case 0x6c: + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" + break; + + case 0x1c: // Most 45 nm Intel Atom processors + case 0x26: // 45 nm Atom Lincroft + case 0x27: // 32 nm Atom Medfield + case 0x35: // 32 nm Atom Midview + case 0x36: // 32 nm Atom Midview + *Type = X86::INTEL_BONNELL; + break; // "bonnell" + + // Atom Silvermont codes from the Intel software optimization guide. 
+ case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + case 0x4c: // really airmont + *Type = X86::INTEL_SILVERMONT; + break; // "silvermont" + // Goldmont: + case 0x5c: // Apollo Lake + case 0x5f: // Denverton + *Type = X86::INTEL_GOLDMONT; + break; // "goldmont" + case 0x7a: + *Type = X86::INTEL_GOLDMONT_PLUS; + break; + case 0x86: + *Type = X86::INTEL_TREMONT; + break; + + case 0x57: + *Type = X86::INTEL_KNL; // knl + break; + + case 0x85: + *Type = X86::INTEL_KNM; // knm + break; + + default: // Unknown family 6 CPU, try to guess. + // TODO detect tigerlake host + if (Features3 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 64))) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_TIGERLAKE; + break; + } + + if (Features & (1 << X86::FEATURE_AVX512VBMI2)) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; + break; + } + + if (Features & (1 << X86::FEATURE_AVX512VBMI)) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_CANNONLAKE; + break; + } + + if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_COOPERLAKE; + break; + } + + if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_CASCADELAKE; + break; + } + + if (Features & (1 << X86::FEATURE_AVX512VL)) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; + break; + } + + if (Features & (1 << X86::FEATURE_AVX512ER)) { + *Type = X86::INTEL_KNL; // knl + break; + } + + if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) { + if (Features3 & (1 << (X86::FEATURE_SHA - 64))) { + *Type = X86::INTEL_GOLDMONT; + } else { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_SKYLAKE; + } + break; + } + if (Features3 & (1 << (X86::FEATURE_ADX - 64))) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_BROADWELL; + break; + } + if (Features & (1 << X86::FEATURE_AVX2)) { + *Type = X86::INTEL_COREI7; + 
*Subtype = X86::INTEL_COREI7_HASWELL; + break; + } + if (Features & (1 << X86::FEATURE_AVX)) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; + break; + } + if (Features & (1 << X86::FEATURE_SSE4_2)) { + if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) { + *Type = X86::INTEL_SILVERMONT; + } else { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_NEHALEM; + } + break; + } + if (Features & (1 << X86::FEATURE_SSE4_1)) { + *Type = X86::INTEL_CORE2; // "penryn" + *Subtype = X86::INTEL_CORE2_45; + break; + } + if (Features & (1 << X86::FEATURE_SSSE3)) { + if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) { + *Type = X86::INTEL_BONNELL; // "bonnell" + } else { + *Type = X86::INTEL_CORE2; // "core2" + *Subtype = X86::INTEL_CORE2_65; + } + break; + } + if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) { + *Type = X86::INTEL_CORE2; // "core2" + *Subtype = X86::INTEL_CORE2_65; + break; + } + if (Features & (1 << X86::FEATURE_SSE3)) { + *Type = X86::INTEL_CORE_DUO; + break; + } + if (Features & (1 << X86::FEATURE_SSE2)) { + *Type = X86::INTEL_PENTIUM_M; + break; + } + if (Features & (1 << X86::FEATURE_SSE)) { + *Type = X86::INTEL_PENTIUM_III; + break; + } + if (Features & (1 << X86::FEATURE_MMX)) { + *Type = X86::INTEL_PENTIUM_II; + break; + } + *Type = X86::INTEL_PENTIUM_PRO; + break; + } + break; + case 15: { + if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) { + *Type = X86::INTEL_NOCONA; + break; + } + if (Features & (1 << X86::FEATURE_SSE3)) { + *Type = X86::INTEL_PRESCOTT; + break; + } + *Type = X86::INTEL_PENTIUM_IV; + break; + } + default: + break; /*"generic"*/ + } +} + +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, + unsigned *Subtype) { + // FIXME: this poorly matches the generated SubtargetFeatureKV table. There + // appears to be no way to generate the wide variety of AMD-specific targets + // from the information returned from CPUID. 
  // Map the CPUID family (and, inside some families, the model number or an
  // SSE feature bit) onto the X86::AMD* type/subtype enumerators. *Type and
  // *Subtype are written only by a matching case; an unrecognized family or
  // model leaves whatever the caller stored there untouched.
  switch (Family) {
  case 4:
    *Type = X86::AMD_i486;
    break;
  case 5:
    *Type = X86::AMDPENTIUM;
    // Models not listed here keep the family-level type with no subtype.
    switch (Model) {
    case 6:
    case 7:
      *Subtype = X86::AMDPENTIUM_K6;
      break; // "k6"
    case 8:
      *Subtype = X86::AMDPENTIUM_K62;
      break; // "k6-2"
    case 9:
    case 13:
      *Subtype = X86::AMDPENTIUM_K63;
      break; // "k6-3"
    case 10:
      *Subtype = X86::AMDPENTIUM_GEODE;
      break; // "geode"
    }
    break;
  case 6:
    // Family 6 is split on the SSE feature bit alone; Model is not consulted.
    if (Features & (1 << X86::FEATURE_SSE)) {
      *Type = X86::AMD_ATHLON_XP;
      break; // "athlon-xp"
    }
    *Type = X86::AMD_ATHLON;
    break; // "athlon"
  case 15:
    // Family 15 is split on the SSE3 feature bit alone.
    if (Features & (1 << X86::FEATURE_SSE3)) {
      *Type = X86::AMD_K8SSE3;
      break; // "k8-sse3"
    }
    *Type = X86::AMD_K8;
    break; // "k8"
  case 16:
    *Type = X86::AMDFAM10H; // "amdfam10"
    switch (Model) {
    case 2:
      *Subtype = X86::AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = X86::AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = X86::AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    *Type = X86::AMD_BTVER1;
    break; // "btver1";
  case 21:
    *Type = X86::AMDFAM15H;
    // The range tests are order-sensitive: model 02h is tested together with
    // 10h-1Fh, ahead of the 00h-0Fh range, so it maps to bdver2 rather than
    // bdver1. Each `break` leaves the enclosing family switch.
    if (Model >= 0x60 && Model <= 0x7f) {
      *Subtype = X86::AMDFAM15H_BDVER4;
      break; // "bdver4"; 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      *Subtype = X86::AMDFAM15H_BDVER3;
      break; // "bdver3"; 30h-3Fh: Steamroller
    }
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
      *Subtype = X86::AMDFAM15H_BDVER2;
      break; // "bdver2"; 02h, 10h-1Fh: Piledriver
    }
    if (Model <= 0x0f) {
      *Subtype = X86::AMDFAM15H_BDVER1;
      break; // "bdver1"; 00h-0Fh: Bulldozer
    }
    break;
  case 22:
    *Type = X86::AMD_BTVER2;
    break; // "btver2"
  case 23:
    *Type = X86::AMDFAM17H;
    // Models outside 30h-3Fh and 00h-0Fh get the family type but no subtype.
    if (Model >= 0x30 && Model <= 0x3f) {
      *Subtype = X86::AMDFAM17H_ZNVER2;
      break; // "znver2"; 30h-3fh: Zen2
    }
    if (Model <= 0x0f) {
      *Subtype = X86::AMDFAM17H_ZNVER1;
      break; // "znver1"; 00h-0Fh: Zen1
    }
    break;
  default:
    break; // "generic"
  }
}

static void getAvailableFeatures(unsigned ECX, unsigned EDX,
unsigned MaxLeaf, + unsigned *FeaturesOut, unsigned *Features2Out, + unsigned *Features3Out) { + unsigned Features = 0; + unsigned Features2 = 0; + unsigned Features3 = 0; + unsigned EAX, EBX; + + auto setFeature = [&](unsigned F) { + if (F < 32) + Features |= 1U << (F & 0x1f); + else if (F < 64) + Features2 |= 1U << ((F - 32) & 0x1f); + else if (F < 96) + Features3 |= 1U << ((F - 64) & 0x1f); + else + llvm_unreachable("Unexpected FeatureBit"); + }; + + if ((EDX >> 15) & 1) + setFeature(X86::FEATURE_CMOV); + if ((EDX >> 23) & 1) + setFeature(X86::FEATURE_MMX); + if ((EDX >> 25) & 1) + setFeature(X86::FEATURE_SSE); + if ((EDX >> 26) & 1) + setFeature(X86::FEATURE_SSE2); + + if ((ECX >> 0) & 1) + setFeature(X86::FEATURE_SSE3); + if ((ECX >> 1) & 1) + setFeature(X86::FEATURE_PCLMUL); + if ((ECX >> 9) & 1) + setFeature(X86::FEATURE_SSSE3); + if ((ECX >> 12) & 1) + setFeature(X86::FEATURE_FMA); + if ((ECX >> 19) & 1) + setFeature(X86::FEATURE_SSE4_1); + if ((ECX >> 20) & 1) + setFeature(X86::FEATURE_SSE4_2); + if ((ECX >> 23) & 1) + setFeature(X86::FEATURE_POPCNT); + if ((ECX >> 25) & 1) + setFeature(X86::FEATURE_AES); + + if ((ECX >> 22) & 1) + setFeature(X86::FEATURE_MOVBE); + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. 
+ const unsigned AVXBits = (1 << 27) | (1 << 28); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + if (HasAVX) + setFeature(X86::FEATURE_AVX); + + bool HasLeaf7 = + MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + if (HasLeaf7 && ((EBX >> 3) & 1)) + setFeature(X86::FEATURE_BMI); + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + setFeature(X86::FEATURE_AVX2); + if (HasLeaf7 && ((EBX >> 8) & 1)) + setFeature(X86::FEATURE_BMI2); + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512F); + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512DQ); + if (HasLeaf7 && ((EBX >> 19) & 1)) + setFeature(X86::FEATURE_ADX); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512IFMA); + if (HasLeaf7 && ((EBX >> 23) & 1)) + setFeature(X86::FEATURE_CLFLUSHOPT); + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512PF); + if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512ER); + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512CD); + if (HasLeaf7 && ((EBX >> 29) & 1)) + setFeature(X86::FEATURE_SHA); + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512BW); + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VL); + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 8) & 1)) + setFeature(X86::FEATURE_GFNI); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + setFeature(X86::FEATURE_VPCLMULQDQ); + if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VNNI); + if (HasLeaf7 && ((ECX >> 12) & 1) && 
HasAVX512Save) + setFeature(X86::FEATURE_AVX512BITALG); + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VPOPCNTDQ); + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX5124VNNIW); + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VP2INTERSECT); + + bool HasLeaf7Subleaf1 = + MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512BF16); + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + setFeature(X86::FEATURE_SSE4_A); + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + setFeature(X86::FEATURE_XOP); + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + setFeature(X86::FEATURE_FMA4); + + if (HasExtLeaf1 && ((EDX >> 29) & 1)) + setFeature(X86::FEATURE_EM64T); + + *FeaturesOut = Features; + *Features2Out = Features2; + *Features3Out = Features3; +} + +StringRef sys::getHostCPUName() { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + unsigned MaxLeaf, Vendor; + +#if defined(__GNUC__) || defined(__clang__) + //FIXME: include cpuid.h from clang or copy __get_cpuid_max here + // and simplify it to not invoke __cpuid (like cpu_model.c in + // compiler-rt/lib/builtins/cpu_model.c? + // Opting for the second option. 
+ if(!isCpuIdSupported()) + return "generic"; +#endif + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) + return "generic"; + getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + + unsigned Brand_id = EBX & 0xff; + unsigned Family = 0, Model = 0; + unsigned Features = 0, Features2 = 0, Features3 = 0; + detectX86FamilyModel(EAX, &Family, &Model); + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2, &Features3); + + unsigned Type = 0; + unsigned Subtype = 0; + + if (Vendor == SIG_INTEL) { + getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, + Features2, Features3, &Type, &Subtype); + } else if (Vendor == SIG_AMD) { + getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); + } + + // Check subtypes first since those are more specific. +#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ + if (Subtype == X86::ENUM) \ + return ARCHNAME; +#include "llvm/Support/X86TargetParser.def" + + // Now check types. +#define X86_CPU_TYPE(ARCHNAME, ENUM) \ + if (Type == X86::ENUM) \ + return ARCHNAME; +#include "llvm/Support/X86TargetParser.def" + + return "generic"; +} + +#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) +StringRef sys::getHostCPUName() { + host_basic_info_data_t hostInfo; + mach_msg_type_number_t infoCount; + + infoCount = HOST_BASIC_INFO_COUNT; + mach_port_t hostPort = mach_host_self(); + host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, + &infoCount); + mach_port_deallocate(mach_task_self(), hostPort); + + if (hostInfo.cpu_type != CPU_TYPE_POWERPC) + return "generic"; + + switch (hostInfo.cpu_subtype) { + case CPU_SUBTYPE_POWERPC_601: + return "601"; + case CPU_SUBTYPE_POWERPC_602: + return "602"; + case CPU_SUBTYPE_POWERPC_603: + return "603"; + case CPU_SUBTYPE_POWERPC_603e: + return "603e"; + case CPU_SUBTYPE_POWERPC_603ev: + return "603ev"; + case CPU_SUBTYPE_POWERPC_604: + return "604"; + case CPU_SUBTYPE_POWERPC_604e: + return "604e"; + case CPU_SUBTYPE_POWERPC_620: + 
return "620"; + case CPU_SUBTYPE_POWERPC_750: + return "750"; + case CPU_SUBTYPE_POWERPC_7400: + return "7400"; + case CPU_SUBTYPE_POWERPC_7450: + return "7450"; + case CPU_SUBTYPE_POWERPC_970: + return "970"; + default:; + } + + return "generic"; +} +#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) +StringRef sys::getHostCPUName() { + std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForPowerPC(Content); +} +#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) +StringRef sys::getHostCPUName() { + std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForARM(Content); +} +#elif defined(__linux__) && defined(__s390x__) +StringRef sys::getHostCPUName() { + std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForS390x(Content); +} +#else +StringRef sys::getHostCPUName() { return "generic"; } +#endif + +#if defined(__linux__) && defined(__x86_64__) +// On Linux, the number of physical cores can be computed from /proc/cpuinfo, +// using the number of unique physical/core id pairs. The following +// implementation reads the /proc/cpuinfo format on an x86_64 system. +static int computeHostNumPhysicalCores() { + // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be + // mmapped because it appears to have 0 size. 
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = + llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); + if (std::error_code EC = Text.getError()) { + llvm::errs() << "Can't read " + << "/proc/cpuinfo: " << EC.message() << "\n"; + return -1; + } + SmallVector<StringRef, 8> strs; + (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, + /*KeepEmpty=*/false); + int CurPhysicalId = -1; + int CurCoreId = -1; + SmallSet<std::pair<int, int>, 32> UniqueItems; + for (auto &Line : strs) { + Line = Line.trim(); + if (!Line.startswith("physical id") && !Line.startswith("core id")) + continue; + std::pair<StringRef, StringRef> Data = Line.split(':'); + auto Name = Data.first.trim(); + auto Val = Data.second.trim(); + if (Name == "physical id") { + assert(CurPhysicalId == -1 && + "Expected a core id before seeing another physical id"); + Val.getAsInteger(10, CurPhysicalId); + } + if (Name == "core id") { + assert(CurCoreId == -1 && + "Expected a physical id before seeing another core id"); + Val.getAsInteger(10, CurCoreId); + } + if (CurPhysicalId != -1 && CurCoreId != -1) { + UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); + CurPhysicalId = -1; + CurCoreId = -1; + } + } + return UniqueItems.size(); +} +#elif defined(__APPLE__) && defined(__x86_64__) +#include <sys/param.h> +#include <sys/sysctl.h> + +// Gets the number of *physical cores* on the machine. +static int computeHostNumPhysicalCores() { + uint32_t count; + size_t len = sizeof(count); + sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); + if (count < 1) { + int nm[2]; + nm[0] = CTL_HW; + nm[1] = HW_AVAILCPU; + sysctl(nm, 2, &count, &len, NULL, 0); + if (count < 1) + return -1; + } + return count; +} +#else +// On other systems, return -1 to indicate unknown. 
+static int computeHostNumPhysicalCores() { return -1; } +#endif + +int sys::getHostNumPhysicalCores() { + static int NumCores = computeHostNumPhysicalCores(); + return NumCores; +} + +#if defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64) +bool sys::getHostCPUFeatures(StringMap<bool> &Features) { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + unsigned MaxLevel; + union { + unsigned u[3]; + char c[12]; + } text; + + if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) || + MaxLevel < 1) + return false; + + getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + + Features["cx8"] = (EDX >> 8) & 1; + Features["cmov"] = (EDX >> 15) & 1; + Features["mmx"] = (EDX >> 23) & 1; + Features["fxsr"] = (EDX >> 24) & 1; + Features["sse"] = (EDX >> 25) & 1; + Features["sse2"] = (EDX >> 26) & 1; + + Features["sse3"] = (ECX >> 0) & 1; + Features["pclmul"] = (ECX >> 1) & 1; + Features["ssse3"] = (ECX >> 9) & 1; + Features["cx16"] = (ECX >> 13) & 1; + Features["sse4.1"] = (ECX >> 19) & 1; + Features["sse4.2"] = (ECX >> 20) & 1; + Features["movbe"] = (ECX >> 22) & 1; + Features["popcnt"] = (ECX >> 23) & 1; + Features["aes"] = (ECX >> 25) & 1; + Features["rdrnd"] = (ECX >> 30) & 1; + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. + bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && + !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); + // AVX512 requires additional context to be saved by the OS. + bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); + + Features["avx"] = HasAVXSave; + Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; + // Only enable XSAVE if OS has enabled support for saving YMM state. 
+ Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; + Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); + Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); + Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); + Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); + Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; + Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); + Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; + Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); + Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); + + Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); + + // Miscellaneous memory related features, detected by + // using the 0x80000008 leaf of the CPUID instruction + bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && + !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); + Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); + Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); + + bool HasLeaf7 = + MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); + Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); + Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); + // AVX2 is only supported if we have the OS save support from AVX. + Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; + Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); + Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); + Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); + // AVX512 is only supported if the OS supports the context save for it. 
+ Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; + Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; + Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); + Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); + Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; + Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); + Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); + Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; + Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; + Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; + Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); + Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; + Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; + + Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); + Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); + Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); + Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; + Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); + Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); + Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; + Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; + Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; + Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; + Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; + Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); + Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); + Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); + Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); + Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); + + // There are two CPUID leafs which information associated with the pconfig + // 
instruction: + // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th + // bit of EDX), while the EAX=0x1b leaf returns information on the + // availability of specific pconfig leafs. + // The target feature here only refers to the the first of these two. + // Users might need to check for the availability of specific pconfig + // leaves using cpuid, since that information is ignored while + // detecting features using the "-march=native" flag. + // For more info, see X86 ISA docs. + Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); + bool HasLeaf7Subleaf1 = + MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; + + bool HasLeafD = MaxLevel >= 0xd && + !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); + + // Only enable XSAVE if OS has enabled support for saving YMM state. + Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; + Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; + Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; + + bool HasLeaf14 = MaxLevel >= 0x14 && + !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); + + Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); + + return true; +} +#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) +bool sys::getHostCPUFeatures(StringMap<bool> &Features) { + std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); + if (!P) + return false; + + SmallVector<StringRef, 32> Lines; + P->getBuffer().split(Lines, "\n"); + + SmallVector<StringRef, 32> CPUFeatures; + + // Look for the CPU features. 
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("Features")) { + Lines[I].split(CPUFeatures, ' '); + break; + } + +#if defined(__aarch64__) + // Keep track of which crypto features we have seen + enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; + uint32_t crypto = 0; +#endif + + for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { + StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) +#if defined(__aarch64__) + .Case("asimd", "neon") + .Case("fp", "fp-armv8") + .Case("crc32", "crc") +#else + .Case("half", "fp16") + .Case("neon", "neon") + .Case("vfpv3", "vfp3") + .Case("vfpv3d16", "d16") + .Case("vfpv4", "vfp4") + .Case("idiva", "hwdiv-arm") + .Case("idivt", "hwdiv") +#endif + .Default(""); + +#if defined(__aarch64__) + // We need to check crypto separately since we need all of the crypto + // extensions to enable the subtarget feature + if (CPUFeatures[I] == "aes") + crypto |= CAP_AES; + else if (CPUFeatures[I] == "pmull") + crypto |= CAP_PMULL; + else if (CPUFeatures[I] == "sha1") + crypto |= CAP_SHA1; + else if (CPUFeatures[I] == "sha2") + crypto |= CAP_SHA2; +#endif + + if (LLVMFeatureStr != "") + Features[LLVMFeatureStr] = true; + } + +#if defined(__aarch64__) + // If we have all crypto bits we can add the feature + if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) + Features["crypto"] = true; +#endif + + return true; +} +#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) +bool sys::getHostCPUFeatures(StringMap<bool> &Features) { + if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + Features["neon"] = true; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) + Features["crc"] = true; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) + Features["crypto"] = true; + + return true; +} +#else +bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } +#endif + +std::string 
sys::getProcessTriple() { + std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); + Triple PT(Triple::normalize(TargetTripleString)); + + if (sizeof(void *) == 8 && PT.isArch32Bit()) + PT = PT.get64BitArchVariant(); + if (sizeof(void *) == 4 && PT.isArch64Bit()) + PT = PT.get32BitArchVariant(); + + return PT.str(); +} diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp new file mode 100644 index 0000000000000..0d7d7fcc8cb60 --- /dev/null +++ b/llvm/lib/Support/InitLLVM.cpp @@ -0,0 +1,52 @@ +//===-- InitLLVM.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include <string> + +#ifdef _WIN32 +#include "Windows/WindowsSupport.h" +#endif + +using namespace llvm; +using namespace llvm::sys; + +InitLLVM::InitLLVM(int &Argc, const char **&Argv) : StackPrinter(Argc, Argv) { + sys::PrintStackTraceOnErrorSignal(Argv[0]); + install_out_of_memory_new_handler(); + +#ifdef _WIN32 + // We use UTF-8 as the internal character encoding. On Windows, + // arguments passed to main() may not be encoded in UTF-8. In order + // to reliably detect encoding of command line arguments, we use an + // Windows API to obtain arguments, convert them to UTF-8, and then + // write them back to the Argv vector. + // + // There's probably other way to do the same thing (e.g. using + // wmain() instead of main()), but this way seems less intrusive + // than that. 
+ std::string Banner = std::string(Argv[0]) + ": "; + ExitOnError ExitOnErr(Banner); + + ExitOnErr(errorCodeToError(windows::GetCommandLineArguments(Args, Alloc))); + + // GetCommandLineArguments doesn't terminate the vector with a + // nullptr. Do it to make it compatible with the real argv. + Args.push_back(nullptr); + + Argc = Args.size() - 1; + Argv = Args.data(); +#endif +} + +InitLLVM::~InitLLVM() { llvm_shutdown(); } diff --git a/llvm/lib/Support/IntEqClasses.cpp b/llvm/lib/Support/IntEqClasses.cpp new file mode 100644 index 0000000000000..4a976dcefc65f --- /dev/null +++ b/llvm/lib/Support/IntEqClasses.cpp @@ -0,0 +1,76 @@ +//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Equivalence classes for small integers. This is a mapping of the integers +// 0 .. N-1 into M equivalence classes numbered 0 .. M-1. +// +// Initially each integer has its own equivalence class. Classes are joined by +// passing a representative member of each class to join(). +// +// Once the classes are built, compress() will number them 0 .. M-1 and prevent +// further changes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/IntEqClasses.h" + +using namespace llvm; + +void IntEqClasses::grow(unsigned N) { + assert(NumClasses == 0 && "grow() called after compress()."); + EC.reserve(N); + while (EC.size() < N) + EC.push_back(EC.size()); +} + +unsigned IntEqClasses::join(unsigned a, unsigned b) { + assert(NumClasses == 0 && "join() called after compress()."); + unsigned eca = EC[a]; + unsigned ecb = EC[b]; + // Update pointers while searching for the leaders, compressing the paths + // incrementally. 
The larger leader will eventually be updated, joining the + // classes. + while (eca != ecb) + if (eca < ecb) { + EC[b] = eca; + b = ecb; + ecb = EC[b]; + } else { + EC[a] = ecb; + a = eca; + eca = EC[a]; + } + + return eca; +} + +unsigned IntEqClasses::findLeader(unsigned a) const { + assert(NumClasses == 0 && "findLeader() called after compress()."); + while (a != EC[a]) + a = EC[a]; + return a; +} + +void IntEqClasses::compress() { + if (NumClasses) + return; + for (unsigned i = 0, e = EC.size(); i != e; ++i) + EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]]; +} + +void IntEqClasses::uncompress() { + if (!NumClasses) + return; + SmallVector<unsigned, 8> Leader; + for (unsigned i = 0, e = EC.size(); i != e; ++i) + if (EC[i] < Leader.size()) + EC[i] = Leader[EC[i]]; + else + Leader.push_back(EC[i] = i); + NumClasses = 0; +} diff --git a/llvm/lib/Support/IntervalMap.cpp b/llvm/lib/Support/IntervalMap.cpp new file mode 100644 index 0000000000000..f15c7c9403c36 --- /dev/null +++ b/llvm/lib/Support/IntervalMap.cpp @@ -0,0 +1,160 @@ +//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the few non-templated functions in IntervalMap. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/IntervalMap.h" + +namespace llvm { +namespace IntervalMapImpl { + +void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) { + assert(!path.empty() && "Can't replace missing root"); + path.front() = Entry(Root, Size, Offsets.first); + path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second)); +} + +NodeRef Path::getLeftSibling(unsigned Level) const { + // The root has no siblings. + if (Level == 0) + return NodeRef(); + + // Go up the tree until we can go left. + unsigned l = Level - 1; + while (l && path[l].offset == 0) + --l; + + // We can't go left. + if (path[l].offset == 0) + return NodeRef(); + + // NR is the subtree containing our left sibling. + NodeRef NR = path[l].subtree(path[l].offset - 1); + + // Keep right all the way down. + for (++l; l != Level; ++l) + NR = NR.subtree(NR.size() - 1); + return NR; +} + +void Path::moveLeft(unsigned Level) { + assert(Level != 0 && "Cannot move the root node"); + + // Go up the tree until we can go left. + unsigned l = 0; + if (valid()) { + l = Level - 1; + while (path[l].offset == 0) { + assert(l != 0 && "Cannot move beyond begin()"); + --l; + } + } else if (height() < Level) + // end() may have created a height=0 path. + path.resize(Level + 1, Entry(nullptr, 0, 0)); + + // NR is the subtree containing our left sibling. + --path[l].offset; + NodeRef NR = subtree(l); + + // Get the rightmost node in the subtree. + for (++l; l != Level; ++l) { + path[l] = Entry(NR, NR.size() - 1); + NR = NR.subtree(NR.size() - 1); + } + path[l] = Entry(NR, NR.size() - 1); +} + +NodeRef Path::getRightSibling(unsigned Level) const { + // The root has no siblings. + if (Level == 0) + return NodeRef(); + + // Go up the tree until we can go right. + unsigned l = Level - 1; + while (l && atLastEntry(l)) + --l; + + // We can't go right. 
+ if (atLastEntry(l)) + return NodeRef(); + + // NR is the subtree containing our right sibling. + NodeRef NR = path[l].subtree(path[l].offset + 1); + + // Keep left all the way down. + for (++l; l != Level; ++l) + NR = NR.subtree(0); + return NR; +} + +void Path::moveRight(unsigned Level) { + assert(Level != 0 && "Cannot move the root node"); + + // Go up the tree until we can go right. + unsigned l = Level - 1; + while (l && atLastEntry(l)) + --l; + + // NR is the subtree containing our right sibling. If we hit end(), we have + // offset(0) == node(0).size(). + if (++path[l].offset == path[l].size) + return; + NodeRef NR = subtree(l); + + for (++l; l != Level; ++l) { + path[l] = Entry(NR, 0); + NR = NR.subtree(0); + } + path[l] = Entry(NR, 0); +} + + +IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity, + const unsigned *CurSize, unsigned NewSize[], + unsigned Position, bool Grow) { + assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements"); + assert(Position <= Elements && "Invalid position"); + if (!Nodes) + return IdxPair(); + + // Trivial algorithm: left-leaning even distribution. + const unsigned PerNode = (Elements + Grow) / Nodes; + const unsigned Extra = (Elements + Grow) % Nodes; + IdxPair PosPair = IdxPair(Nodes, 0); + unsigned Sum = 0; + for (unsigned n = 0; n != Nodes; ++n) { + Sum += NewSize[n] = PerNode + (n < Extra); + if (PosPair.first == Nodes && Sum > Position) + PosPair = IdxPair(n, Position - (Sum - NewSize[n])); + } + assert(Sum == Elements + Grow && "Bad distribution sum"); + + // Subtract the Grow element that was added. 
+ if (Grow) { + assert(PosPair.first < Nodes && "Bad algebra"); + assert(NewSize[PosPair.first] && "Too few elements to need Grow"); + --NewSize[PosPair.first]; + } + +#ifndef NDEBUG + Sum = 0; + for (unsigned n = 0; n != Nodes; ++n) { + assert(NewSize[n] <= Capacity && "Overallocated node"); + Sum += NewSize[n]; + } + assert(Sum == Elements && "Bad distribution sum"); +#endif + + return PosPair; +} + +} // namespace IntervalMapImpl +} // namespace llvm + diff --git a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp new file mode 100644 index 0000000000000..da6514f7170bb --- /dev/null +++ b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp @@ -0,0 +1,322 @@ +//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ItaniumManglingCanonicalizer.h" + +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Demangle/ItaniumDemangle.h" +#include "llvm/Support/Allocator.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; +using llvm::itanium_demangle::ForwardTemplateReference; +using llvm::itanium_demangle::Node; +using llvm::itanium_demangle::NodeKind; +using llvm::itanium_demangle::StringView; + +namespace { +struct FoldingSetNodeIDBuilder { + llvm::FoldingSetNodeID &ID; + void operator()(const Node *P) { ID.AddPointer(P); } + void operator()(StringView Str) { + ID.AddString(llvm::StringRef(Str.begin(), Str.size())); + } + template<typename T> + typename std::enable_if<std::is_integral<T>::value || + std::is_enum<T>::value>::type + operator()(T V) { + ID.AddInteger((unsigned 
long long)V); + } + void operator()(itanium_demangle::NodeOrString NS) { + if (NS.isNode()) { + ID.AddInteger(0); + (*this)(NS.asNode()); + } else if (NS.isString()) { + ID.AddInteger(1); + (*this)(NS.asString()); + } else { + ID.AddInteger(2); + } + } + void operator()(itanium_demangle::NodeArray A) { + ID.AddInteger(A.size()); + for (const Node *N : A) + (*this)(N); + } +}; + +template<typename ...T> +void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) { + FoldingSetNodeIDBuilder Builder = {ID}; + Builder(K); + int VisitInOrder[] = { + (Builder(V), 0) ..., + 0 // Avoid empty array if there are no arguments. + }; + (void)VisitInOrder; +} + +// FIXME: Convert this to a generic lambda when possible. +template<typename NodeT> struct ProfileSpecificNode { + FoldingSetNodeID &ID; + template<typename ...T> void operator()(T ...V) { + profileCtor(ID, NodeKind<NodeT>::Kind, V...); + } +}; + +struct ProfileNode { + FoldingSetNodeID &ID; + template<typename NodeT> void operator()(const NodeT *N) { + N->match(ProfileSpecificNode<NodeT>{ID}); + } +}; + +template<> void ProfileNode::operator()(const ForwardTemplateReference *N) { + llvm_unreachable("should never canonicalize a ForwardTemplateReference"); +} + +void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) { + N->visit(ProfileNode{ID}); +} + +class FoldingNodeAllocator { + class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode { + public: + // 'Node' in this context names the injected-class-name of the base class. + itanium_demangle::Node *getNode() { + return reinterpret_cast<itanium_demangle::Node *>(this + 1); + } + void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); } + }; + + BumpPtrAllocator RawAlloc; + llvm::FoldingSet<NodeHeader> Nodes; + +public: + void reset() {} + + template <typename T, typename... Args> + std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... 
As) { + // FIXME: Don't canonicalize forward template references for now, because + // they contain state (the resolved template node) that's not known at their + // point of creation. + if (std::is_same<T, ForwardTemplateReference>::value) { + // Note that we don't use if-constexpr here and so we must still write + // this code in a generic form. + return {new (RawAlloc.Allocate(sizeof(T), alignof(T))) + T(std::forward<Args>(As)...), + true}; + } + + llvm::FoldingSetNodeID ID; + profileCtor(ID, NodeKind<T>::Kind, As...); + + void *InsertPos; + if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos)) + return {static_cast<T*>(Existing->getNode()), false}; + + if (!CreateNewNodes) + return {nullptr, true}; + + static_assert(alignof(T) <= alignof(NodeHeader), + "underaligned node header for specific node kind"); + void *Storage = + RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader)); + NodeHeader *New = new (Storage) NodeHeader; + T *Result = new (New->getNode()) T(std::forward<Args>(As)...); + Nodes.InsertNode(New, InsertPos); + return {Result, true}; + } + + template<typename T, typename... Args> + Node *makeNode(Args &&...As) { + return getOrCreateNode<T>(true, std::forward<Args>(As)...).first; + } + + void *allocateNodeArray(size_t sz) { + return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *)); + } +}; + +class CanonicalizerAllocator : public FoldingNodeAllocator { + Node *MostRecentlyCreated = nullptr; + Node *TrackedNode = nullptr; + bool TrackedNodeIsUsed = false; + bool CreateNewNodes = true; + llvm::SmallDenseMap<Node*, Node*, 32> Remappings; + + template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) { + std::pair<Node *, bool> Result = + getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...); + if (Result.second) { + // Node is new. Make a note of that. + MostRecentlyCreated = Result.first; + } else if (Result.first) { + // Node is pre-existing; check if it's in our remapping table. 
+ if (auto *N = Remappings.lookup(Result.first)) { + Result.first = N; + assert(Remappings.find(Result.first) == Remappings.end() && + "should never need multiple remap steps"); + } + if (Result.first == TrackedNode) + TrackedNodeIsUsed = true; + } + return Result.first; + } + + /// Helper to allow makeNode to be partially-specialized on T. + template<typename T> struct MakeNodeImpl { + CanonicalizerAllocator &Self; + template<typename ...Args> Node *make(Args &&...As) { + return Self.makeNodeSimple<T>(std::forward<Args>(As)...); + } + }; + +public: + template<typename T, typename ...Args> Node *makeNode(Args &&...As) { + return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...); + } + + void reset() { MostRecentlyCreated = nullptr; } + + void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; } + + void addRemapping(Node *A, Node *B) { + // Note, we don't need to check whether B is also remapped, because if it + // was we would have already remapped it when building it. + Remappings.insert(std::make_pair(A, B)); + } + + bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; } + + void trackUsesOf(Node *N) { + TrackedNode = N; + TrackedNodeIsUsed = false; + } + bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } +}; + +/// Convert St3foo to NSt3fooE so that equivalences naming one also affect the +/// other. +template<> +struct CanonicalizerAllocator::MakeNodeImpl< + itanium_demangle::StdQualifiedName> { + CanonicalizerAllocator &Self; + Node *make(Node *Child) { + Node *StdNamespace = Self.makeNode<itanium_demangle::NameType>("std"); + if (!StdNamespace) + return nullptr; + return Self.makeNode<itanium_demangle::NestedName>(StdNamespace, Child); + } +}; + +// FIXME: Also expand built-in substitutions? 
+ +using CanonicalizingDemangler = + itanium_demangle::ManglingParser<CanonicalizerAllocator>; +} + +struct ItaniumManglingCanonicalizer::Impl { + CanonicalizingDemangler Demangler = {nullptr, nullptr}; +}; + +ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {} +ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; } + +ItaniumManglingCanonicalizer::EquivalenceError +ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First, + StringRef Second) { + auto &Alloc = P->Demangler.ASTAllocator; + Alloc.setCreateNewNodes(true); + + auto Parse = [&](StringRef Str) { + P->Demangler.reset(Str.begin(), Str.end()); + Node *N = nullptr; + switch (Kind) { + // A <name>, with minor extensions to allow arbitrary namespace and + // template names that can't easily be written as <name>s. + case FragmentKind::Name: + // Very special case: allow "St" as a shorthand for "3std". It's not + // valid as a <name> mangling, but is nonetheless the most natural + // way to name the 'std' namespace. + if (Str.size() == 2 && P->Demangler.consumeIf("St")) + N = P->Demangler.make<itanium_demangle::NameType>("std"); + // We permit substitutions to name templates without their template + // arguments. This mostly just falls out, as almost all template names + // are valid as <name>s, but we also want to parse <substitution>s as + // <name>s, even though they're not. + else if (Str.startswith("S")) + // Parse the substitution and optional following template arguments. + N = P->Demangler.parseType(); + else + N = P->Demangler.parseName(); + break; + + // A <type>. + case FragmentKind::Type: + N = P->Demangler.parseType(); + break; + + // An <encoding>. + case FragmentKind::Encoding: + N = P->Demangler.parseEncoding(); + break; + } + + // If we have trailing junk, the mangling is invalid. 
+ if (P->Demangler.numLeft() != 0) + N = nullptr; + + // If any node was created after N, then we cannot safely remap it because + // it might already be in use by another node. + return std::make_pair(N, Alloc.isMostRecentlyCreated(N)); + }; + + Node *FirstNode, *SecondNode; + bool FirstIsNew, SecondIsNew; + + std::tie(FirstNode, FirstIsNew) = Parse(First); + if (!FirstNode) + return EquivalenceError::InvalidFirstMangling; + + Alloc.trackUsesOf(FirstNode); + std::tie(SecondNode, SecondIsNew) = Parse(Second); + if (!SecondNode) + return EquivalenceError::InvalidSecondMangling; + + // If they're already equivalent, there's nothing to do. + if (FirstNode == SecondNode) + return EquivalenceError::Success; + + if (FirstIsNew && !Alloc.trackedNodeIsUsed()) + Alloc.addRemapping(FirstNode, SecondNode); + else if (SecondIsNew) + Alloc.addRemapping(SecondNode, FirstNode); + else + return EquivalenceError::ManglingAlreadyUsed; + + return EquivalenceError::Success; +} + +ItaniumManglingCanonicalizer::Key +ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) { + P->Demangler.ASTAllocator.setCreateNewNodes(true); + P->Demangler.reset(Mangling.begin(), Mangling.end()); + return reinterpret_cast<Key>(P->Demangler.parse()); +} + +ItaniumManglingCanonicalizer::Key +ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { + P->Demangler.ASTAllocator.setCreateNewNodes(false); + P->Demangler.reset(Mangling.begin(), Mangling.end()); + return reinterpret_cast<Key>(P->Demangler.parse()); +} diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp new file mode 100644 index 0000000000000..16b1d11efd085 --- /dev/null +++ b/llvm/lib/Support/JSON.cpp @@ -0,0 +1,718 @@ +//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
#include "llvm/Support/JSON.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Format.h"
#include <cctype>
#include <cerrno>
Object &LHS, const Object &RHS) { + if (LHS.size() != RHS.size()) + return false; + for (const auto &L : LHS) { + auto R = RHS.find(L.first); + if (R == RHS.end() || L.second != R->second) + return false; + } + return true; +} + +Array::Array(std::initializer_list<Value> Elements) { + V.reserve(Elements.size()); + for (const Value &V : Elements) { + emplace_back(nullptr); + back().moveFrom(std::move(V)); + } +} + +Value::Value(std::initializer_list<Value> Elements) + : Value(json::Array(Elements)) {} + +void Value::copyFrom(const Value &M) { + Type = M.Type; + switch (Type) { + case T_Null: + case T_Boolean: + case T_Double: + case T_Integer: + memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); + break; + case T_StringRef: + create<StringRef>(M.as<StringRef>()); + break; + case T_String: + create<std::string>(M.as<std::string>()); + break; + case T_Object: + create<json::Object>(M.as<json::Object>()); + break; + case T_Array: + create<json::Array>(M.as<json::Array>()); + break; + } +} + +void Value::moveFrom(const Value &&M) { + Type = M.Type; + switch (Type) { + case T_Null: + case T_Boolean: + case T_Double: + case T_Integer: + memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer)); + break; + case T_StringRef: + create<StringRef>(M.as<StringRef>()); + break; + case T_String: + create<std::string>(std::move(M.as<std::string>())); + M.Type = T_Null; + break; + case T_Object: + create<json::Object>(std::move(M.as<json::Object>())); + M.Type = T_Null; + break; + case T_Array: + create<json::Array>(std::move(M.as<json::Array>())); + M.Type = T_Null; + break; + } +} + +void Value::destroy() { + switch (Type) { + case T_Null: + case T_Boolean: + case T_Double: + case T_Integer: + break; + case T_StringRef: + as<StringRef>().~StringRef(); + break; + case T_String: + as<std::string>().~basic_string(); + break; + case T_Object: + as<json::Object>().~Object(); + break; + case T_Array: + as<json::Array>().~Array(); + break; + } +} + +bool operator==(const Value 
&L, const Value &R) { + if (L.kind() != R.kind()) + return false; + switch (L.kind()) { + case Value::Null: + return *L.getAsNull() == *R.getAsNull(); + case Value::Boolean: + return *L.getAsBoolean() == *R.getAsBoolean(); + case Value::Number: + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 + // The same integer must convert to the same double, per the standard. + // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32. + // So we avoid floating point promotion for exact comparisons. + if (L.Type == Value::T_Integer || R.Type == Value::T_Integer) + return L.getAsInteger() == R.getAsInteger(); + return *L.getAsNumber() == *R.getAsNumber(); + case Value::String: + return *L.getAsString() == *R.getAsString(); + case Value::Array: + return *L.getAsArray() == *R.getAsArray(); + case Value::Object: + return *L.getAsObject() == *R.getAsObject(); + } + llvm_unreachable("Unknown value kind"); +} + +namespace { +// Simple recursive-descent JSON parser. +class Parser { +public: + Parser(StringRef JSON) + : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {} + + bool checkUTF8() { + size_t ErrOffset; + if (isUTF8(StringRef(Start, End - Start), &ErrOffset)) + return true; + P = Start + ErrOffset; // For line/column calculation. + return parseError("Invalid UTF-8 sequence"); + } + + bool parseValue(Value &Out); + + bool assertEnd() { + eatWhitespace(); + if (P == End) + return true; + return parseError("Text after end of document"); + } + + Error takeError() { + assert(Err); + return std::move(*Err); + } + +private: + void eatWhitespace() { + while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t')) + ++P; + } + + // On invalid syntax, parseX() functions return false and set Err. + bool parseNumber(char First, Value &Out); + bool parseString(std::string &Out); + bool parseUnicode(std::string &Out); + bool parseError(const char *Msg); // always returns false + + char next() { return P == End ? 
0 : *P++; } + char peek() { return P == End ? 0 : *P; } + static bool isNumber(char C) { + return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' || + C == '5' || C == '6' || C == '7' || C == '8' || C == '9' || + C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.'; + } + + Optional<Error> Err; + const char *Start, *P, *End; +}; + +bool Parser::parseValue(Value &Out) { + eatWhitespace(); + if (P == End) + return parseError("Unexpected EOF"); + switch (char C = next()) { + // Bare null/true/false are easy - first char identifies them. + case 'n': + Out = nullptr; + return (next() == 'u' && next() == 'l' && next() == 'l') || + parseError("Invalid JSON value (null?)"); + case 't': + Out = true; + return (next() == 'r' && next() == 'u' && next() == 'e') || + parseError("Invalid JSON value (true?)"); + case 'f': + Out = false; + return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') || + parseError("Invalid JSON value (false?)"); + case '"': { + std::string S; + if (parseString(S)) { + Out = std::move(S); + return true; + } + return false; + } + case '[': { + Out = Array{}; + Array &A = *Out.getAsArray(); + eatWhitespace(); + if (peek() == ']') { + ++P; + return true; + } + for (;;) { + A.emplace_back(nullptr); + if (!parseValue(A.back())) + return false; + eatWhitespace(); + switch (next()) { + case ',': + eatWhitespace(); + continue; + case ']': + return true; + default: + return parseError("Expected , or ] after array element"); + } + } + } + case '{': { + Out = Object{}; + Object &O = *Out.getAsObject(); + eatWhitespace(); + if (peek() == '}') { + ++P; + return true; + } + for (;;) { + if (next() != '"') + return parseError("Expected object key"); + std::string K; + if (!parseString(K)) + return false; + eatWhitespace(); + if (next() != ':') + return parseError("Expected : after object key"); + eatWhitespace(); + if (!parseValue(O[std::move(K)])) + return false; + eatWhitespace(); + switch (next()) { + case ',': + eatWhitespace(); + 
continue; + case '}': + return true; + default: + return parseError("Expected , or } after object property"); + } + } + } + default: + if (isNumber(C)) + return parseNumber(C, Out); + return parseError("Invalid JSON value"); + } +} + +bool Parser::parseNumber(char First, Value &Out) { + // Read the number into a string. (Must be null-terminated for strto*). + SmallString<24> S; + S.push_back(First); + while (isNumber(peek())) + S.push_back(next()); + char *End; + // Try first to parse as integer, and if so preserve full 64 bits. + // strtoll returns long long >= 64 bits, so check it's in range too. + auto I = std::strtoll(S.c_str(), &End, 10); + if (End == S.end() && I >= std::numeric_limits<int64_t>::min() && + I <= std::numeric_limits<int64_t>::max()) { + Out = int64_t(I); + return true; + } + // If it's not an integer + Out = std::strtod(S.c_str(), &End); + return End == S.end() || parseError("Invalid JSON value (number?)"); +} + +bool Parser::parseString(std::string &Out) { + // leading quote was already consumed. + for (char C = next(); C != '"'; C = next()) { + if (LLVM_UNLIKELY(P == End)) + return parseError("Unterminated string"); + if (LLVM_UNLIKELY((C & 0x1f) == C)) + return parseError("Control character in string"); + if (LLVM_LIKELY(C != '\\')) { + Out.push_back(C); + continue; + } + // Handle escape sequence. 
+ switch (C = next()) { + case '"': + case '\\': + case '/': + Out.push_back(C); + break; + case 'b': + Out.push_back('\b'); + break; + case 'f': + Out.push_back('\f'); + break; + case 'n': + Out.push_back('\n'); + break; + case 'r': + Out.push_back('\r'); + break; + case 't': + Out.push_back('\t'); + break; + case 'u': + if (!parseUnicode(Out)) + return false; + break; + default: + return parseError("Invalid escape sequence"); + } + } + return true; +} + +static void encodeUtf8(uint32_t Rune, std::string &Out) { + if (Rune < 0x80) { + Out.push_back(Rune & 0x7F); + } else if (Rune < 0x800) { + uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + } else if (Rune < 0x10000) { + uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + Out.push_back(ThirdByte); + } else if (Rune < 0x110000) { + uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (Rune & 0x3F); + Out.push_back(FirstByte); + Out.push_back(SecondByte); + Out.push_back(ThirdByte); + Out.push_back(FourthByte); + } else { + llvm_unreachable("Invalid codepoint"); + } +} + +// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed. +// May parse several sequential escapes to ensure proper surrogate handling. +// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates. +// These are invalid Unicode but valid JSON (RFC 8259, section 8.2). +bool Parser::parseUnicode(std::string &Out) { + // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD. 
+ auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); }; + // Decodes 4 hex digits from the stream into Out, returns false on error. + auto Parse4Hex = [this](uint16_t &Out) -> bool { + Out = 0; + char Bytes[] = {next(), next(), next(), next()}; + for (unsigned char C : Bytes) { + if (!std::isxdigit(C)) + return parseError("Invalid \\u escape sequence"); + Out <<= 4; + Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0'); + } + return true; + }; + uint16_t First; // UTF-16 code unit from the first \u escape. + if (!Parse4Hex(First)) + return false; + + // We loop to allow proper surrogate-pair error handling. + while (true) { + // Case 1: the UTF-16 code unit is already a codepoint in the BMP. + if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) { + encodeUtf8(First, Out); + return true; + } + + // Case 2: it's an (unpaired) trailing surrogate. + if (LLVM_UNLIKELY(First >= 0xDC00)) { + Invalid(); + return true; + } + + // Case 3: it's a leading surrogate. We expect a trailing one next. + // Case 3a: there's no trailing \u escape. Don't advance in the stream. + if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) { + Invalid(); // Leading surrogate was unpaired. + return true; + } + P += 2; + uint16_t Second; + if (!Parse4Hex(Second)) + return false; + // Case 3b: there was another \u escape, but it wasn't a trailing surrogate. + if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) { + Invalid(); // Leading surrogate was unpaired. + First = Second; // Second escape still needs to be processed. + continue; + } + // Case 3c: a valid surrogate pair encoding an astral codepoint. 
+ encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out); + return true; + } +} + +bool Parser::parseError(const char *Msg) { + int Line = 1; + const char *StartOfLine = Start; + for (const char *X = Start; X < P; ++X) { + if (*X == 0x0A) { + ++Line; + StartOfLine = X + 1; + } + } + Err.emplace( + std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start)); + return false; +} +} // namespace + +Expected<Value> parse(StringRef JSON) { + Parser P(JSON); + Value E = nullptr; + if (P.checkUTF8()) + if (P.parseValue(E)) + if (P.assertEnd()) + return std::move(E); + return P.takeError(); +} +char ParseError::ID = 0; + +static std::vector<const Object::value_type *> sortedElements(const Object &O) { + std::vector<const Object::value_type *> Elements; + for (const auto &E : O) + Elements.push_back(&E); + llvm::sort(Elements, + [](const Object::value_type *L, const Object::value_type *R) { + return L->first < R->first; + }); + return Elements; +} + +bool isUTF8(llvm::StringRef S, size_t *ErrOffset) { + // Fast-path for ASCII, which is valid UTF-8. + if (LLVM_LIKELY(isASCII(S))) + return true; + + const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data; + if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size()))) + return true; + + if (ErrOffset) + *ErrOffset = Rest - Data; + return false; +} + +std::string fixUTF8(llvm::StringRef S) { + // This isn't particularly efficient, but is only for error-recovery. + std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices. 
+ const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data()); + UTF32 *Out32 = Codepoints.data(); + ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(), + lenientConversion); + Codepoints.resize(Out32 - Codepoints.data()); + std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice + const UTF32 *In32 = Codepoints.data(); + UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]); + ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(), + strictConversion); + Res.resize(reinterpret_cast<char *>(Out8) - Res.data()); + return Res; +} + +static void quote(llvm::raw_ostream &OS, llvm::StringRef S) { + OS << '\"'; + for (unsigned char C : S) { + if (C == 0x22 || C == 0x5C) + OS << '\\'; + if (C >= 0x20) { + OS << C; + continue; + } + OS << '\\'; + switch (C) { + // A few characters are common enough to make short escapes worthwhile. + case '\t': + OS << 't'; + break; + case '\n': + OS << 'n'; + break; + case '\r': + OS << 'r'; + break; + default: + OS << 'u'; + llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4); + break; + } + } + OS << '\"'; +} + +void llvm::json::OStream::value(const Value &V) { + switch (V.kind()) { + case Value::Null: + valueBegin(); + OS << "null"; + return; + case Value::Boolean: + valueBegin(); + OS << (*V.getAsBoolean() ? 
"true" : "false"); + return; + case Value::Number: + valueBegin(); + if (V.Type == Value::T_Integer) + OS << *V.getAsInteger(); + else + OS << format("%.*g", std::numeric_limits<double>::max_digits10, + *V.getAsNumber()); + return; + case Value::String: + valueBegin(); + quote(OS, *V.getAsString()); + return; + case Value::Array: + return array([&] { + for (const Value &E : *V.getAsArray()) + value(E); + }); + case Value::Object: + return object([&] { + for (const Object::value_type *E : sortedElements(*V.getAsObject())) + attribute(E->first, E->second); + }); + } +} + +void llvm::json::OStream::valueBegin() { + assert(Stack.back().Ctx != Object && "Only attributes allowed here"); + if (Stack.back().HasValue) { + assert(Stack.back().Ctx != Singleton && "Only one value allowed here"); + OS << ','; + } + if (Stack.back().Ctx == Array) + newline(); + Stack.back().HasValue = true; +} + +void llvm::json::OStream::newline() { + if (IndentSize) { + OS.write('\n'); + OS.indent(Indent); + } +} + +void llvm::json::OStream::arrayBegin() { + valueBegin(); + Stack.emplace_back(); + Stack.back().Ctx = Array; + Indent += IndentSize; + OS << '['; +} + +void llvm::json::OStream::arrayEnd() { + assert(Stack.back().Ctx == Array); + Indent -= IndentSize; + if (Stack.back().HasValue) + newline(); + OS << ']'; + Stack.pop_back(); + assert(!Stack.empty()); +} + +void llvm::json::OStream::objectBegin() { + valueBegin(); + Stack.emplace_back(); + Stack.back().Ctx = Object; + Indent += IndentSize; + OS << '{'; +} + +void llvm::json::OStream::objectEnd() { + assert(Stack.back().Ctx == Object); + Indent -= IndentSize; + if (Stack.back().HasValue) + newline(); + OS << '}'; + Stack.pop_back(); + assert(!Stack.empty()); +} + +void llvm::json::OStream::attributeBegin(llvm::StringRef Key) { + assert(Stack.back().Ctx == Object); + if (Stack.back().HasValue) + OS << ','; + newline(); + Stack.back().HasValue = true; + Stack.emplace_back(); + Stack.back().Ctx = Singleton; + if 
(LLVM_LIKELY(isUTF8(Key))) { + quote(OS, Key); + } else { + assert(false && "Invalid UTF-8 in attribute key"); + quote(OS, fixUTF8(Key)); + } + OS.write(':'); + if (IndentSize) + OS.write(' '); +} + +void llvm::json::OStream::attributeEnd() { + assert(Stack.back().Ctx == Singleton); + assert(Stack.back().HasValue && "Attribute must have a value"); + Stack.pop_back(); + assert(Stack.back().Ctx == Object); +} + +} // namespace json +} // namespace llvm + +void llvm::format_provider<llvm::json::Value>::format( + const llvm::json::Value &E, raw_ostream &OS, StringRef Options) { + unsigned IndentAmount = 0; + if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount)) + llvm_unreachable("json::Value format options should be an integer"); + json::OStream(OS, IndentAmount).value(E); +} + diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp new file mode 100644 index 0000000000000..a6c591fca3121 --- /dev/null +++ b/llvm/lib/Support/KnownBits.cpp @@ -0,0 +1,83 @@ +//===-- KnownBits.cpp - Stores known zeros/ones ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a class for representing known zeros and ones used by +// computeKnownBits. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/KnownBits.h" + +using namespace llvm; + +static KnownBits computeForAddCarry( + const KnownBits &LHS, const KnownBits &RHS, + bool CarryZero, bool CarryOne) { + assert(!(CarryZero && CarryOne) && + "Carry can't be zero and one at the same time"); + + APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero; + APInt PossibleSumOne = LHS.One + RHS.One + CarryOne; + + // Compute known bits of the carry. + APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero); + APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One; + + // Compute set of known bits (where all three relevant bits are known). + APInt LHSKnownUnion = LHS.Zero | LHS.One; + APInt RHSKnownUnion = RHS.Zero | RHS.One; + APInt CarryKnownUnion = std::move(CarryKnownZero) | CarryKnownOne; + APInt Known = std::move(LHSKnownUnion) & RHSKnownUnion & CarryKnownUnion; + + assert((PossibleSumZero & Known) == (PossibleSumOne & Known) && + "known bits of sum differ"); + + // Compute known bits of the result. + KnownBits KnownOut; + KnownOut.Zero = ~std::move(PossibleSumZero) & Known; + KnownOut.One = std::move(PossibleSumOne) & Known; + return KnownOut; +} + +KnownBits KnownBits::computeForAddCarry( + const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry) { + assert(Carry.getBitWidth() == 1 && "Carry must be 1-bit"); + return ::computeForAddCarry( + LHS, RHS, Carry.Zero.getBoolValue(), Carry.One.getBoolValue()); +} + +KnownBits KnownBits::computeForAddSub(bool Add, bool NSW, + const KnownBits &LHS, KnownBits RHS) { + KnownBits KnownOut; + if (Add) { + // Sum = LHS + RHS + 0 + KnownOut = ::computeForAddCarry( + LHS, RHS, /*CarryZero*/true, /*CarryOne*/false); + } else { + // Sum = LHS + ~RHS + 1 + std::swap(RHS.Zero, RHS.One); + KnownOut = ::computeForAddCarry( + LHS, RHS, /*CarryZero*/false, /*CarryOne*/true); + } + + // Are we still trying to solve for the sign bit? 
+ if (!KnownOut.isNegative() && !KnownOut.isNonNegative()) { + if (NSW) { + // Adding two non-negative numbers, or subtracting a negative number from + // a non-negative one, can't wrap into negative. + if (LHS.isNonNegative() && RHS.isNonNegative()) + KnownOut.makeNonNegative(); + // Adding two negative numbers, or subtracting a non-negative number from + // a negative one, can't wrap into non-negative. + else if (LHS.isNegative() && RHS.isNegative()) + KnownOut.makeNegative(); + } + } + + return KnownOut; +} diff --git a/llvm/lib/Support/LEB128.cpp b/llvm/lib/Support/LEB128.cpp new file mode 100644 index 0000000000000..d41b673e9c8a5 --- /dev/null +++ b/llvm/lib/Support/LEB128.cpp @@ -0,0 +1,43 @@ +//===- LEB128.cpp - LEB128 utility functions implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements some utility functions for encoding SLEB128 and +// ULEB128 values. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LEB128.h" + +namespace llvm { + +/// Utility function to get the size of the ULEB128-encoded value. +unsigned getULEB128Size(uint64_t Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +/// Utility function to get the size of the SLEB128-encoded value. 
+unsigned getSLEB128Size(int64_t Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} + +} // namespace llvm diff --git a/llvm/lib/Support/LineIterator.cpp b/llvm/lib/Support/LineIterator.cpp new file mode 100644 index 0000000000000..164436a2c48ed --- /dev/null +++ b/llvm/lib/Support/LineIterator.cpp @@ -0,0 +1,93 @@ +//===- LineIterator.cpp - Implementation of line iteration ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +static bool isAtLineEnd(const char *P) { + if (*P == '\n') + return true; + if (*P == '\r' && *(P + 1) == '\n') + return true; + return false; +} + +static bool skipIfAtLineEnd(const char *&P) { + if (*P == '\n') { + ++P; + return true; + } + if (*P == '\r' && *(P + 1) == '\n') { + P += 2; + return true; + } + return false; +} + +line_iterator::line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks, + char CommentMarker) + : Buffer(Buffer.getBufferSize() ? &Buffer : nullptr), + CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), LineNumber(1), + CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr, + 0) { + // Ensure that if we are constructed on a non-empty memory buffer that it is + // a null terminated buffer. 
+ if (Buffer.getBufferSize()) { + assert(Buffer.getBufferEnd()[0] == '\0'); + // Make sure we don't skip a leading newline if we're keeping blanks + if (SkipBlanks || !isAtLineEnd(Buffer.getBufferStart())) + advance(); + } +} + +void line_iterator::advance() { + assert(Buffer && "Cannot advance past the end!"); + + const char *Pos = CurrentLine.end(); + assert(Pos == Buffer->getBufferStart() || isAtLineEnd(Pos) || *Pos == '\0'); + + if (skipIfAtLineEnd(Pos)) + ++LineNumber; + if (!SkipBlanks && isAtLineEnd(Pos)) { + // Nothing to do for a blank line. + } else if (CommentMarker == '\0') { + // If we're not stripping comments, this is simpler. + while (skipIfAtLineEnd(Pos)) + ++LineNumber; + } else { + // Skip comments and count line numbers, which is a bit more complex. + for (;;) { + if (isAtLineEnd(Pos) && !SkipBlanks) + break; + if (*Pos == CommentMarker) + do { + ++Pos; + } while (*Pos != '\0' && !isAtLineEnd(Pos)); + if (!skipIfAtLineEnd(Pos)) + break; + ++LineNumber; + } + } + + if (*Pos == '\0') { + // We've hit the end of the buffer, reset ourselves to the end state. + Buffer = nullptr; + CurrentLine = StringRef(); + return; + } + + // Measure the line. 
+ size_t Length = 0; + while (Pos[Length] != '\0' && !isAtLineEnd(&Pos[Length])) { + ++Length; + } + + CurrentLine = StringRef(Pos, Length); +} diff --git a/llvm/lib/Support/Locale.cpp b/llvm/lib/Support/Locale.cpp new file mode 100644 index 0000000000000..1b3300b90f2a3 --- /dev/null +++ b/llvm/lib/Support/Locale.cpp @@ -0,0 +1,19 @@ +#include "llvm/Support/Locale.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Unicode.h" + +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef Text) { + return llvm::sys::unicode::columnWidthUTF8(Text); +} + +bool isPrint(int UCS) { + return llvm::sys::unicode::isPrintable(UCS); +} + +} // namespace locale +} // namespace sys +} // namespace llvm diff --git a/llvm/lib/Support/LockFileManager.cpp b/llvm/lib/Support/LockFileManager.cpp new file mode 100644 index 0000000000000..10181192afbd8 --- /dev/null +++ b/llvm/lib/Support/LockFileManager.cpp @@ -0,0 +1,355 @@ +//===--- LockFileManager.cpp - File-level Locking Utility------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LockFileManager.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" +#include <cerrno> +#include <ctime> +#include <memory> +#include <sys/stat.h> +#include <sys/types.h> +#include <system_error> +#include <tuple> +#ifdef _WIN32 +#include <windows.h> +#endif +#if LLVM_ON_UNIX +#include <unistd.h> +#endif + +#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) +#define USE_OSX_GETHOSTUUID 1 +#else +#define USE_OSX_GETHOSTUUID 0 +#endif + +#if USE_OSX_GETHOSTUUID +#include <uuid/uuid.h> +#endif + +using namespace llvm; + +/// Attempt to read the lock file with the given name, if it exists. +/// +/// \param LockFileName The name of the lock file to read. +/// +/// \returns The process ID of the process that owns this lock file +Optional<std::pair<std::string, int> > +LockFileManager::readLockFile(StringRef LockFileName) { + // Read the owning host and PID out of the lock file. If it appears that the + // owning process is dead, the lock file is invalid. 
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFile(LockFileName); + if (!MBOrErr) { + sys::fs::remove(LockFileName); + return None; + } + MemoryBuffer &MB = *MBOrErr.get(); + + StringRef Hostname; + StringRef PIDStr; + std::tie(Hostname, PIDStr) = getToken(MB.getBuffer(), " "); + PIDStr = PIDStr.substr(PIDStr.find_first_not_of(" ")); + int PID; + if (!PIDStr.getAsInteger(10, PID)) { + auto Owner = std::make_pair(std::string(Hostname), PID); + if (processStillExecuting(Owner.first, Owner.second)) + return Owner; + } + + // Delete the lock file. It's invalid anyway. + sys::fs::remove(LockFileName); + return None; +} + +static std::error_code getHostID(SmallVectorImpl<char> &HostID) { + HostID.clear(); + +#if USE_OSX_GETHOSTUUID + // On OS X, use the more stable hardware UUID instead of hostname. + struct timespec wait = {1, 0}; // 1 second. + uuid_t uuid; + if (gethostuuid(uuid, &wait) != 0) + return std::error_code(errno, std::system_category()); + + uuid_string_t UUIDStr; + uuid_unparse(uuid, UUIDStr); + StringRef UUIDRef(UUIDStr); + HostID.append(UUIDRef.begin(), UUIDRef.end()); + +#elif LLVM_ON_UNIX + char HostName[256]; + HostName[255] = 0; + HostName[0] = 0; + gethostname(HostName, 255); + StringRef HostNameRef(HostName); + HostID.append(HostNameRef.begin(), HostNameRef.end()); + +#else + StringRef Dummy("localhost"); + HostID.append(Dummy.begin(), Dummy.end()); +#endif + + return std::error_code(); +} + +bool LockFileManager::processStillExecuting(StringRef HostID, int PID) { +#if LLVM_ON_UNIX && !defined(__ANDROID__) + SmallString<256> StoredHostID; + if (getHostID(StoredHostID)) + return true; // Conservatively assume it's executing on error. + + // Check whether the process is dead. If so, we're done. + if (StoredHostID == HostID && getsid(PID) == -1 && errno == ESRCH) + return false; +#endif + + return true; +} + +namespace { + +/// An RAII helper object ensure that the unique lock file is removed. 
+/// +/// Ensures that if there is an error or a signal before we finish acquiring the +/// lock, the unique file will be removed. And if we successfully take the lock, +/// the signal handler is left in place so that signals while the lock is held +/// will remove the unique lock file. The caller should ensure there is a +/// matching call to sys::DontRemoveFileOnSignal when the lock is released. +class RemoveUniqueLockFileOnSignal { + StringRef Filename; + bool RemoveImmediately; +public: + RemoveUniqueLockFileOnSignal(StringRef Name) + : Filename(Name), RemoveImmediately(true) { + sys::RemoveFileOnSignal(Filename, nullptr); + } + + ~RemoveUniqueLockFileOnSignal() { + if (!RemoveImmediately) { + // Leave the signal handler enabled. It will be removed when the lock is + // released. + return; + } + sys::fs::remove(Filename); + sys::DontRemoveFileOnSignal(Filename); + } + + void lockAcquired() { RemoveImmediately = false; } +}; + +} // end anonymous namespace + +LockFileManager::LockFileManager(StringRef FileName) +{ + this->FileName = FileName; + if (std::error_code EC = sys::fs::make_absolute(this->FileName)) { + std::string S("failed to obtain absolute path for "); + S.append(this->FileName.str()); + setError(EC, S); + return; + } + LockFileName = this->FileName; + LockFileName += ".lock"; + + // If the lock file already exists, don't bother to try to create our own + // lock file; it won't work anyway. Just figure out who owns this lock file. + if ((Owner = readLockFile(LockFileName))) + return; + + // Create a lock file that is unique to this instance. + UniqueLockFileName = LockFileName; + UniqueLockFileName += "-%%%%%%%%"; + int UniqueLockFileID; + if (std::error_code EC = sys::fs::createUniqueFile( + UniqueLockFileName, UniqueLockFileID, UniqueLockFileName)) { + std::string S("failed to create unique file "); + S.append(UniqueLockFileName.str()); + setError(EC, S); + return; + } + + // Write our process ID to our unique lock file. 
+ { + SmallString<256> HostID; + if (auto EC = getHostID(HostID)) { + setError(EC, "failed to get host id"); + return; + } + + raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true); + Out << HostID << ' '; +#if LLVM_ON_UNIX + Out << getpid(); +#else + Out << "1"; +#endif + Out.close(); + + if (Out.has_error()) { + // We failed to write out PID, so report the error, remove the + // unique lock file, and fail. + std::string S("failed to write to "); + S.append(UniqueLockFileName.str()); + setError(Out.error(), S); + sys::fs::remove(UniqueLockFileName); + return; + } + } + + // Clean up the unique file on signal, which also releases the lock if it is + // held since the .lock symlink will point to a nonexistent file. + RemoveUniqueLockFileOnSignal RemoveUniqueFile(UniqueLockFileName); + + while (true) { + // Create a link from the lock file name. If this succeeds, we're done. + std::error_code EC = + sys::fs::create_link(UniqueLockFileName, LockFileName); + if (!EC) { + RemoveUniqueFile.lockAcquired(); + return; + } + + if (EC != errc::file_exists) { + std::string S("failed to create link "); + raw_string_ostream OSS(S); + OSS << LockFileName.str() << " to " << UniqueLockFileName.str(); + setError(EC, OSS.str()); + return; + } + + // Someone else managed to create the lock file first. Read the process ID + // from the lock file. + if ((Owner = readLockFile(LockFileName))) { + // Wipe out our unique lock file (it's useless now) + sys::fs::remove(UniqueLockFileName); + return; + } + + if (!sys::fs::exists(LockFileName)) { + // The previous owner released the lock file before we could read it. + // Try to get ownership again. + continue; + } + + // There is a lock file that nobody owns; try to clean it up and get + // ownership. 
+ if ((EC = sys::fs::remove(LockFileName))) { + std::string S("failed to remove lockfile "); + S.append(UniqueLockFileName.str()); + setError(EC, S); + return; + } + } +} + +LockFileManager::LockFileState LockFileManager::getState() const { + if (Owner) + return LFS_Shared; + + if (ErrorCode) + return LFS_Error; + + return LFS_Owned; +} + +std::string LockFileManager::getErrorMessage() const { + if (ErrorCode) { + std::string Str(ErrorDiagMsg); + std::string ErrCodeMsg = ErrorCode.message(); + raw_string_ostream OSS(Str); + if (!ErrCodeMsg.empty()) + OSS << ": " << ErrCodeMsg; + return OSS.str(); + } + return ""; +} + +LockFileManager::~LockFileManager() { + if (getState() != LFS_Owned) + return; + + // Since we own the lock, remove the lock file and our own unique lock file. + sys::fs::remove(LockFileName); + sys::fs::remove(UniqueLockFileName); + // The unique file is now gone, so remove it from the signal handler. This + // matches a sys::RemoveFileOnSignal() in LockFileManager(). + sys::DontRemoveFileOnSignal(UniqueLockFileName); +} + +LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() { + if (getState() != LFS_Shared) + return Res_Success; + +#ifdef _WIN32 + unsigned long Interval = 1; +#else + struct timespec Interval; + Interval.tv_sec = 0; + Interval.tv_nsec = 1000000; +#endif + // Don't wait more than 40s per iteration. Total timeout for the file + // to appear is ~1.5 minutes. + const unsigned MaxSeconds = 40; + do { + // Sleep for the designated interval, to allow the owning process time to + // finish up and remove the lock file. + // FIXME: Should we hook in to system APIs to get a notification when the + // lock file is deleted? +#ifdef _WIN32 + Sleep(Interval); +#else + nanosleep(&Interval, nullptr); +#endif + + if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) == + errc::no_such_file_or_directory) { + // If the original file wasn't created, somone thought the lock was dead. 
+ if (!sys::fs::exists(FileName)) + return Res_OwnerDied; + return Res_Success; + } + + // If the process owning the lock died without cleaning up, just bail out. + if (!processStillExecuting((*Owner).first, (*Owner).second)) + return Res_OwnerDied; + + // Exponentially increase the time we wait for the lock to be removed. +#ifdef _WIN32 + Interval *= 2; +#else + Interval.tv_sec *= 2; + Interval.tv_nsec *= 2; + if (Interval.tv_nsec >= 1000000000) { + ++Interval.tv_sec; + Interval.tv_nsec -= 1000000000; + } +#endif + } while ( +#ifdef _WIN32 + Interval < MaxSeconds * 1000 +#else + Interval.tv_sec < (time_t)MaxSeconds +#endif + ); + + // Give up. + return Res_Timeout; +} + +std::error_code LockFileManager::unsafeRemoveLockFile() { + return sys::fs::remove(LockFileName); +} diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp new file mode 100644 index 0000000000000..fe77cb3db4139 --- /dev/null +++ b/llvm/lib/Support/LowLevelType.cpp @@ -0,0 +1,55 @@ +//===-- llvm/Support/LowLevelType.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the more header-heavy bits of the LLT class to +/// avoid polluting users' namespaces. 
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

/// Builds an LLT from a machine value type:
///  - a vector MVT becomes a non-pointer type with VT's element count and
///    element size, flagged as a vector only when it has more than one
///    element;
///  - any other valid MVT becomes a scalar of VT's size;
///  - an invalid MVT leaves all fields cleared.
LLT::LLT(MVT VT) {
  if (VT.isVector()) {
    init(/*IsPointer=*/false, VT.getVectorNumElements() > 1,
         VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(),
         /*AddressSpace=*/0);
  } else if (VT.isValid()) {
    // Aggregates are no different from real scalars as far as GlobalISel is
    // concerned.
    assert(VT.getSizeInBits() != 0 && "invalid zero-sized type");
    init(/*IsPointer=*/false, /*IsVector=*/false, /*NumElements=*/0,
         VT.getSizeInBits(), /*AddressSpace=*/0);
  } else {
    IsPointer = false;
    IsVector = false;
    RawData = 0;
  }
}

/// Prints the type to \p OS: "<N x Elt>" for vectors, "p<address space>" for
/// pointers, "s<bits>" for scalars and "LLT_invalid" otherwise. The vector
/// test comes first.
void LLT::print(raw_ostream &OS) const {
  if (isVector())
    OS << "<" << getNumElements() << " x " << getElementType() << ">";
  else if (isPointer())
    OS << "p" << getAddressSpace();
  else if (isValid()) {
    assert(isScalar() && "unexpected type");
    OS << "s" << getScalarSizeInBits();
  } else
    OS << "LLT_invalid";
}

// Namespace-scope definitions of LLT's static BitFieldInfo members.
// NOTE(review): presumably required so the in-class constexpr members can be
// odr-used before C++17 -- confirm against the header.
const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;

// ---- llvm/lib/Support/MD5.cpp ----
/*
 * This code is derived from (original license follows):
 *
 * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
 * MD5 Message-Digest Algorithm (RFC 1321).
 *
 * Homepage:
 * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
 *
 * Author:
 * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
 *
 * This software was written by Alexander Peslyak in 2001. No copyright is
 * claimed, and the software is hereby placed in the public domain.
 * In case this attempt to disclaim copyright and place the software in the
 * public domain is deemed null and void, then the software is
 * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
 * general public under the following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * There's ABSOLUTELY NO WARRANTY, express or implied.
 *
 * (This is a heavily cut-down "BSD license".)
 *
 * This differs from Colin Plumb's older public domain implementation in that
 * no exactly 32-bit integer data type is required (any 32-bit or wider
 * unsigned integer data type will do), there's no compile-time endianness
 * configuration, and the function prototypes match OpenSSL's. No code from
 * Colin Plumb's implementation has been reused; this comment merely compares
 * the properties of the two independent implementations.
 *
 * The primary goals of this implementation are portability and ease of use.
 * It is meant to be fast, but not as fast as possible. Some known
 * optimizations are not included to reduce source code size and avoid
 * compile-time configuration.
 */

#include "llvm/Support/MD5.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <array>
#include <cstdint>
#include <cstring>

// The basic MD5 functions.

// F and G are optimized compared to their RFC 1321 definitions for
// architectures that lack an AND-NOT instruction, just like in Colin Plumb's
// implementation.
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))

// The MD5 transformation for all four rounds.
// Expands to three statements that update (a) in place: accumulate, rotate
// left by (s) within the low 32 bits, then add (b). Because it is several
// statements and evaluates (a) more than once, it is only safe as a
// standalone statement with side-effect-free arguments for a, b, c, d.
#define STEP(f, a, b, c, d, x, t, s)                                           \
  (a) += f((b), (c), (d)) + (x) + (t);                                         \
  (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s))));                   \
  (a) += (b);

// SET reads 4 input bytes in little-endian byte order and stores them
// in a properly aligned word in host byte order.
// Both macros refer to names `block` and `ptr` that must be in scope at the
// point of use; SET(n) writes block[n] and yields the stored word, GET(n)
// reads it back.
#define SET(n)                                                                 \
  (block[(n)] =                                                                \
       (MD5_u32plus) ptr[(n) * 4] | ((MD5_u32plus) ptr[(n) * 4 + 1] << 8) |    \
       ((MD5_u32plus) ptr[(n) * 4 + 2] << 16) |                                \
       ((MD5_u32plus) ptr[(n) * 4 + 3] << 24))
#define GET(n) (block[(n)])

using namespace llvm;

/// This processes one or more 64-byte data blocks, but does NOT update
/// the bit counters. There are no alignment requirements.
// Returns a pointer just past the last byte consumed from Data.
//
// The loop below subtracts 64 from the size after every block and stops when
// it reaches zero, so Data.size() must be a non-zero multiple of 64. The
// callers visible in this file pass either exactly 64 or a size masked with
// ~0x3f after checking it is at least 64.
//
// `block`, used through SET/GET, is not declared in this function.
// NOTE(review): presumably a member of MD5 -- confirm in MD5.h.
const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
  const uint8_t *ptr;
  MD5_u32plus a, b, c, d;
  MD5_u32plus saved_a, saved_b, saved_c, saved_d;
  unsigned long Size = Data.size();

  ptr = Data.data();

  // Work on local copies of the four state words; they are written back once
  // after the last block.
  a = this->a;
  b = this->b;
  c = this->c;
  d = this->d;

  do {
    // Remember the state on entry to this block; it is added back in after
    // the four rounds.
    saved_a = a;
    saved_b = b;
    saved_c = c;
    saved_d = d;

    // Round 1
    // SET(n) both loads input word n from ptr and caches it in block[n];
    // rounds 2-4 read the cached words through GET.
    STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
    STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
    STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
    STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
    STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
    STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
    STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
    STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
    STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
    STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
    STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
    STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
    STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
    STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
    STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
    STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)

    // Round 2
    STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
    STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
    STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
    STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
    STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
    STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
    STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
    STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
    STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
    STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
    STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
    STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
    STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
    STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
    STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
    STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)

    // Round 3
    STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
    STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
    STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
    STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
    STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
    STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
    STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
    STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
    STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
    STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
    STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
    STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
    STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
    STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
    STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
    STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)

    // Round 4
    STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
    STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
    STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
    STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
    STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
    STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
    STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
    STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
    STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
    STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
    STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
    STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
    STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
    STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
    STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
    STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)

    a += saved_a;
    b += saved_b;
    c += saved_c;
    d += saved_d;

    ptr += 64;
  } while (Size -= 64);

  this->a = a;
  this->b = b;
  this->c = c;
  this->d = d;

  return ptr;
}

// Defaulted constructor; any initial state comes from the class definition,
// which is outside this file.
MD5::MD5() = default;

/// Incrementally add the bytes in \p Data to the hash.
+void MD5::update(ArrayRef<uint8_t> Data) { + MD5_u32plus saved_lo; + unsigned long used, free; + const uint8_t *Ptr = Data.data(); + unsigned long Size = Data.size(); + + saved_lo = lo; + if ((lo = (saved_lo + Size) & 0x1fffffff) < saved_lo) + hi++; + hi += Size >> 29; + + used = saved_lo & 0x3f; + + if (used) { + free = 64 - used; + + if (Size < free) { + memcpy(&buffer[used], Ptr, Size); + return; + } + + memcpy(&buffer[used], Ptr, free); + Ptr = Ptr + free; + Size -= free; + body(makeArrayRef(buffer, 64)); + } + + if (Size >= 64) { + Ptr = body(makeArrayRef(Ptr, Size & ~(unsigned long) 0x3f)); + Size &= 0x3f; + } + + memcpy(buffer, Ptr, Size); +} + +/// Add the bytes in the StringRef \p Str to the hash. +// Note that this isn't a string and so this won't include any trailing NULL +// bytes. +void MD5::update(StringRef Str) { + ArrayRef<uint8_t> SVal((const uint8_t *)Str.data(), Str.size()); + update(SVal); +} + +/// Finish the hash and place the resulting hash into \p result. +/// \param Result is assumed to be a minimum of 16-bytes in size. 
+void MD5::final(MD5Result &Result) { + unsigned long used, free; + + used = lo & 0x3f; + + buffer[used++] = 0x80; + + free = 64 - used; + + if (free < 8) { + memset(&buffer[used], 0, free); + body(makeArrayRef(buffer, 64)); + used = 0; + free = 64; + } + + memset(&buffer[used], 0, free - 8); + + lo <<= 3; + support::endian::write32le(&buffer[56], lo); + support::endian::write32le(&buffer[60], hi); + + body(makeArrayRef(buffer, 64)); + + support::endian::write32le(&Result[0], a); + support::endian::write32le(&Result[4], b); + support::endian::write32le(&Result[8], c); + support::endian::write32le(&Result[12], d); +} + +SmallString<32> MD5::MD5Result::digest() const { + SmallString<32> Str; + raw_svector_ostream Res(Str); + for (int i = 0; i < 16; ++i) + Res << format("%.2x", Bytes[i]); + return Str; +} + +void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) { + Str = Result.digest(); +} + +std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) { + MD5 Hash; + Hash.update(Data); + MD5::MD5Result Res; + Hash.final(Res); + + return Res; +} diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp new file mode 100644 index 0000000000000..053493f72fb5e --- /dev/null +++ b/llvm/lib/Support/ManagedStatic.cpp @@ -0,0 +1,83 @@ +//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the ManagedStatic class and llvm_shutdown(). 
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Threading.h"
#include <cassert>
#include <mutex>
using namespace llvm;

// Head of the singly linked list of initialized ManagedStatics; the most
// recently registered object comes first.
static const ManagedStaticBase *StaticList = nullptr;
// Mutex used by the multithreaded registration path and by llvm_shutdown().
// It is allocated on first use and no delete appears in this file.
static std::recursive_mutex *ManagedStaticMutex = nullptr;
static llvm::once_flag mutex_init_flag;

/// Allocates the global mutex; invoked through call_once below.
static void initializeMutex() {
  ManagedStaticMutex = new std::recursive_mutex();
}

/// Returns the global mutex, creating it on the first call.
static std::recursive_mutex *getManagedStaticMutex() {
  llvm::call_once(mutex_init_flag, initializeMutex);
  return ManagedStaticMutex;
}

/// Creates the managed object with \p Creator, records \p Deleter, and links
/// this ManagedStatic at the head of StaticList.
///
/// When llvm_is_multithreaded() is true this runs under the global mutex and
/// does nothing if Ptr is already set; otherwise it asserts that the object
/// is completely uninitialized.
void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
                                              void (*Deleter)(void*)) const {
  assert(Creator);
  if (llvm_is_multithreaded()) {
    std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());

    // Re-check under the lock. A relaxed load is used here because the mutex
    // already orders this against the store below.
    if (!Ptr.load(std::memory_order_relaxed)) {
      void *Tmp = Creator();

      // Release store: the object is fully constructed before the pointer
      // becomes visible.
      // NOTE(review): presumably paired with an acquire load in the header's
      // accessor -- confirm in ManagedStatic.h.
      Ptr.store(Tmp, std::memory_order_release);
      DeleterFn = Deleter;

      // Add to list of managed statics.
      Next = StaticList;
      StaticList = this;
    }
  } else {
    assert(!Ptr && !DeleterFn && !Next &&
           "Partially initialized ManagedStatic!?");
    Ptr = Creator();
    DeleterFn = Deleter;

    // Add to list of managed statics.
    Next = StaticList;
    StaticList = this;
  }
}

/// Destroys the managed object and returns this ManagedStatic to its
/// uninitialized state. Must be called on the current head of StaticList,
/// i.e. in reverse order of registration.
void ManagedStaticBase::destroy() const {
  assert(DeleterFn && "ManagedStatic not initialized correctly!");
  assert(StaticList == this &&
         "Not destroyed in reverse order of construction?");
  // Unlink from list.
  StaticList = Next;
  Next = nullptr;

  // Destroy memory.
  DeleterFn(Ptr);

  // Cleanup.
  Ptr = nullptr;
  DeleterFn = nullptr;
}

/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
+void llvm::llvm_shutdown() { + std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex()); + + while (StaticList) + StaticList->destroy(); +} diff --git a/llvm/lib/Support/MathExtras.cpp b/llvm/lib/Support/MathExtras.cpp new file mode 100644 index 0000000000000..87c7101c424b2 --- /dev/null +++ b/llvm/lib/Support/MathExtras.cpp @@ -0,0 +1,31 @@ +//===-- MathExtras.cpp - Implement the MathExtras header --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MathExtras.h header +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MathExtras.h" + +#ifdef _MSC_VER +#include <limits> +#else +#include <math.h> +#endif + +namespace llvm { + +#if defined(_MSC_VER) + // Visual Studio defines the HUGE_VAL class of macros using purposeful + // constant arithmetic overflow, which it then warns on when encountered. + const float huge_valf = std::numeric_limits<float>::infinity(); +#else + const float huge_valf = HUGE_VALF; +#endif + +} diff --git a/llvm/lib/Support/Memory.cpp b/llvm/lib/Support/Memory.cpp new file mode 100644 index 0000000000000..581484268cd87 --- /dev/null +++ b/llvm/lib/Support/Memory.cpp @@ -0,0 +1,53 @@ +//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for allocating memory and dealing +// with memory mapped files +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Memory.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Valgrind.h" + +#ifndef NDEBUG +#include "llvm/Support/raw_ostream.h" +#endif // ifndef NDEBUG + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Memory.inc" +#endif +#ifdef _WIN32 +#include "Windows/Memory.inc" +#endif + +#ifndef NDEBUG + +namespace llvm { +namespace sys { + +raw_ostream &operator<<(raw_ostream &OS, const Memory::ProtectionFlags &PF) { + assert((PF & ~(Memory::MF_READ | Memory::MF_WRITE | Memory::MF_EXEC)) == 0 && + "Unrecognized flags"); + + return OS << (PF & Memory::MF_READ ? 'R' : '-') + << (PF & Memory::MF_WRITE ? 'W' : '-') + << (PF & Memory::MF_EXEC ? 'X' : '-'); +} + +raw_ostream &operator<<(raw_ostream &OS, const MemoryBlock &MB) { + return OS << "[ " << MB.base() << " .. " + << (void *)((char *)MB.base() + MB.allocatedSize()) << " ] (" + << MB.allocatedSize() << " bytes)"; +} + +} // end namespace sys +} // end namespace llvm + +#endif // ifndef NDEBUG diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp new file mode 100644 index 0000000000000..e4027ca7bbfd5 --- /dev/null +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -0,0 +1,525 @@ +//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MemoryBuffer interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Errno.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" +#include <cassert> +#include <cerrno> +#include <cstring> +#include <new> +#include <sys/types.h> +#include <system_error> +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MemoryBuffer implementation itself. +//===----------------------------------------------------------------------===// + +MemoryBuffer::~MemoryBuffer() { } + +/// init - Initialize this MemoryBuffer as a reference to externally allocated +/// memory, memory that we know is already null terminated. +void MemoryBuffer::init(const char *BufStart, const char *BufEnd, + bool RequiresNullTerminator) { + assert((!RequiresNullTerminator || BufEnd[0] == 0) && + "Buffer is not null terminated!"); + BufferStart = BufStart; + BufferEnd = BufEnd; +} + +//===----------------------------------------------------------------------===// +// MemoryBufferMem implementation. +//===----------------------------------------------------------------------===// + +/// CopyStringRef - Copies contents of a StringRef into a block of memory and +/// null-terminates it. 
+static void CopyStringRef(char *Memory, StringRef Data) { + if (!Data.empty()) + memcpy(Memory, Data.data(), Data.size()); + Memory[Data.size()] = 0; // Null terminate string. +} + +namespace { +struct NamedBufferAlloc { + const Twine &Name; + NamedBufferAlloc(const Twine &Name) : Name(Name) {} +}; +} + +void *operator new(size_t N, const NamedBufferAlloc &Alloc) { + SmallString<256> NameBuf; + StringRef NameRef = Alloc.Name.toStringRef(NameBuf); + + char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1)); + CopyStringRef(Mem + N, NameRef); + return Mem; +} + +namespace { +/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. +template<typename MB> +class MemoryBufferMem : public MB { +public: + MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) { + MemoryBuffer::init(InputData.begin(), InputData.end(), + RequiresNullTerminator); + } + + /// Disable sized deallocation for MemoryBufferMem, because it has + /// tail-allocated data. + void operator delete(void *p) { ::operator delete(p); } + + StringRef getBufferIdentifier() const override { + // The name is stored after the class itself. 
+ return StringRef(reinterpret_cast<const char *>(this + 1)); + } + + MemoryBuffer::BufferKind getBufferKind() const override { + return MemoryBuffer::MemoryBuffer_Malloc; + } +}; +} + +template <typename MB> +static ErrorOr<std::unique_ptr<MB>> +getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize, + uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile); + +std::unique_ptr<MemoryBuffer> +MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, + bool RequiresNullTerminator) { + auto *Ret = new (NamedBufferAlloc(BufferName)) + MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator); + return std::unique_ptr<MemoryBuffer>(Ret); +} + +std::unique_ptr<MemoryBuffer> +MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) { + return std::unique_ptr<MemoryBuffer>(getMemBuffer( + Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator)); +} + +static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> +getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) { + auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); + if (!Buf) + return make_error_code(errc::not_enough_memory); + memcpy(Buf->getBufferStart(), InputData.data(), InputData.size()); + return std::move(Buf); +} + +std::unique_ptr<MemoryBuffer> +MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) { + auto Buf = getMemBufferCopyImpl(InputData, BufferName); + if (Buf) + return std::move(*Buf); + return nullptr; +} + +ErrorOr<std::unique_ptr<MemoryBuffer>> +MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize, + bool RequiresNullTerminator) { + SmallString<256> NameBuf; + StringRef NameRef = Filename.toStringRef(NameBuf); + + if (NameRef == "-") + return getSTDIN(); + return getFile(Filename, FileSize, RequiresNullTerminator); +} + +ErrorOr<std::unique_ptr<MemoryBuffer>> +MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize, + uint64_t Offset, 
bool IsVolatile) { + return getFileAux<MemoryBuffer>(FilePath, -1, MapSize, Offset, false, + IsVolatile); +} + +//===----------------------------------------------------------------------===// +// MemoryBuffer::getFile implementation. +//===----------------------------------------------------------------------===// + +namespace { +/// Memory maps a file descriptor using sys::fs::mapped_file_region. +/// +/// This handles converting the offset into a legal offset on the platform. +template<typename MB> +class MemoryBufferMMapFile : public MB { + sys::fs::mapped_file_region MFR; + + static uint64_t getLegalMapOffset(uint64_t Offset) { + return Offset & ~(sys::fs::mapped_file_region::alignment() - 1); + } + + static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) { + return Len + (Offset - getLegalMapOffset(Offset)); + } + + const char *getStart(uint64_t Len, uint64_t Offset) { + return MFR.const_data() + (Offset - getLegalMapOffset(Offset)); + } + +public: + MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len, + uint64_t Offset, std::error_code &EC) + : MFR(FD, MB::Mapmode, getLegalMapSize(Len, Offset), + getLegalMapOffset(Offset), EC) { + if (!EC) { + const char *Start = getStart(Len, Offset); + MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator); + } + } + + /// Disable sized deallocation for MemoryBufferMMapFile, because it has + /// tail-allocated data. + void operator delete(void *p) { ::operator delete(p); } + + StringRef getBufferIdentifier() const override { + // The name is stored after the class itself. 
+ return StringRef(reinterpret_cast<const char *>(this + 1)); + } + + MemoryBuffer::BufferKind getBufferKind() const override { + return MemoryBuffer::MemoryBuffer_MMap; + } +}; +} + +static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> +getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) { + const ssize_t ChunkSize = 4096*4; + SmallString<ChunkSize> Buffer; + // Read into Buffer until we hit EOF. + for (;;) { + Buffer.reserve(Buffer.size() + ChunkSize); + Expected<size_t> ReadBytes = sys::fs::readNativeFile( + FD, makeMutableArrayRef(Buffer.end(), ChunkSize)); + if (!ReadBytes) + return errorToErrorCode(ReadBytes.takeError()); + if (*ReadBytes == 0) + break; + Buffer.set_size(Buffer.size() + *ReadBytes); + } + + return getMemBufferCopyImpl(Buffer, BufferName); +} + + +ErrorOr<std::unique_ptr<MemoryBuffer>> +MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize, + bool RequiresNullTerminator, bool IsVolatile) { + return getFileAux<MemoryBuffer>(Filename, FileSize, FileSize, 0, + RequiresNullTerminator, IsVolatile); +} + +template <typename MB> +static ErrorOr<std::unique_ptr<MB>> +getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, + uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, + bool IsVolatile); + +template <typename MB> +static ErrorOr<std::unique_ptr<MB>> +getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize, + uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) { + Expected<sys::fs::file_t> FDOrErr = + sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None); + if (!FDOrErr) + return errorToErrorCode(FDOrErr.takeError()); + sys::fs::file_t FD = *FDOrErr; + auto Ret = getOpenFileImpl<MB>(FD, Filename, FileSize, MapSize, Offset, + RequiresNullTerminator, IsVolatile); + sys::fs::closeFile(FD); + return Ret; +} + +ErrorOr<std::unique_ptr<WritableMemoryBuffer>> +WritableMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize, + bool IsVolatile) { + return 
getFileAux<WritableMemoryBuffer>(Filename, FileSize, FileSize, 0, + /*RequiresNullTerminator*/ false, + IsVolatile); +} + +ErrorOr<std::unique_ptr<WritableMemoryBuffer>> +WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize, + uint64_t Offset, bool IsVolatile) { + return getFileAux<WritableMemoryBuffer>(Filename, -1, MapSize, Offset, false, + IsVolatile); +} + +std::unique_ptr<WritableMemoryBuffer> +WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) { + using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>; + // Allocate space for the MemoryBuffer, the data and the name. It is important + // that MemoryBuffer and data are aligned so PointerIntPair works with them. + // TODO: Is 16-byte alignment enough? We copy small object files with large + // alignment expectations into this buffer. + SmallString<256> NameBuf; + StringRef NameRef = BufferName.toStringRef(NameBuf); + size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16); + size_t RealLen = AlignedStringLen + Size + 1; + char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow)); + if (!Mem) + return nullptr; + + // The name is stored after the class itself. + CopyStringRef(Mem + sizeof(MemBuffer), NameRef); + + // The buffer begins after the name and must be aligned. + char *Buf = Mem + AlignedStringLen; + Buf[Size] = 0; // Null terminate buffer. 
+ + auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true); + return std::unique_ptr<WritableMemoryBuffer>(Ret); +} + +std::unique_ptr<WritableMemoryBuffer> +WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) { + auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName); + if (!SB) + return nullptr; + memset(SB->getBufferStart(), 0, Size); + return SB; +} + +static bool shouldUseMmap(sys::fs::file_t FD, + size_t FileSize, + size_t MapSize, + off_t Offset, + bool RequiresNullTerminator, + int PageSize, + bool IsVolatile) { + // mmap may leave the buffer without null terminator if the file size changed + // by the time the last page is mapped in, so avoid it if the file size is + // likely to change. + if (IsVolatile) + return false; + + // We don't use mmap for small files because this can severely fragment our + // address space. + if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize) + return false; + + if (!RequiresNullTerminator) + return true; + + // If we don't know the file size, use fstat to find out. fstat on an open + // file descriptor is cheaper than stat on a random path. + // FIXME: this chunk of code is duplicated, but it avoids a fstat when + // RequiresNullTerminator = false and MapSize != -1. + if (FileSize == size_t(-1)) { + sys::fs::file_status Status; + if (sys::fs::status(FD, Status)) + return false; + FileSize = Status.getSize(); + } + + // If we need a null terminator and the end of the map is inside the file, + // we cannot use mmap. + size_t End = Offset + MapSize; + assert(End <= FileSize); + if (End != FileSize) + return false; + + // Don't try to map files that are exactly a multiple of the system page size + // if we need a null terminator. + if ((FileSize & (PageSize -1)) == 0) + return false; + +#if defined(__CYGWIN__) + // Don't try to map files that are exactly a multiple of the physical page size + // if we need a null terminator. 
+ // FIXME: We should reorganize again getPageSize() on Win32. + if ((FileSize & (4096 - 1)) == 0) + return false; +#endif + + return true; +} + +static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> +getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize, + uint64_t Offset) { + Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite( + Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None); + if (!FDOrErr) + return errorToErrorCode(FDOrErr.takeError()); + sys::fs::file_t FD = *FDOrErr; + + // Default is to map the full file. + if (MapSize == uint64_t(-1)) { + // If we don't know the file size, use fstat to find out. fstat on an open + // file descriptor is cheaper than stat on a random path. + if (FileSize == uint64_t(-1)) { + sys::fs::file_status Status; + std::error_code EC = sys::fs::status(FD, Status); + if (EC) + return EC; + + // If this not a file or a block device (e.g. it's a named pipe + // or character device), we can't mmap it, so error out. + sys::fs::file_type Type = Status.type(); + if (Type != sys::fs::file_type::regular_file && + Type != sys::fs::file_type::block_file) + return make_error_code(errc::invalid_argument); + + FileSize = Status.getSize(); + } + MapSize = FileSize; + } + + std::error_code EC; + std::unique_ptr<WriteThroughMemoryBuffer> Result( + new (NamedBufferAlloc(Filename)) + MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize, + Offset, EC)); + if (EC) + return EC; + return std::move(Result); +} + +ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> +WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) { + return getReadWriteFile(Filename, FileSize, FileSize, 0); +} + +/// Map a subrange of the specified file as a WritableMemoryBuffer. 
ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
                                       uint64_t Offset) {
  // -1 for the file size: only the requested slice is of interest.
  return getReadWriteFile(Filename, -1, MapSize, Offset);
}

/// Builds a buffer of type MB for the range [Offset, Offset + MapSize) of the
/// open file \p FD, named \p Filename.
///
/// A \p MapSize of -1 means "the whole file"; if \p FileSize is also -1 the
/// size is taken from the file status, and anything that is neither a regular
/// file nor a block device is read as a stream instead. The range is memory
/// mapped when shouldUseMmap() agrees and the mapping succeeds; otherwise it
/// is read into a heap buffer, and any tail the file cannot supply is
/// zero-filled. \p FD is not closed here.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                bool IsVolatile) {
  // Queried once and cached for all later calls.
  static int PageSize = sys::Process::getPageSizeEstimate();

  // Default is to map the full file.
  if (MapSize == uint64_t(-1)) {
    // If we don't know the file size, use fstat to find out.  fstat on an open
    // file descriptor is cheaper than stat on a random path.
    if (FileSize == uint64_t(-1)) {
      sys::fs::file_status Status;
      std::error_code EC = sys::fs::status(FD, Status);
      if (EC)
        return EC;

      // If this is not a file or a block device (e.g. it's a named pipe
      // or character device), we can't trust the size. Create the memory
      // buffer by copying off the stream.
      sys::fs::file_type Type = Status.type();
      if (Type != sys::fs::file_type::regular_file &&
          Type != sys::fs::file_type::block_file)
        return getMemoryBufferForStream(FD, Filename);

      FileSize = Status.getSize();
    }
    MapSize = FileSize;
  }

  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
                    PageSize, IsVolatile)) {
    std::error_code EC;
    std::unique_ptr<MB> Result(
        new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
            RequiresNullTerminator, FD, MapSize, Offset, EC));
    if (!EC)
      return std::move(Result);
    // A failed mapping is not an error: fall through and read the file.
  }

  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
  if (!Buf) {
    // Failed to create a buffer. The only way it can fail is if
    // new(std::nothrow) returns 0.
    return make_error_code(errc::not_enough_memory);
  }

  // Read until EOF, zero-initialize the rest.
  MutableArrayRef<char> ToRead = Buf->getBuffer();
  while (!ToRead.empty()) {
    Expected<size_t> ReadBytes =
        sys::fs::readNativeFileSlice(FD, ToRead, Offset);
    if (!ReadBytes)
      return errorToErrorCode(ReadBytes.takeError());
    if (*ReadBytes == 0) {
      // The file ended before the buffer was full: zero what is left.
      std::memset(ToRead.data(), 0, ToRead.size());
      break;
    }
    // Advance past the bytes just read, in the buffer and in the file.
    ToRead = ToRead.drop_front(*ReadBytes);
    Offset += *ReadBytes;
  }

  return std::move(Buf);
}

/// Builds a buffer for the whole of the already open file \p FD. The handle
/// is left open for the caller.
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                          bool RequiresNullTerminator, bool IsVolatile) {
  return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
                         RequiresNullTerminator, IsVolatile);
}

/// Builds a buffer for \p MapSize bytes starting at \p Offset of the already
/// open file \p FD. The slice size must be given explicitly, and no null
/// terminator is requested.
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
                               int64_t Offset, bool IsVolatile) {
  assert(MapSize != uint64_t(-1));
  return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
                                       IsVolatile);
}

/// Switches standard input to binary mode and reads all of it into a buffer
/// named "<stdin>".
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
  // Read in all of the data from stdin, we cannot mmap stdin.
  //
  // FIXME: That isn't necessarily true, we should try to mmap stdin and
  // fallback if it fails.
  sys::ChangeStdinToBinary();

  return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
}

/// Opens \p Filename and reads it as a stream, never mapping it and never
/// consulting its size. The handle is closed before returning.
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getFileAsStream(const Twine &Filename) {
  Expected<sys::fs::file_t> FDOrErr =
      sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
  if (!FDOrErr)
    return errorToErrorCode(FDOrErr.takeError());
  sys::fs::file_t FD = *FDOrErr;
  ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
      getMemoryBufferForStream(FD, Filename);
  sys::fs::closeFile(FD);
  return Ret;
}

/// Returns a non-owning reference to this buffer's data and identifier.
MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
  StringRef Data = getBuffer();
  StringRef Identifier = getBufferIdentifier();
  return MemoryBufferRef(Data, Identifier);
}

SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}

// diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
// new file mode 100644 index 0000000000000..3731e0c563599
// --- /dev/null +++ b/llvm/lib/Support/NativeFormatting.cpp @@ -0,0 +1,263 @@
//===- NativeFormatting.cpp - Low level formatting helpers -------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/NativeFormatting.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Format.h" + +#include <float.h> + +using namespace llvm; + +template<typename T, std::size_t N> +static int format_to_buffer(T Value, char (&Buffer)[N]) { + char *EndPtr = std::end(Buffer); + char *CurPtr = EndPtr; + + do { + *--CurPtr = '0' + char(Value % 10); + Value /= 10; + } while (Value); + return EndPtr - CurPtr; +} + +static void writeWithCommas(raw_ostream &S, ArrayRef<char> Buffer) { + assert(!Buffer.empty()); + + ArrayRef<char> ThisGroup; + int InitialDigits = ((Buffer.size() - 1) % 3) + 1; + ThisGroup = Buffer.take_front(InitialDigits); + S.write(ThisGroup.data(), ThisGroup.size()); + + Buffer = Buffer.drop_front(InitialDigits); + assert(Buffer.size() % 3 == 0); + while (!Buffer.empty()) { + S << ','; + ThisGroup = Buffer.take_front(3); + S.write(ThisGroup.data(), 3); + Buffer = Buffer.drop_front(3); + } +} + +template <typename T> +static void write_unsigned_impl(raw_ostream &S, T N, size_t MinDigits, + IntegerStyle Style, bool IsNegative) { + static_assert(std::is_unsigned<T>::value, "Value is not unsigned!"); + + char NumberBuffer[128]; + std::memset(NumberBuffer, '0', sizeof(NumberBuffer)); + + size_t Len = 0; + Len = format_to_buffer(N, NumberBuffer); + + if (IsNegative) + S << '-'; + + if (Len < MinDigits && Style != IntegerStyle::Number) { + for (size_t I = Len; I < MinDigits; ++I) + S << '0'; + } + + if (Style == IntegerStyle::Number) { + writeWithCommas(S, ArrayRef<char>(std::end(NumberBuffer) - Len, Len)); + } else { + S.write(std::end(NumberBuffer) - Len, Len); + } +} + +template <typename T> +static void write_unsigned(raw_ostream &S, T N, size_t MinDigits, + IntegerStyle Style, bool IsNegative = false) { + // Output using 
32-bit div/mod if possible. + if (N == static_cast<uint32_t>(N)) + write_unsigned_impl(S, static_cast<uint32_t>(N), MinDigits, Style, + IsNegative); + else + write_unsigned_impl(S, N, MinDigits, Style, IsNegative); +} + +template <typename T> +static void write_signed(raw_ostream &S, T N, size_t MinDigits, + IntegerStyle Style) { + static_assert(std::is_signed<T>::value, "Value is not signed!"); + + using UnsignedT = typename std::make_unsigned<T>::type; + + if (N >= 0) { + write_unsigned(S, static_cast<UnsignedT>(N), MinDigits, Style); + return; + } + + UnsignedT UN = -(UnsignedT)N; + write_unsigned(S, UN, MinDigits, Style, true); +} + +void llvm::write_integer(raw_ostream &S, unsigned int N, size_t MinDigits, + IntegerStyle Style) { + write_unsigned(S, N, MinDigits, Style); +} + +void llvm::write_integer(raw_ostream &S, int N, size_t MinDigits, + IntegerStyle Style) { + write_signed(S, N, MinDigits, Style); +} + +void llvm::write_integer(raw_ostream &S, unsigned long N, size_t MinDigits, + IntegerStyle Style) { + write_unsigned(S, N, MinDigits, Style); +} + +void llvm::write_integer(raw_ostream &S, long N, size_t MinDigits, + IntegerStyle Style) { + write_signed(S, N, MinDigits, Style); +} + +void llvm::write_integer(raw_ostream &S, unsigned long long N, size_t MinDigits, + IntegerStyle Style) { + write_unsigned(S, N, MinDigits, Style); +} + +void llvm::write_integer(raw_ostream &S, long long N, size_t MinDigits, + IntegerStyle Style) { + write_signed(S, N, MinDigits, Style); +} + +void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, + Optional<size_t> Width) { + const size_t kMaxWidth = 128u; + + size_t W = std::min(kMaxWidth, Width.getValueOr(0u)); + + unsigned Nibbles = (64 - countLeadingZeros(N) + 3) / 4; + bool Prefix = (Style == HexPrintStyle::PrefixLower || + Style == HexPrintStyle::PrefixUpper); + bool Upper = + (Style == HexPrintStyle::Upper || Style == HexPrintStyle::PrefixUpper); + unsigned PrefixChars = Prefix ? 
2 : 0; + unsigned NumChars = + std::max(static_cast<unsigned>(W), std::max(1u, Nibbles) + PrefixChars); + + char NumberBuffer[kMaxWidth]; + ::memset(NumberBuffer, '0', llvm::array_lengthof(NumberBuffer)); + if (Prefix) + NumberBuffer[1] = 'x'; + char *EndPtr = NumberBuffer + NumChars; + char *CurPtr = EndPtr; + while (N) { + unsigned char x = static_cast<unsigned char>(N) % 16; + *--CurPtr = hexdigit(x, !Upper); + N /= 16; + } + + S.write(NumberBuffer, NumChars); +} + +void llvm::write_double(raw_ostream &S, double N, FloatStyle Style, + Optional<size_t> Precision) { + size_t Prec = Precision.getValueOr(getDefaultPrecision(Style)); + + if (std::isnan(N)) { + S << "nan"; + return; + } else if (std::isinf(N)) { + S << "INF"; + return; + } + + char Letter; + if (Style == FloatStyle::Exponent) + Letter = 'e'; + else if (Style == FloatStyle::ExponentUpper) + Letter = 'E'; + else + Letter = 'f'; + + SmallString<8> Spec; + llvm::raw_svector_ostream Out(Spec); + Out << "%." << Prec << Letter; + + if (Style == FloatStyle::Exponent || Style == FloatStyle::ExponentUpper) { +#ifdef _WIN32 +// On MSVCRT and compatible, output of %e is incompatible to Posix +// by default. Number of exponent digits should be at least 2. "%+03d" +// FIXME: Implement our formatter to here or Support/Format.h! +#if defined(__MINGW32__) + // FIXME: It should be generic to C++11. 
+ if (N == 0.0 && std::signbit(N)) { + char NegativeZero[] = "-0.000000e+00"; + if (Style == FloatStyle::ExponentUpper) + NegativeZero[strlen(NegativeZero) - 4] = 'E'; + S << NegativeZero; + return; + } +#else + int fpcl = _fpclass(N); + + // negative zero + if (fpcl == _FPCLASS_NZ) { + char NegativeZero[] = "-0.000000e+00"; + if (Style == FloatStyle::ExponentUpper) + NegativeZero[strlen(NegativeZero) - 4] = 'E'; + S << NegativeZero; + return; + } +#endif + + char buf[32]; + unsigned len; + len = format(Spec.c_str(), N).snprint(buf, sizeof(buf)); + if (len <= sizeof(buf) - 2) { + if (len >= 5 && (buf[len - 5] == 'e' || buf[len - 5] == 'E') && + buf[len - 3] == '0') { + int cs = buf[len - 4]; + if (cs == '+' || cs == '-') { + int c1 = buf[len - 2]; + int c0 = buf[len - 1]; + if (isdigit(static_cast<unsigned char>(c1)) && + isdigit(static_cast<unsigned char>(c0))) { + // Trim leading '0': "...e+012" -> "...e+12\0" + buf[len - 3] = c1; + buf[len - 2] = c0; + buf[--len] = 0; + } + } + } + S << buf; + return; + } +#endif + } + + if (Style == FloatStyle::Percent) + N *= 100.0; + + char Buf[32]; + format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); + S << Buf; + if (Style == FloatStyle::Percent) + S << '%'; +} + +bool llvm::isPrefixedHexStyle(HexPrintStyle S) { + return (S == HexPrintStyle::PrefixLower || S == HexPrintStyle::PrefixUpper); +} + +size_t llvm::getDefaultPrecision(FloatStyle Style) { + switch (Style) { + case FloatStyle::Exponent: + case FloatStyle::ExponentUpper: + return 6; // Number of decimal places. + case FloatStyle::Fixed: + case FloatStyle::Percent: + return 2; // Number of decimal places. 
+ } + LLVM_BUILTIN_UNREACHABLE; +} diff --git a/llvm/lib/Support/Optional.cpp b/llvm/lib/Support/Optional.cpp new file mode 100644 index 0000000000000..2425739c845d3 --- /dev/null +++ b/llvm/lib/Support/Optional.cpp @@ -0,0 +1,14 @@ +//===- Optional.cpp - Optional values ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Optional.h" +#include "llvm/Support/raw_ostream.h" + +llvm::raw_ostream &llvm::operator<<(raw_ostream &OS, NoneType) { + return OS << "None"; +} diff --git a/llvm/lib/Support/Options.cpp b/llvm/lib/Support/Options.cpp new file mode 100644 index 0000000000000..770b7381c20ee --- /dev/null +++ b/llvm/lib/Support/Options.cpp @@ -0,0 +1,32 @@ +//===- llvm/Support/Options.cpp - Debug options support ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the helper objects for defining debug options using the +// new API built on cl::opt, but not requiring the use of static globals. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Options.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +OptionRegistry::~OptionRegistry() { + for (auto IT = Options.begin(); IT != Options.end(); ++IT) + delete IT->second; +} + +void OptionRegistry::addOption(void *Key, cl::Option *O) { + assert(Options.find(Key) == Options.end() && + "Argument with this key already registerd"); + Options.insert(std::make_pair(Key, O)); +} + +static ManagedStatic<OptionRegistry> OR; + +OptionRegistry &OptionRegistry::instance() { return *OR; } diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp new file mode 100644 index 0000000000000..355c64b7d0793 --- /dev/null +++ b/llvm/lib/Support/Parallel.cpp @@ -0,0 +1,118 @@ +//===- llvm/Support/Parallel.cpp - Parallel algorithms --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Parallel.h" +#include "llvm/Config/llvm-config.h" + +#if LLVM_ENABLE_THREADS + +#include "llvm/Support/Threading.h" + +#include <atomic> +#include <stack> +#include <thread> + +namespace llvm { +namespace parallel { +namespace detail { + +namespace { + +/// An abstract class that takes closures and runs them asynchronously. +class Executor { +public: + virtual ~Executor() = default; + virtual void add(std::function<void()> func) = 0; + + static Executor *getDefaultExecutor(); +}; + +/// An implementation of an Executor that runs closures on a thread pool +/// in filo order. 
+class ThreadPoolExecutor : public Executor { +public: + explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency()) + : Done(ThreadCount) { + // Spawn all but one of the threads in another thread as spawning threads + // can take a while. + std::thread([&, ThreadCount] { + for (size_t i = 1; i < ThreadCount; ++i) { + std::thread([=] { work(); }).detach(); + } + work(); + }).detach(); + } + + ~ThreadPoolExecutor() override { + std::unique_lock<std::mutex> Lock(Mutex); + Stop = true; + Lock.unlock(); + Cond.notify_all(); + // Wait for ~Latch. + } + + void add(std::function<void()> F) override { + std::unique_lock<std::mutex> Lock(Mutex); + WorkStack.push(F); + Lock.unlock(); + Cond.notify_one(); + } + +private: + void work() { + while (true) { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop) + break; + auto Task = WorkStack.top(); + WorkStack.pop(); + Lock.unlock(); + Task(); + } + Done.dec(); + } + + std::atomic<bool> Stop{false}; + std::stack<std::function<void()>> WorkStack; + std::mutex Mutex; + std::condition_variable Cond; + parallel::detail::Latch Done; +}; + +Executor *Executor::getDefaultExecutor() { + static ThreadPoolExecutor exec; + return &exec; +} +} // namespace + +static std::atomic<int> TaskGroupInstances; + +// Latch::sync() called by the dtor may cause one thread to block. If is a dead +// lock if all threads in the default executor are blocked. To prevent the dead +// lock, only allow the first TaskGroup to run tasks parallelly. In the scenario +// of nested parallel_for_each(), only the outermost one runs parallelly. 
// Only the TaskGroup constructed while no other TaskGroup is alive gets
// Parallel == true.
TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
TaskGroup::~TaskGroup() { --TaskGroupInstances; }

/// Runs \p F on the default executor when this group is the parallel one,
/// otherwise runs it immediately on the calling thread.
void TaskGroup::spawn(std::function<void()> F) {
  if (Parallel) {
    // Count the task before it is queued; the queued closure counts it back
    // down after F has run.
    L.inc();
    Executor::getDefaultExecutor()->add([&, F] {
      F();
      L.dec();
    });
  } else {
    F();
  }
}

} // namespace detail
} // namespace parallel
} // namespace llvm
#endif // LLVM_ENABLE_THREADS

// diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
// new file mode 100644 index 0000000000000..14def83802daf
// --- /dev/null +++ b/llvm/lib/Support/Path.cpp @@ -0,0 +1,1248 @@
//===-- Path.cpp - Implement OS Path Concept ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements the operating system Path API.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Path.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include <cctype>
#include <cstring>

#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
#include <io.h>
#endif

using namespace llvm;
using namespace llvm::support::endian;

namespace {
  using llvm::StringRef;
  using llvm::sys::path::is_separator;
  using llvm::sys::path::Style;

  // Resolves a style to either Style::posix or Style::windows. Any value
  // other than those two maps to the style of the platform this file is
  // compiled for.
  inline Style real_style(Style style) {
#ifdef _WIN32
    return (style == Style::posix) ? Style::posix : Style::windows;
#else
    return (style == Style::windows) ? Style::windows : Style::posix;
#endif
  }

  // Returns the set of characters that separate path components: both '\'
  // and '/' for Windows paths, only '/' otherwise.
  inline const char *separators(Style style) {
    if (real_style(style) == Style::windows)
      return "\\/";
    return "/";
  }

  // Returns the separator to use when building paths: '\' for Windows paths,
  // '/' otherwise.
  inline char preferred_separator(Style style) {
    if (real_style(style) == Style::windows)
      return '\\';
    return '/';
  }

  // Returns the leading component of path as a substring of path. The checks
  // below are ordered; the first one that matches wins.
  StringRef find_first_component(StringRef path, Style style) {
    // Look for this first component in the following order.
    // * empty (in this case we return an empty string)
    // * either C: or {//,\\}net.
    // * {/,\}
    // * {file,directory}name

    if (path.empty())
      return path;

    if (real_style(style) == Style::windows) {
      // C:
      if (path.size() >= 2 &&
          std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':')
        return path.substr(0, 2);
    }

    // //net
    if ((path.size() > 2) && is_separator(path[0], style) &&
        path[0] == path[1] && !is_separator(path[2], style)) {
      // Find the next directory separator.
      size_t end = path.find_first_of(separators(style), 2);
      return path.substr(0, end);
    }

    // {/,\}
    if (is_separator(path[0], style))
      return path.substr(0, 1);

    // * {file,directory}name
    size_t end = path.find_first_of(separators(style));
    return path.substr(0, end);
  }

  // Returns the first character of the filename in str. For paths ending in
  // '/', it returns the position of the '/'.
  size_t filename_pos(StringRef str, Style style) {
    if (str.size() > 0 && is_separator(str[str.size() - 1], style))
      return str.size() - 1;

    size_t pos = str.find_last_of(separators(style), str.size() - 1);

    // Without any separator, a Windows path may still have a drive prefix:
    // the filename then starts after the ':'.
    if (real_style(style) == Style::windows) {
      if (pos == StringRef::npos)
        pos = str.find_last_of(':', str.size() - 2);
    }

    // No separator at all, or a path of the "//net" form whose only
    // separators are the two leading ones: the whole string is the filename.
    if (pos == StringRef::npos || (pos == 1 && is_separator(str[0], style)))
      return 0;

    return pos + 1;
  }

  // Returns the position of the root directory in str. If there is no root
  // directory in str, it returns StringRef::npos.
+ size_t root_dir_start(StringRef str, Style style) { + // case "c:/" + if (real_style(style) == Style::windows) { + if (str.size() > 2 && str[1] == ':' && is_separator(str[2], style)) + return 2; + } + + // case "//net" + if (str.size() > 3 && is_separator(str[0], style) && str[0] == str[1] && + !is_separator(str[2], style)) { + return str.find_first_of(separators(style), 2); + } + + // case "/" + if (str.size() > 0 && is_separator(str[0], style)) + return 0; + + return StringRef::npos; + } + + // Returns the position past the end of the "parent path" of path. The parent + // path will not end in '/', unless the parent is the root directory. If the + // path has no parent, 0 is returned. + size_t parent_path_end(StringRef path, Style style) { + size_t end_pos = filename_pos(path, style); + + bool filename_was_sep = + path.size() > 0 && is_separator(path[end_pos], style); + + // Skip separators until we reach root dir (or the start of the string). + size_t root_dir_pos = root_dir_start(path, style); + while (end_pos > 0 && + (root_dir_pos == StringRef::npos || end_pos > root_dir_pos) && + is_separator(path[end_pos - 1], style)) + --end_pos; + + if (end_pos == root_dir_pos && !filename_was_sep) { + // We've reached the root dir and the input path was *not* ending in a + // sequence of slashes. Include the root dir in the parent path. + return root_dir_pos + 1; + } + + // Otherwise, just include before the last slash. + return end_pos; + } +} // end unnamed namespace + +enum FSEntity { + FS_Dir, + FS_File, + FS_Name +}; + +static std::error_code +createUniqueEntity(const Twine &Model, int &ResultFD, + SmallVectorImpl<char> &ResultPath, bool MakeAbsolute, + unsigned Mode, FSEntity Type, + sys::fs::OpenFlags Flags = sys::fs::OF_None) { + + // Limit the number of attempts we make, so that we don't infinite loop. E.g. + // "permission denied" could be for a specific file (so we retry with a + // different name) or for the whole directory (retry would always fail). 
+ // Checking which is racy, so we try a number of times, then give up. + std::error_code EC; + for (int Retries = 128; Retries > 0; --Retries) { + sys::fs::createUniquePath(Model, ResultPath, MakeAbsolute); + // Try to open + create the file. + switch (Type) { + case FS_File: { + EC = sys::fs::openFileForReadWrite(Twine(ResultPath.begin()), ResultFD, + sys::fs::CD_CreateNew, Flags, Mode); + if (EC) { + // errc::permission_denied happens on Windows when we try to open a file + // that has been marked for deletion. + if (EC == errc::file_exists || EC == errc::permission_denied) + continue; + return EC; + } + + return std::error_code(); + } + + case FS_Name: { + EC = sys::fs::access(ResultPath.begin(), sys::fs::AccessMode::Exist); + if (EC == errc::no_such_file_or_directory) + return std::error_code(); + if (EC) + return EC; + continue; + } + + case FS_Dir: { + EC = sys::fs::create_directory(ResultPath.begin(), false); + if (EC) { + if (EC == errc::file_exists) + continue; + return EC; + } + return std::error_code(); + } + } + llvm_unreachable("Invalid Type"); + } + return EC; +} + +namespace llvm { +namespace sys { +namespace path { + +const_iterator begin(StringRef path, Style style) { + const_iterator i; + i.Path = path; + i.Component = find_first_component(path, style); + i.Position = 0; + i.S = style; + return i; +} + +const_iterator end(StringRef path) { + const_iterator i; + i.Path = path; + i.Position = path.size(); + return i; +} + +const_iterator &const_iterator::operator++() { + assert(Position < Path.size() && "Tried to increment past end!"); + + // Increment Position to past the current component + Position += Component.size(); + + // Check for end. + if (Position == Path.size()) { + Component = StringRef(); + return *this; + } + + // Both POSIX and Windows treat paths that begin with exactly two separators + // specially. 
+ bool was_net = Component.size() > 2 && is_separator(Component[0], S) && + Component[1] == Component[0] && !is_separator(Component[2], S); + + // Handle separators. + if (is_separator(Path[Position], S)) { + // Root dir. + if (was_net || + // c:/ + (real_style(S) == Style::windows && Component.endswith(":"))) { + Component = Path.substr(Position, 1); + return *this; + } + + // Skip extra separators. + while (Position != Path.size() && is_separator(Path[Position], S)) { + ++Position; + } + + // Treat trailing '/' as a '.', unless it is the root dir. + if (Position == Path.size() && Component != "/") { + --Position; + Component = "."; + return *this; + } + } + + // Find next component. + size_t end_pos = Path.find_first_of(separators(S), Position); + Component = Path.slice(Position, end_pos); + + return *this; +} + +bool const_iterator::operator==(const const_iterator &RHS) const { + return Path.begin() == RHS.Path.begin() && Position == RHS.Position; +} + +ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const { + return Position - RHS.Position; +} + +reverse_iterator rbegin(StringRef Path, Style style) { + reverse_iterator I; + I.Path = Path; + I.Position = Path.size(); + I.S = style; + ++I; + return I; +} + +reverse_iterator rend(StringRef Path) { + reverse_iterator I; + I.Path = Path; + I.Component = Path.substr(0, 0); + I.Position = 0; + return I; +} + +reverse_iterator &reverse_iterator::operator++() { + size_t root_dir_pos = root_dir_start(Path, S); + + // Skip separators unless it's the root directory. + size_t end_pos = Position; + while (end_pos > 0 && (end_pos - 1) != root_dir_pos && + is_separator(Path[end_pos - 1], S)) + --end_pos; + + // Treat trailing '/' as a '.', unless it is the root dir. + if (Position == Path.size() && !Path.empty() && + is_separator(Path.back(), S) && + (root_dir_pos == StringRef::npos || end_pos - 1 > root_dir_pos)) { + --Position; + Component = "."; + return *this; + } + + // Find next separator. 
+ size_t start_pos = filename_pos(Path.substr(0, end_pos), S); + Component = Path.slice(start_pos, end_pos); + Position = start_pos; + return *this; +} + +bool reverse_iterator::operator==(const reverse_iterator &RHS) const { + return Path.begin() == RHS.Path.begin() && Component == RHS.Component && + Position == RHS.Position; +} + +ptrdiff_t reverse_iterator::operator-(const reverse_iterator &RHS) const { + return Position - RHS.Position; +} + +StringRef root_path(StringRef path, Style style) { + const_iterator b = begin(path, style), pos = b, e = end(path); + if (b != e) { + bool has_net = + b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0]; + bool has_drive = (real_style(style) == Style::windows) && b->endswith(":"); + + if (has_net || has_drive) { + if ((++pos != e) && is_separator((*pos)[0], style)) { + // {C:/,//net/}, so get the first two components. + return path.substr(0, b->size() + pos->size()); + } else { + // just {C:,//net}, return the first component. + return *b; + } + } + + // POSIX style root directory. + if (is_separator((*b)[0], style)) { + return *b; + } + } + + return StringRef(); +} + +StringRef root_name(StringRef path, Style style) { + const_iterator b = begin(path, style), e = end(path); + if (b != e) { + bool has_net = + b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0]; + bool has_drive = (real_style(style) == Style::windows) && b->endswith(":"); + + if (has_net || has_drive) { + // just {C:,//net}, return the first component. + return *b; + } + } + + // No path or no name. + return StringRef(); +} + +StringRef root_directory(StringRef path, Style style) { + const_iterator b = begin(path, style), pos = b, e = end(path); + if (b != e) { + bool has_net = + b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0]; + bool has_drive = (real_style(style) == Style::windows) && b->endswith(":"); + + if ((has_net || has_drive) && + // {C:,//net}, skip to the next component. 
+ (++pos != e) && is_separator((*pos)[0], style)) { + return *pos; + } + + // POSIX style root directory. + if (!has_net && is_separator((*b)[0], style)) { + return *b; + } + } + + // No path or no root. + return StringRef(); +} + +StringRef relative_path(StringRef path, Style style) { + StringRef root = root_path(path, style); + return path.substr(root.size()); +} + +void append(SmallVectorImpl<char> &path, Style style, const Twine &a, + const Twine &b, const Twine &c, const Twine &d) { + SmallString<32> a_storage; + SmallString<32> b_storage; + SmallString<32> c_storage; + SmallString<32> d_storage; + + SmallVector<StringRef, 4> components; + if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage)); + if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage)); + if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage)); + if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage)); + + for (auto &component : components) { + bool path_has_sep = + !path.empty() && is_separator(path[path.size() - 1], style); + if (path_has_sep) { + // Strip separators from beginning of component. + size_t loc = component.find_first_not_of(separators(style)); + StringRef c = component.substr(loc); + + // Append it. + path.append(c.begin(), c.end()); + continue; + } + + bool component_has_sep = + !component.empty() && is_separator(component[0], style); + if (!component_has_sep && + !(path.empty() || has_root_name(component, style))) { + // Add a separator. 
+ path.push_back(preferred_separator(style)); + } + + path.append(component.begin(), component.end()); + } +} + +void append(SmallVectorImpl<char> &path, const Twine &a, const Twine &b, + const Twine &c, const Twine &d) { + append(path, Style::native, a, b, c, d); +} + +void append(SmallVectorImpl<char> &path, const_iterator begin, + const_iterator end, Style style) { + for (; begin != end; ++begin) + path::append(path, style, *begin); +} + +StringRef parent_path(StringRef path, Style style) { + size_t end_pos = parent_path_end(path, style); + if (end_pos == StringRef::npos) + return StringRef(); + else + return path.substr(0, end_pos); +} + +void remove_filename(SmallVectorImpl<char> &path, Style style) { + size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()), style); + if (end_pos != StringRef::npos) + path.set_size(end_pos); +} + +void replace_extension(SmallVectorImpl<char> &path, const Twine &extension, + Style style) { + StringRef p(path.begin(), path.size()); + SmallString<32> ext_storage; + StringRef ext = extension.toStringRef(ext_storage); + + // Erase existing extension. + size_t pos = p.find_last_of('.'); + if (pos != StringRef::npos && pos >= filename_pos(p, style)) + path.set_size(pos); + + // Append '.' if needed. + if (ext.size() > 0 && ext[0] != '.') + path.push_back('.'); + + // Append extension. + path.append(ext.begin(), ext.end()); +} + +void replace_path_prefix(SmallVectorImpl<char> &Path, + const StringRef &OldPrefix, const StringRef &NewPrefix, + Style style) { + if (OldPrefix.empty() && NewPrefix.empty()) + return; + + StringRef OrigPath(Path.begin(), Path.size()); + if (!OrigPath.startswith(OldPrefix)) + return; + + // If prefixes have the same size we can simply copy the new one over. 
+ if (OldPrefix.size() == NewPrefix.size()) { + llvm::copy(NewPrefix, Path.begin()); + return; + } + + StringRef RelPath = OrigPath.substr(OldPrefix.size()); + SmallString<256> NewPath; + path::append(NewPath, style, NewPrefix); + path::append(NewPath, style, RelPath); + Path.swap(NewPath); +} + +void native(const Twine &path, SmallVectorImpl<char> &result, Style style) { + assert((!path.isSingleStringRef() || + path.getSingleStringRef().data() != result.data()) && + "path and result are not allowed to overlap!"); + // Clear result. + result.clear(); + path.toVector(result); + native(result, style); +} + +void native(SmallVectorImpl<char> &Path, Style style) { + if (Path.empty()) + return; + if (real_style(style) == Style::windows) { + std::replace(Path.begin(), Path.end(), '/', '\\'); + if (Path[0] == '~' && (Path.size() == 1 || is_separator(Path[1], style))) { + SmallString<128> PathHome; + home_directory(PathHome); + PathHome.append(Path.begin() + 1, Path.end()); + Path = PathHome; + } + } else { + for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) { + if (*PI == '\\') { + auto PN = PI + 1; + if (PN < PE && *PN == '\\') + ++PI; // increment once, the for loop will move over the escaped slash + else + *PI = '/'; + } + } + } +} + +std::string convert_to_slash(StringRef path, Style style) { + if (real_style(style) != Style::windows) + return path; + + std::string s = path.str(); + std::replace(s.begin(), s.end(), '\\', '/'); + return s; +} + +StringRef filename(StringRef path, Style style) { return *rbegin(path, style); } + +StringRef stem(StringRef path, Style style) { + StringRef fname = filename(path, style); + size_t pos = fname.find_last_of('.'); + if (pos == StringRef::npos) + return fname; + else + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return fname; + else + return fname.substr(0, pos); +} + +StringRef extension(StringRef path, Style style) { + StringRef fname = filename(path, style); + size_t pos = 
fname.find_last_of('.'); + if (pos == StringRef::npos) + return StringRef(); + else + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return StringRef(); + else + return fname.substr(pos); +} + +bool is_separator(char value, Style style) { + if (value == '/') + return true; + if (real_style(style) == Style::windows) + return value == '\\'; + return false; +} + +StringRef get_separator(Style style) { + if (real_style(style) == Style::windows) + return "\\"; + return "/"; +} + +bool has_root_name(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_name(p, style).empty(); +} + +bool has_root_directory(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_directory(p, style).empty(); +} + +bool has_root_path(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_path(p, style).empty(); +} + +bool has_relative_path(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !relative_path(p, style).empty(); +} + +bool has_filename(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !filename(p, style).empty(); +} + +bool has_parent_path(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !parent_path(p, style).empty(); +} + +bool has_stem(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !stem(p, style).empty(); +} + +bool has_extension(const Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !extension(p, style).empty(); +} + +bool is_absolute(const 
Twine &path, Style style) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + bool rootDir = has_root_directory(p, style); + bool rootName = + (real_style(style) != Style::windows) || has_root_name(p, style); + + return rootDir && rootName; +} + +bool is_relative(const Twine &path, Style style) { + return !is_absolute(path, style); +} + +StringRef remove_leading_dotslash(StringRef Path, Style style) { + // Remove leading "./" (or ".//" or "././" etc.) + while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1], style)) { + Path = Path.substr(2); + while (Path.size() > 0 && is_separator(Path[0], style)) + Path = Path.substr(1); + } + return Path; +} + +static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot, + Style style) { + SmallVector<StringRef, 16> components; + + // Skip the root path, then look for traversal in the components. + StringRef rel = path::relative_path(path, style); + for (StringRef C : + llvm::make_range(path::begin(rel, style), path::end(rel))) { + if (C == ".") + continue; + // Leading ".." will remain in the path unless it's at the root. 
+ if (remove_dot_dot && C == "..") { + if (!components.empty() && components.back() != "..") { + components.pop_back(); + continue; + } + if (path::is_absolute(path, style)) + continue; + } + components.push_back(C); + } + + SmallString<256> buffer = path::root_path(path, style); + for (StringRef C : components) + path::append(buffer, style, C); + return buffer; +} + +bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot, + Style style) { + StringRef p(path.data(), path.size()); + + SmallString<256> result = remove_dots(p, remove_dot_dot, style); + if (result == path) + return false; + + path.swap(result); + return true; +} + +} // end namespace path + +namespace fs { + +std::error_code getUniqueID(const Twine Path, UniqueID &Result) { + file_status Status; + std::error_code EC = status(Path, Status); + if (EC) + return EC; + Result = Status.getUniqueID(); + return std::error_code(); +} + +void createUniquePath(const Twine &Model, SmallVectorImpl<char> &ResultPath, + bool MakeAbsolute) { + SmallString<128> ModelStorage; + Model.toVector(ModelStorage); + + if (MakeAbsolute) { + // Make model absolute by prepending a temp directory if it's not already. + if (!sys::path::is_absolute(Twine(ModelStorage))) { + SmallString<128> TDir; + sys::path::system_temp_directory(true, TDir); + sys::path::append(TDir, Twine(ModelStorage)); + ModelStorage.swap(TDir); + } + } + + ResultPath = ModelStorage; + ResultPath.push_back(0); + ResultPath.pop_back(); + + // Replace '%' with random chars. 
+ for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) { + if (ModelStorage[i] == '%') + ResultPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15]; + } +} + +std::error_code createUniqueFile(const Twine &Model, int &ResultFd, + SmallVectorImpl<char> &ResultPath, + unsigned Mode) { + return createUniqueEntity(Model, ResultFd, ResultPath, false, Mode, FS_File); +} + +static std::error_code createUniqueFile(const Twine &Model, int &ResultFd, + SmallVectorImpl<char> &ResultPath, + unsigned Mode, OpenFlags Flags) { + return createUniqueEntity(Model, ResultFd, ResultPath, false, Mode, FS_File, + Flags); +} + +std::error_code createUniqueFile(const Twine &Model, + SmallVectorImpl<char> &ResultPath, + unsigned Mode) { + int FD; + auto EC = createUniqueFile(Model, FD, ResultPath, Mode); + if (EC) + return EC; + // FD is only needed to avoid race conditions. Close it right away. + close(FD); + return EC; +} + +static std::error_code +createTemporaryFile(const Twine &Model, int &ResultFD, + llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) { + SmallString<128> Storage; + StringRef P = Model.toNullTerminatedStringRef(Storage); + assert(P.find_first_of(separators(Style::native)) == StringRef::npos && + "Model must be a simple filename."); + // Use P.begin() so that createUniqueEntity doesn't need to recreate Storage. + return createUniqueEntity(P.begin(), ResultFD, ResultPath, true, + owner_read | owner_write, Type); +} + +static std::error_code +createTemporaryFile(const Twine &Prefix, StringRef Suffix, int &ResultFD, + llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) { + const char *Middle = Suffix.empty() ? 
"-%%%%%%" : "-%%%%%%."; + return createTemporaryFile(Prefix + Middle + Suffix, ResultFD, ResultPath, + Type); +} + +std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, + int &ResultFD, + SmallVectorImpl<char> &ResultPath) { + return createTemporaryFile(Prefix, Suffix, ResultFD, ResultPath, FS_File); +} + +std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, + SmallVectorImpl<char> &ResultPath) { + int FD; + auto EC = createTemporaryFile(Prefix, Suffix, FD, ResultPath); + if (EC) + return EC; + // FD is only needed to avoid race conditions. Close it right away. + close(FD); + return EC; +} + + +// This is a mkdtemp with a different pattern. We use createUniqueEntity mostly +// for consistency. We should try using mkdtemp. +std::error_code createUniqueDirectory(const Twine &Prefix, + SmallVectorImpl<char> &ResultPath) { + int Dummy; + return createUniqueEntity(Prefix + "-%%%%%%", Dummy, ResultPath, true, 0, + FS_Dir); +} + +std::error_code +getPotentiallyUniqueFileName(const Twine &Model, + SmallVectorImpl<char> &ResultPath) { + int Dummy; + return createUniqueEntity(Model, Dummy, ResultPath, false, 0, FS_Name); +} + +std::error_code +getPotentiallyUniqueTempFileName(const Twine &Prefix, StringRef Suffix, + SmallVectorImpl<char> &ResultPath) { + int Dummy; + return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name); +} + +void make_absolute(const Twine ¤t_directory, + SmallVectorImpl<char> &path) { + StringRef p(path.data(), path.size()); + + bool rootDirectory = path::has_root_directory(p); + bool rootName = path::has_root_name(p); + + // Already absolute. + if ((rootName || real_style(Style::native) != Style::windows) && + rootDirectory) + return; + + // All of the following conditions will need the current directory. + SmallString<128> current_dir; + current_directory.toVector(current_dir); + + // Relative path. Prepend the current directory. 
+ if (!rootName && !rootDirectory) { + // Append path to the current directory. + path::append(current_dir, p); + // Set path to the result. + path.swap(current_dir); + return; + } + + if (!rootName && rootDirectory) { + StringRef cdrn = path::root_name(current_dir); + SmallString<128> curDirRootName(cdrn.begin(), cdrn.end()); + path::append(curDirRootName, p); + // Set path to the result. + path.swap(curDirRootName); + return; + } + + if (rootName && !rootDirectory) { + StringRef pRootName = path::root_name(p); + StringRef bRootDirectory = path::root_directory(current_dir); + StringRef bRelativePath = path::relative_path(current_dir); + StringRef pRelativePath = path::relative_path(p); + + SmallString<128> res; + path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath); + path.swap(res); + return; + } + + llvm_unreachable("All rootName and rootDirectory combinations should have " + "occurred above!"); +} + +std::error_code make_absolute(SmallVectorImpl<char> &path) { + if (path::is_absolute(path)) + return {}; + + SmallString<128> current_dir; + if (std::error_code ec = current_path(current_dir)) + return ec; + + make_absolute(current_dir, path); + return {}; +} + +std::error_code create_directories(const Twine &Path, bool IgnoreExisting, + perms Perms) { + SmallString<128> PathStorage; + StringRef P = Path.toStringRef(PathStorage); + + // Be optimistic and try to create the directory + std::error_code EC = create_directory(P, IgnoreExisting, Perms); + // If we succeeded, or had any error other than the parent not existing, just + // return it. + if (EC != errc::no_such_file_or_directory) + return EC; + + // We failed because of a no_such_file_or_directory, try to create the + // parent. 
+ StringRef Parent = path::parent_path(P); + if (Parent.empty()) + return EC; + + if ((EC = create_directories(Parent, IgnoreExisting, Perms))) + return EC; + + return create_directory(P, IgnoreExisting, Perms); +} + +static std::error_code copy_file_internal(int ReadFD, int WriteFD) { + const size_t BufSize = 4096; + char *Buf = new char[BufSize]; + int BytesRead = 0, BytesWritten = 0; + for (;;) { + BytesRead = read(ReadFD, Buf, BufSize); + if (BytesRead <= 0) + break; + while (BytesRead) { + BytesWritten = write(WriteFD, Buf, BytesRead); + if (BytesWritten < 0) + break; + BytesRead -= BytesWritten; + } + if (BytesWritten < 0) + break; + } + delete[] Buf; + + if (BytesRead < 0 || BytesWritten < 0) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + +#ifndef __APPLE__ +std::error_code copy_file(const Twine &From, const Twine &To) { + int ReadFD, WriteFD; + if (std::error_code EC = openFileForRead(From, ReadFD, OF_None)) + return EC; + if (std::error_code EC = + openFileForWrite(To, WriteFD, CD_CreateAlways, OF_None)) { + close(ReadFD); + return EC; + } + + std::error_code EC = copy_file_internal(ReadFD, WriteFD); + + close(ReadFD); + close(WriteFD); + + return EC; +} +#endif + +std::error_code copy_file(const Twine &From, int ToFD) { + int ReadFD; + if (std::error_code EC = openFileForRead(From, ReadFD, OF_None)) + return EC; + + std::error_code EC = copy_file_internal(ReadFD, ToFD); + + close(ReadFD); + + return EC; +} + +ErrorOr<MD5::MD5Result> md5_contents(int FD) { + MD5 Hash; + + constexpr size_t BufSize = 4096; + std::vector<uint8_t> Buf(BufSize); + int BytesRead = 0; + for (;;) { + BytesRead = read(FD, Buf.data(), BufSize); + if (BytesRead <= 0) + break; + Hash.update(makeArrayRef(Buf.data(), BytesRead)); + } + + if (BytesRead < 0) + return std::error_code(errno, std::generic_category()); + MD5::MD5Result Result; + Hash.final(Result); + return Result; +} + +ErrorOr<MD5::MD5Result> md5_contents(const Twine &Path) { + 
int FD; + if (auto EC = openFileForRead(Path, FD, OF_None)) + return EC; + + auto Result = md5_contents(FD); + close(FD); + return Result; +} + +bool exists(const basic_file_status &status) { + return status_known(status) && status.type() != file_type::file_not_found; +} + +bool status_known(const basic_file_status &s) { + return s.type() != file_type::status_error; +} + +file_type get_file_type(const Twine &Path, bool Follow) { + file_status st; + if (status(Path, st, Follow)) + return file_type::status_error; + return st.type(); +} + +bool is_directory(const basic_file_status &status) { + return status.type() == file_type::directory_file; +} + +std::error_code is_directory(const Twine &path, bool &result) { + file_status st; + if (std::error_code ec = status(path, st)) + return ec; + result = is_directory(st); + return std::error_code(); +} + +bool is_regular_file(const basic_file_status &status) { + return status.type() == file_type::regular_file; +} + +std::error_code is_regular_file(const Twine &path, bool &result) { + file_status st; + if (std::error_code ec = status(path, st)) + return ec; + result = is_regular_file(st); + return std::error_code(); +} + +bool is_symlink_file(const basic_file_status &status) { + return status.type() == file_type::symlink_file; +} + +std::error_code is_symlink_file(const Twine &path, bool &result) { + file_status st; + if (std::error_code ec = status(path, st, false)) + return ec; + result = is_symlink_file(st); + return std::error_code(); +} + +bool is_other(const basic_file_status &status) { + return exists(status) && + !is_regular_file(status) && + !is_directory(status); +} + +std::error_code is_other(const Twine &Path, bool &Result) { + file_status FileStatus; + if (std::error_code EC = status(Path, FileStatus)) + return EC; + Result = is_other(FileStatus); + return std::error_code(); +} + +void directory_entry::replace_filename(const Twine &Filename, file_type Type, + basic_file_status Status) { + SmallString<128> PathStr 
= path::parent_path(Path); + path::append(PathStr, Filename); + this->Path = PathStr.str(); + this->Type = Type; + this->Status = Status; +} + +ErrorOr<perms> getPermissions(const Twine &Path) { + file_status Status; + if (std::error_code EC = status(Path, Status)) + return EC; + + return Status.permissions(); +} + +} // end namespace fs +} // end namespace sys +} // end namespace llvm + +// Include the truly platform-specific parts. +#if defined(LLVM_ON_UNIX) +#include "Unix/Path.inc" +#endif +#if defined(_WIN32) +#include "Windows/Path.inc" +#endif + +namespace llvm { +namespace sys { +namespace fs { +TempFile::TempFile(StringRef Name, int FD) : TmpName(Name), FD(FD) {} +TempFile::TempFile(TempFile &&Other) { *this = std::move(Other); } +TempFile &TempFile::operator=(TempFile &&Other) { + TmpName = std::move(Other.TmpName); + FD = Other.FD; + Other.Done = true; + Other.FD = -1; + return *this; +} + +TempFile::~TempFile() { assert(Done); } + +Error TempFile::discard() { + Done = true; + if (FD != -1 && close(FD) == -1) { + std::error_code EC = std::error_code(errno, std::generic_category()); + return errorCodeToError(EC); + } + FD = -1; + +#ifdef _WIN32 + // On windows closing will remove the file. + TmpName = ""; + return Error::success(); +#else + // Always try to close and remove. + std::error_code RemoveEC; + if (!TmpName.empty()) { + RemoveEC = fs::remove(TmpName); + sys::DontRemoveFileOnSignal(TmpName); + if (!RemoveEC) + TmpName = ""; + } + return errorCodeToError(RemoveEC); +#endif +} + +Error TempFile::keep(const Twine &Name) { + assert(!Done); + Done = true; + // Always try to close and rename. +#ifdef _WIN32 + // If we can't cancel the delete don't rename. 
+ auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); + std::error_code RenameEC = setDeleteDisposition(H, false); + if (!RenameEC) { + RenameEC = rename_fd(FD, Name); + // If rename failed because it's cross-device, copy instead + if (RenameEC == + std::error_code(ERROR_NOT_SAME_DEVICE, std::system_category())) { + RenameEC = copy_file(TmpName, Name); + setDeleteDisposition(H, true); + } + } + + // If we can't rename, discard the temporary file. + if (RenameEC) + setDeleteDisposition(H, true); +#else + std::error_code RenameEC = fs::rename(TmpName, Name); + if (RenameEC) { + // If we can't rename, try to copy to work around cross-device link issues. + RenameEC = sys::fs::copy_file(TmpName, Name); + // If we can't rename or copy, discard the temporary file. + if (RenameEC) + remove(TmpName); + } + sys::DontRemoveFileOnSignal(TmpName); +#endif + + if (!RenameEC) + TmpName = ""; + + if (close(FD) == -1) { + std::error_code EC(errno, std::generic_category()); + return errorCodeToError(EC); + } + FD = -1; + + return errorCodeToError(RenameEC); +} + +Error TempFile::keep() { + assert(!Done); + Done = true; + +#ifdef _WIN32 + auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); + if (std::error_code EC = setDeleteDisposition(H, false)) + return errorCodeToError(EC); +#else + sys::DontRemoveFileOnSignal(TmpName); +#endif + + TmpName = ""; + + if (close(FD) == -1) { + std::error_code EC(errno, std::generic_category()); + return errorCodeToError(EC); + } + FD = -1; + + return Error::success(); +} + +Expected<TempFile> TempFile::create(const Twine &Model, unsigned Mode) { + int FD; + SmallString<128> ResultPath; + if (std::error_code EC = + createUniqueFile(Model, FD, ResultPath, Mode, OF_Delete)) + return errorCodeToError(EC); + + TempFile Ret(ResultPath, FD); +#ifndef _WIN32 + if (sys::RemoveFileOnSignal(ResultPath)) { + // Make sure we delete the file when RemoveFileOnSignal fails. 
+ consumeError(Ret.discard()); + std::error_code EC(errc::operation_not_permitted); + return errorCodeToError(EC); + } +#endif + return std::move(Ret); +} +} + +} // end namespace sys +} // end namespace llvm diff --git a/llvm/lib/Support/PluginLoader.cpp b/llvm/lib/Support/PluginLoader.cpp new file mode 100644 index 0000000000000..6fe195ffda7ac --- /dev/null +++ b/llvm/lib/Support/PluginLoader.cpp @@ -0,0 +1,46 @@ +//===-- PluginLoader.cpp - Implement -load command line option ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the -load <plugin> command line option handler. +// +//===----------------------------------------------------------------------===// + +#define DONT_GET_PLUGIN_LOADER_OPTION +#include "llvm/Support/PluginLoader.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> +using namespace llvm; + +static ManagedStatic<std::vector<std::string> > Plugins; +static ManagedStatic<sys::SmartMutex<true> > PluginsLock; + +/// Loads the library named by Filename while holding PluginsLock. When +/// LoadLibraryPermanently reports a failure the error text is printed to +/// errs() and the request is ignored; otherwise Filename is appended to +/// Plugins. +void PluginLoader::operator=(const std::string &Filename) { + sys::SmartScopedLock<true> Lock(*PluginsLock); + std::string Error; + if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) { + errs() << "Error opening '" << Filename << "': " << Error + << "\n -load request ignored.\n"; + } else { + Plugins->push_back(Filename); + } +} + +unsigned PluginLoader::getNumPlugins() { + sys::SmartScopedLock<true> Lock(*PluginsLock); + return Plugins.isConstructed() ? 
Plugins->size() : 0; +} + +std::string &PluginLoader::getPlugin(unsigned num) { + sys::SmartScopedLock<true> Lock(*PluginsLock); + assert(Plugins.isConstructed() && num < Plugins->size() && + "Asking for an out of bounds plugin"); + return (*Plugins)[num]; +} diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp new file mode 100644 index 0000000000000..bfb238cc85391 --- /dev/null +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -0,0 +1,300 @@ +//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for dealing with the possibility of +// Unix signals occurring while your program is running. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm-c/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/Watchdog.h" +#include "llvm/Support/raw_ostream.h" + +#include <atomic> +#include <cstdarg> +#include <cstdio> +#include <tuple> + +#ifdef HAVE_CRASHREPORTERCLIENT_H +#include <CrashReporterClient.h> +#endif + +using namespace llvm; + +// If backtrace support is not enabled, compile out support for pretty stack +// traces. This has the secondary effect of not requiring thread local storage +// when backtrace support is disabled. 
+#if ENABLE_BACKTRACES + +// We need a thread local pointer to manage the stack of our stack trace +// objects, but we *really* cannot tolerate destructors running and do not want +// to pay any overhead of synchronizing. As a consequence, we use a raw +// thread-local variable. +static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr; + +// The use of 'volatile' here is to ensure that any particular thread always +// reloads the value of the counter. The 'std::atomic' allows us to specify that +// this variable is accessed in an unsynchronized way (it's not actually +// synchronizing). This does technically mean that the value may not appear to +// be the same across threads running simultaneously on different CPUs, but in +// practice the worst that will happen is that we won't print a stack trace when +// we could have. +// +// This is initialized to 1 because 0 is used as a sentinel for "not enabled on +// the current thread". If the user happens to overflow an 'unsigned' with +// SIGINFO requests, it's possible that some threads will stop responding to it, +// but the program won't crash. +static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter = + ATOMIC_VAR_INIT(1); +static LLVM_THREAD_LOCAL unsigned ThreadLocalSigInfoGenerationCounter = 0; + +namespace llvm { +/// Reverses, in place, the list linked through NextEntry that starts at Head +/// and returns its new first entry (nullptr when Head is nullptr). Each +/// iteration moves one entry from the front of the remaining list onto the +/// front of the reversed list. +PrettyStackTraceEntry *ReverseStackTrace(PrettyStackTraceEntry *Head) { + PrettyStackTraceEntry *Prev = nullptr; + while (Head) + std::tie(Prev, Head, Head->NextEntry) = + std::make_tuple(Head, Head->NextEntry, Prev); + return Prev; +} +} + +static void PrintStack(raw_ostream &OS) { + // Print out the stack in reverse order. To avoid recursion (which is likely + // to fail if we crashed due to stack overflow), we do an up-front pass to + // reverse the stack, then print it, then reverse it again. 
+ unsigned ID = 0; + SaveAndRestore<PrettyStackTraceEntry *> SavedStack{PrettyStackTraceHead, + nullptr}; + PrettyStackTraceEntry *ReversedStack = ReverseStackTrace(SavedStack.get()); + for (const PrettyStackTraceEntry *Entry = ReversedStack; Entry; + Entry = Entry->getNextEntry()) { + OS << ID++ << ".\t"; + sys::Watchdog W(5); + Entry->print(OS); + } + llvm::ReverseStackTrace(ReversedStack); +} + +/// Print the current stack trace to the specified stream. +/// +/// Marked NOINLINE so it can be called from debuggers. +LLVM_ATTRIBUTE_NOINLINE +static void PrintCurStackTrace(raw_ostream &OS) { + // Don't print an empty trace. + if (!PrettyStackTraceHead) return; + + // If there are pretty stack frames registered, walk and emit them. + OS << "Stack dump:\n"; + + PrintStack(OS); + OS.flush(); +} + +// Integrate with crash reporter libraries. +#if defined (__APPLE__) && defined(HAVE_CRASHREPORTERCLIENT_H) +// If any clients of llvm try to link to libCrashReporterClient.a themselves, +// only one crash info struct will be used. 
+extern "C" { +CRASH_REPORTER_CLIENT_HIDDEN +struct crashreporter_annotations_t gCRAnnotations + __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) +#if CRASHREPORTER_ANNOTATIONS_VERSION < 5 + = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 }; +#else + = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0, 0 }; +#endif +} +#elif defined(__APPLE__) && HAVE_CRASHREPORTER_INFO +extern "C" const char *__crashreporter_info__ + __attribute__((visibility("hidden"))) = 0; +asm(".desc ___crashreporter_info__, 0x10"); +#endif + +static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED; +static void setCrashLogMessage(const char *msg) { +#ifdef HAVE_CRASHREPORTERCLIENT_H + (void)CRSetCrashLogMessage(msg); +#elif HAVE_CRASHREPORTER_INFO + __crashreporter_info__ = msg; +#endif + // Don't reorder subsequent operations: whatever comes after might crash and + // we want the system crash handling to see the message we just set. + std::atomic_signal_fence(std::memory_order_seq_cst); +} + +#ifdef __APPLE__ +using CrashHandlerString = SmallString<2048>; +using CrashHandlerStringStorage = + std::aligned_storage<sizeof(CrashHandlerString), + alignof(CrashHandlerString)>::type; +static CrashHandlerStringStorage crashHandlerStringStorage; +#endif + +/// This callback is run if a fatal signal is delivered to the process, it +/// prints the pretty stack trace. +static void CrashHandler(void *) { +#ifndef __APPLE__ + // On non-apple systems, just emit the crash stack trace to stderr. + PrintCurStackTrace(errs()); +#else + // Emit the crash stack trace to a SmallString, put it where the system crash + // handling will find it, and also send it to stderr. + // + // The SmallString is fairly large in the hope that we don't allocate (we're + // handling a fatal signal, something is already pretty wrong, allocation + // might not work). Further, we don't use a magic static in case that's also + // borked. 
We leak any allocation that does occur because the program is about + // to die anyways. This is technically racy if we were handling two fatal + // signals, however if we're in that situation a race is the least of our + // worries. + auto &crashHandlerString = + *new (&crashHandlerStringStorage) CrashHandlerString; + + // If we crash while trying to print the stack trace, we still want the system + // crash handling to have some partial information. That'll work out as long + // as the SmallString doesn't allocate. If it does allocate then the system + // crash handling will see some garbage because the inline buffer now contains + // a pointer. + setCrashLogMessage(crashHandlerString.c_str()); + + { + raw_svector_ostream Stream(crashHandlerString); + PrintCurStackTrace(Stream); + } + + if (!crashHandlerString.empty()) { + setCrashLogMessage(crashHandlerString.c_str()); + errs() << crashHandlerString.str(); + } else + setCrashLogMessage("No crash information."); +#endif +} + +static void printForSigInfoIfNeeded() { + unsigned CurrentSigInfoGeneration = + GlobalSigInfoGenerationCounter.load(std::memory_order_relaxed); + if (ThreadLocalSigInfoGenerationCounter == 0 || + ThreadLocalSigInfoGenerationCounter == CurrentSigInfoGeneration) { + return; + } + + PrintCurStackTrace(errs()); + ThreadLocalSigInfoGenerationCounter = CurrentSigInfoGeneration; +} + +#endif // ENABLE_BACKTRACES + +PrettyStackTraceEntry::PrettyStackTraceEntry() { +#if ENABLE_BACKTRACES + // Handle SIGINFO first, because we haven't finished constructing yet. + printForSigInfoIfNeeded(); + // Link ourselves. + NextEntry = PrettyStackTraceHead; + PrettyStackTraceHead = this; +#endif +} + +PrettyStackTraceEntry::~PrettyStackTraceEntry() { +#if ENABLE_BACKTRACES + assert(PrettyStackTraceHead == this && + "Pretty stack trace entry destruction is out of order"); + PrettyStackTraceHead = NextEntry; + // Handle SIGINFO first, because we already started destructing. 
+ printForSigInfoIfNeeded(); +#endif +} + +void PrettyStackTraceString::print(raw_ostream &OS) const { OS << Str << "\n"; } + +PrettyStackTraceFormat::PrettyStackTraceFormat(const char *Format, ...) { + va_list AP; + va_start(AP, Format); + const int SizeOrError = vsnprintf(nullptr, 0, Format, AP); + va_end(AP); + if (SizeOrError < 0) { + return; + } + + const int Size = SizeOrError + 1; // '\0' + Str.resize(Size); + va_start(AP, Format); + vsnprintf(Str.data(), Size, Format, AP); + va_end(AP); +} + +void PrettyStackTraceFormat::print(raw_ostream &OS) const { OS << Str << "\n"; } + +void PrettyStackTraceProgram::print(raw_ostream &OS) const { + OS << "Program arguments: "; + // Print the argument list. + for (unsigned i = 0, e = ArgC; i != e; ++i) + OS << ArgV[i] << ' '; + OS << '\n'; +} + +#if ENABLE_BACKTRACES +static bool RegisterCrashPrinter() { + sys::AddSignalHandler(CrashHandler, nullptr); + return false; +} +#endif + +void llvm::EnablePrettyStackTrace() { +#if ENABLE_BACKTRACES + // The first time this is called, we register the crash printer. + static bool HandlerRegistered = RegisterCrashPrinter(); + (void)HandlerRegistered; +#endif +} + +void llvm::EnablePrettyStackTraceOnSigInfoForThisThread(bool ShouldEnable) { +#if ENABLE_BACKTRACES + if (!ShouldEnable) { + ThreadLocalSigInfoGenerationCounter = 0; + return; + } + + // The first time this is called, we register the SIGINFO handler. + static bool HandlerRegistered = []{ + sys::SetInfoSignalFunction([]{ + GlobalSigInfoGenerationCounter.fetch_add(1, std::memory_order_relaxed); + }); + return false; + }(); + (void)HandlerRegistered; + + // Next, enable it for the current thread. 
+ ThreadLocalSigInfoGenerationCounter = + GlobalSigInfoGenerationCounter.load(std::memory_order_relaxed); +#endif +} + +const void *llvm::SavePrettyStackState() { +#if ENABLE_BACKTRACES + return PrettyStackTraceHead; +#else + return nullptr; +#endif +} + +void llvm::RestorePrettyStackState(const void *Top) { +#if ENABLE_BACKTRACES + PrettyStackTraceHead = + static_cast<PrettyStackTraceEntry *>(const_cast<void *>(Top)); +#endif +} + +void LLVMEnablePrettyStackTrace() { + EnablePrettyStackTrace(); +} diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp new file mode 100644 index 0000000000000..5b64710081597 --- /dev/null +++ b/llvm/lib/Support/Process.cpp @@ -0,0 +1,97 @@ +//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the operating system Process concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Process.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Config/config.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" + +using namespace llvm; +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. 
+//===----------------------------------------------------------------------===// + +Optional<std::string> Process::FindInEnvPath(StringRef EnvName, + StringRef FileName) { + return FindInEnvPath(EnvName, FileName, {}); +} + +/// Looks for FileName in the directories listed in the environment variable +/// EnvName (split on EnvPathSeparator). Empty entries and directories that +/// are fs::equivalent to an IgnoreList entry are skipped. Returns the first +/// candidate path that exists, or an empty Optional when the variable is not +/// set or no candidate exists. FileName must not be absolute (asserted). +Optional<std::string> Process::FindInEnvPath(StringRef EnvName, + StringRef FileName, + ArrayRef<std::string> IgnoreList) { + assert(!path::is_absolute(FileName)); + Optional<std::string> FoundPath; + Optional<std::string> OptPath = Process::GetEnv(EnvName); + if (!OptPath.hasValue()) + return FoundPath; + + const char EnvPathSeparatorStr[] = {EnvPathSeparator, '\0'}; + SmallVector<StringRef, 8> Dirs; + SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr); + + for (StringRef Dir : Dirs) { + if (Dir.empty()) + continue; + + if (any_of(IgnoreList, [&](StringRef S) { return fs::equivalent(S, Dir); })) + continue; + + SmallString<128> FilePath(Dir); + path::append(FilePath, FileName); + if (fs::exists(Twine(FilePath))) { + FoundPath = FilePath.str(); + break; + } + } + + return FoundPath; +} + + +#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" + +#define ALLCOLORS(FGBG,BOLD) {\ + COLOR(FGBG, "0", BOLD),\ + COLOR(FGBG, "1", BOLD),\ + COLOR(FGBG, "2", BOLD),\ + COLOR(FGBG, "3", BOLD),\ + COLOR(FGBG, "4", BOLD),\ + COLOR(FGBG, "5", BOLD),\ + COLOR(FGBG, "6", BOLD),\ + COLOR(FGBG, "7", BOLD)\ + } + +static const char colorcodes[2][2][8][10] = { + { ALLCOLORS("3",""), ALLCOLORS("3","1;") }, + { ALLCOLORS("4",""), ALLCOLORS("4","1;") } +}; + +// A CMake option controls whether we emit core dumps by default. An application +// may disable core dumps by calling Process::PreventCoreFiles(). +static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS; + +bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; } + +// Include the platform-specific parts of this class. 
+#ifdef LLVM_ON_UNIX +#include "Unix/Process.inc" +#endif +#ifdef _WIN32 +#include "Windows/Process.inc" +#endif diff --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp new file mode 100644 index 0000000000000..0a9363c59fc68 --- /dev/null +++ b/llvm/lib/Support/Program.cpp @@ -0,0 +1,82 @@ +//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the operating system Program concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Program.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/llvm-config.h" +#include <system_error> +using namespace llvm; +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. 
+//===----------------------------------------------------------------------===// + +static bool Execute(ProcessInfo &PI, StringRef Program, + ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env, + ArrayRef<Optional<StringRef>> Redirects, + unsigned MemoryLimit, std::string *ErrMsg); + +int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args, + Optional<ArrayRef<StringRef>> Env, + ArrayRef<Optional<StringRef>> Redirects, + unsigned SecondsToWait, unsigned MemoryLimit, + std::string *ErrMsg, bool *ExecutionFailed) { + assert(Redirects.empty() || Redirects.size() == 3); + ProcessInfo PI; + if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) { + if (ExecutionFailed) + *ExecutionFailed = false; + ProcessInfo Result = Wait( + PI, SecondsToWait, /*WaitUntilTerminates=*/SecondsToWait == 0, ErrMsg); + return Result.ReturnCode; + } + + if (ExecutionFailed) + *ExecutionFailed = true; + + return -1; +} + +ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef<StringRef> Args, + Optional<ArrayRef<StringRef>> Env, + ArrayRef<Optional<StringRef>> Redirects, + unsigned MemoryLimit, std::string *ErrMsg, + bool *ExecutionFailed) { + assert(Redirects.empty() || Redirects.size() == 3); + ProcessInfo PI; + if (ExecutionFailed) + *ExecutionFailed = false; + if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) + if (ExecutionFailed) + *ExecutionFailed = true; + + return PI; +} + +bool sys::commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef<const char *> Args) { + SmallVector<StringRef, 8> StringRefArgs; + StringRefArgs.reserve(Args.size()); + for (const char *A : Args) + StringRefArgs.emplace_back(A); + return commandLineFitsWithinSystemLimits(Program, StringRefArgs); +} + +// Include the platform-specific parts of this class. 
+#ifdef LLVM_ON_UNIX +#include "Unix/Program.inc" +#endif +#ifdef _WIN32 +#include "Windows/Program.inc" +#endif diff --git a/llvm/lib/Support/RWMutex.cpp b/llvm/lib/Support/RWMutex.cpp new file mode 100644 index 0000000000000..5accf73e5f940 --- /dev/null +++ b/llvm/lib/Support/RWMutex.cpp @@ -0,0 +1,136 @@ +//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm::sys::RWMutex class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/RWMutex.h" +#include "llvm/Config/config.h" + +#if defined(LLVM_USE_RW_MUTEX_IMPL) +using namespace llvm; +using namespace sys; + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +// Define all methods as no-ops if threading is explicitly disabled + +RWMutexImpl::RWMutexImpl() = default; +RWMutexImpl::~RWMutexImpl() = default; + +bool RWMutexImpl::lock_shared() { return true; } +bool RWMutexImpl::unlock_shared() { return true; } +bool RWMutexImpl::lock() { return true; } +bool RWMutexImpl::unlock() { return true; } + +#else + +#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT) + +#include <cassert> +#include <cstdlib> +#include <pthread.h> + +// Construct a RWMutex using pthread calls +RWMutexImpl::RWMutexImpl() +{ + // Declare the pthread_rwlock data structures + pthread_rwlock_t* rwlock = + static_cast<pthread_rwlock_t*>(safe_malloc(sizeof(pthread_rwlock_t))); + +#ifdef __APPLE__ + // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init. 
+ bzero(rwlock, sizeof(pthread_rwlock_t)); +#endif + + // Initialize the rwlock + int errorcode = pthread_rwlock_init(rwlock, nullptr); + (void)errorcode; + assert(errorcode == 0); + + // Assign the data member + data_ = rwlock; +} + +// Destruct a RWMutex +RWMutexImpl::~RWMutexImpl() +{ + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != nullptr); + pthread_rwlock_destroy(rwlock); + free(rwlock); +} + +bool +RWMutexImpl::lock_shared() +{ + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != nullptr); + + int errorcode = pthread_rwlock_rdlock(rwlock); + return errorcode == 0; +} + +bool +RWMutexImpl::unlock_shared() +{ + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != nullptr); + + int errorcode = pthread_rwlock_unlock(rwlock); + return errorcode == 0; +} + +bool +RWMutexImpl::lock() +{ + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != nullptr); + + int errorcode = pthread_rwlock_wrlock(rwlock); + return errorcode == 0; +} + +bool +RWMutexImpl::unlock() +{ + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != nullptr); + + int errorcode = pthread_rwlock_unlock(rwlock); + return errorcode == 0; +} + +#else + +RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { } + +RWMutexImpl::~RWMutexImpl() { + delete static_cast<MutexImpl *>(data_); +} + +bool RWMutexImpl::lock_shared() { + return static_cast<MutexImpl *>(data_)->acquire(); +} + +bool RWMutexImpl::unlock_shared() { + return static_cast<MutexImpl *>(data_)->release(); +} + +bool RWMutexImpl::lock() { + return static_cast<MutexImpl *>(data_)->acquire(); +} + +bool RWMutexImpl::unlock() { + return static_cast<MutexImpl *>(data_)->release(); +} + +#endif +#endif +#endif diff --git a/llvm/lib/Support/RandomNumberGenerator.cpp b/llvm/lib/Support/RandomNumberGenerator.cpp new file mode 100644 index 0000000000000..09fad19799859 --- /dev/null 
+++ b/llvm/lib/Support/RandomNumberGenerator.cpp @@ -0,0 +1,84 @@ +//===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements deterministic random number generation (RNG). +// The current implementation is NOT cryptographically secure as it uses +// the C++11 <random> facilities. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#ifdef _WIN32 +#include "Windows/WindowsSupport.h" +#else +#include "Unix/Unix.h" +#endif + +using namespace llvm; + +#define DEBUG_TYPE "rng" + +static cl::opt<uint64_t> Seed("rng-seed", cl::value_desc("seed"), cl::Hidden, + cl::desc("Seed for the random number generator"), + cl::init(0)); + +RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) { + LLVM_DEBUG(if (Seed == 0) dbgs() + << "Warning! Using unseeded random number generator.\n"); + + // Combine seed and salts using std::seed_seq. + // Data: Seed-low, Seed-high, Salt + // Note: std::seed_seq can only store 32-bit values, even though we + // are using a 64-bit RNG. This isn't a problem since the Mersenne + // twister constructor copies these correctly into its initial state. 
+ std::vector<uint32_t> Data; + Data.resize(2 + Salt.size()); + Data[0] = Seed; + Data[1] = Seed >> 32; + + llvm::copy(Salt, Data.begin() + 2); + + std::seed_seq SeedSeq(Data.begin(), Data.end()); + Generator.seed(SeedSeq); +} + +RandomNumberGenerator::result_type RandomNumberGenerator::operator()() { + return Generator(); +} + +// Get random vector of specified size +std::error_code llvm::getRandomBytes(void *Buffer, size_t Size) { +#ifdef _WIN32 + HCRYPTPROV hProvider; + if (CryptAcquireContext(&hProvider, 0, 0, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) { + ScopedCryptContext ScopedHandle(hProvider); + if (CryptGenRandom(hProvider, Size, static_cast<BYTE *>(Buffer))) + return std::error_code(); + } + return std::error_code(GetLastError(), std::system_category()); +#else + int Fd = open("/dev/urandom", O_RDONLY); + if (Fd != -1) { + std::error_code Ret; + ssize_t BytesRead = read(Fd, Buffer, Size); + if (BytesRead == -1) + Ret = std::error_code(errno, std::system_category()); + else if (BytesRead != static_cast<ssize_t>(Size)) + Ret = std::error_code(EIO, std::system_category()); + if (close(Fd) == -1) + Ret = std::error_code(errno, std::system_category()); + + return Ret; + } + return std::error_code(errno, std::system_category()); +#endif +} diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp new file mode 100644 index 0000000000000..8da345d4f1404 --- /dev/null +++ b/llvm/lib/Support/Regex.cpp @@ -0,0 +1,224 @@ +//===-- Regex.cpp - Regular Expression matcher implementation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a POSIX regular expression matcher. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Regex.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include <string> + +// Important this comes last because it defines "_REGEX_H_". At least on +// Darwin, if included before any header that (transitively) includes +// xlocale.h, this will cause trouble, because of missing regex-related types. +#include "regex_impl.h" + +using namespace llvm; + +Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} + +Regex::Regex(StringRef regex, unsigned Flags) { + unsigned flags = 0; + preg = new llvm_regex(); + preg->re_endp = regex.end(); + if (Flags & IgnoreCase) + flags |= REG_ICASE; + if (Flags & Newline) + flags |= REG_NEWLINE; + if (!(Flags & BasicRegex)) + flags |= REG_EXTENDED; + error = llvm_regcomp(preg, regex.data(), flags|REG_PEND); +} + +Regex::Regex(Regex &®ex) { + preg = regex.preg; + error = regex.error; + regex.preg = nullptr; + regex.error = REG_BADPAT; +} + +Regex::~Regex() { + if (preg) { + llvm_regfree(preg); + delete preg; + } +} + +namespace { + +/// Utility to convert a regex error code into a human-readable string. +void RegexErrorToString(int error, struct llvm_regex *preg, + std::string &Error) { + size_t len = llvm_regerror(error, preg, nullptr, 0); + + Error.resize(len - 1); + llvm_regerror(error, preg, &Error[0], len); +} + +} // namespace + +bool Regex::isValid(std::string &Error) const { + if (!error) + return true; + + RegexErrorToString(error, preg, Error); + return false; +} + +/// getNumMatches - In a valid regex, return the number of parenthesized +/// matches it contains. +unsigned Regex::getNumMatches() const { + return preg->re_nsub; +} + +bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches, + std::string *Error) const { + // Reset error, if given. 
+ if (Error && !Error->empty()) + *Error = ""; + + // Check if the regex itself didn't successfully compile. + if (Error ? !isValid(*Error) : !isValid()) + return false; + + unsigned nmatch = Matches ? preg->re_nsub+1 : 0; + + // pmatch needs to have at least one element. + SmallVector<llvm_regmatch_t, 8> pm; + pm.resize(nmatch > 0 ? nmatch : 1); + pm[0].rm_so = 0; + pm[0].rm_eo = String.size(); + + int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + + // Failure to match is not an error, it's just a normal return value. + // Any other error code is considered abnormal, and is logged in the Error. + if (rc == REG_NOMATCH) + return false; + if (rc != 0) { + if (Error) + RegexErrorToString(error, preg, *Error); + return false; + } + + // There was a match. + + if (Matches) { // match position requested + Matches->clear(); + + for (unsigned i = 0; i != nmatch; ++i) { + if (pm[i].rm_so == -1) { + // this group didn't match + Matches->push_back(StringRef()); + continue; + } + assert(pm[i].rm_eo >= pm[i].rm_so); + Matches->push_back(StringRef(String.data()+pm[i].rm_so, + pm[i].rm_eo-pm[i].rm_so)); + } + } + + return true; +} + +std::string Regex::sub(StringRef Repl, StringRef String, + std::string *Error) const { + SmallVector<StringRef, 8> Matches; + + // Return the input if there was no match. + if (!match(String, &Matches, Error)) + return String; + + // Otherwise splice in the replacement string, starting with the prefix before + // the match. + std::string Res(String.begin(), Matches[0].begin()); + + // Then the replacement string, honoring possible substitutions. + while (!Repl.empty()) { + // Skip to the next escape. + std::pair<StringRef, StringRef> Split = Repl.split('\\'); + + // Add the skipped substring. + Res += Split.first; + + // Check for terminimation and trailing backslash. 
+ if (Split.second.empty()) { + if (Repl.size() != Split.first.size() && + Error && Error->empty()) + *Error = "replacement string contained trailing backslash"; + break; + } + + // Otherwise update the replacement string and interpret escapes. + Repl = Split.second; + + // FIXME: We should have a StringExtras function for mapping C99 escapes. + switch (Repl[0]) { + // Treat all unrecognized characters as self-quoting. + default: + Res += Repl[0]; + Repl = Repl.substr(1); + break; + + // Single character escapes. + case 't': + Res += '\t'; + Repl = Repl.substr(1); + break; + case 'n': + Res += '\n'; + Repl = Repl.substr(1); + break; + + // Decimal escapes are backreferences. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + // Extract the backreference number. + StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); + Repl = Repl.substr(Ref.size()); + + unsigned RefValue; + if (!Ref.getAsInteger(10, RefValue) && + RefValue < Matches.size()) + Res += Matches[RefValue]; + else if (Error && Error->empty()) + *Error = ("invalid backreference string '" + Twine(Ref) + "'").str(); + break; + } + } + } + + // And finally the suffix. + Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); + + return Res; +} + +// These are the special characters matched in functions like "p_ere_exp". +static const char RegexMetachars[] = "()^$|*+?.[]\\{}"; + +bool Regex::isLiteralERE(StringRef Str) { + // Check for regex metacharacters. This list was derived from our regex + // implementation in regcomp.c and double checked against the POSIX extended + // regular expression specification. 
+ return Str.find_first_of(RegexMetachars) == StringRef::npos; +} + +std::string Regex::escape(StringRef String) { + std::string RegexStr; + for (unsigned i = 0, e = String.size(); i != e; ++i) { + if (strchr(RegexMetachars, String[i])) + RegexStr += '\\'; + RegexStr += String[i]; + } + + return RegexStr; +} diff --git a/llvm/lib/Support/SHA1.cpp b/llvm/lib/Support/SHA1.cpp new file mode 100644 index 0000000000000..47a5f07fbe7b1 --- /dev/null +++ b/llvm/lib/Support/SHA1.cpp @@ -0,0 +1,280 @@ +//====- SHA1.cpp - Private copy of the SHA1 implementation ---*- C++ -* ======// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This code is taken from public domain +// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c and +// http://cvsweb.netbsd.org/bsdweb.cgi/src/common/lib/libc/hash/sha1/sha1.c?rev=1.6) +// and modified by wrapping it in a C++ interface for LLVM, +// and removing unnecessary code. 
/// Rotate the 32-bit value \p Number left by \p Bits bit positions.
///
/// The count is reduced modulo 32, so a count of 0 (or any multiple of 32)
/// returns \p Number unchanged instead of evaluating a 32-bit shift by 32,
/// which is undefined. The round helpers in this file call it with counts of
/// 1, 5 and 30, for which the result is the same as before.
static uint32_t rol(uint32_t Number, int Bits) {
  const unsigned Left = static_cast<unsigned>(Bits) & 31u;
  const unsigned Right = (32u - Left) & 31u;
  return (Number << Left) | (Number >> Right);
}
SEED_0; + InternalState.State[1] = SEED_1; + InternalState.State[2] = SEED_2; + InternalState.State[3] = SEED_3; + InternalState.State[4] = SEED_4; + InternalState.ByteCount = 0; + InternalState.BufferOffset = 0; +} + +void SHA1::hashBlock() { + uint32_t A = InternalState.State[0]; + uint32_t B = InternalState.State[1]; + uint32_t C = InternalState.State[2]; + uint32_t D = InternalState.State[3]; + uint32_t E = InternalState.State[4]; + + // 4 rounds of 20 operations each. Loop unrolled. + r0(A, B, C, D, E, 0, InternalState.Buffer.L); + r0(E, A, B, C, D, 1, InternalState.Buffer.L); + r0(D, E, A, B, C, 2, InternalState.Buffer.L); + r0(C, D, E, A, B, 3, InternalState.Buffer.L); + r0(B, C, D, E, A, 4, InternalState.Buffer.L); + r0(A, B, C, D, E, 5, InternalState.Buffer.L); + r0(E, A, B, C, D, 6, InternalState.Buffer.L); + r0(D, E, A, B, C, 7, InternalState.Buffer.L); + r0(C, D, E, A, B, 8, InternalState.Buffer.L); + r0(B, C, D, E, A, 9, InternalState.Buffer.L); + r0(A, B, C, D, E, 10, InternalState.Buffer.L); + r0(E, A, B, C, D, 11, InternalState.Buffer.L); + r0(D, E, A, B, C, 12, InternalState.Buffer.L); + r0(C, D, E, A, B, 13, InternalState.Buffer.L); + r0(B, C, D, E, A, 14, InternalState.Buffer.L); + r0(A, B, C, D, E, 15, InternalState.Buffer.L); + r1(E, A, B, C, D, 16, InternalState.Buffer.L); + r1(D, E, A, B, C, 17, InternalState.Buffer.L); + r1(C, D, E, A, B, 18, InternalState.Buffer.L); + r1(B, C, D, E, A, 19, InternalState.Buffer.L); + + r2(A, B, C, D, E, 20, InternalState.Buffer.L); + r2(E, A, B, C, D, 21, InternalState.Buffer.L); + r2(D, E, A, B, C, 22, InternalState.Buffer.L); + r2(C, D, E, A, B, 23, InternalState.Buffer.L); + r2(B, C, D, E, A, 24, InternalState.Buffer.L); + r2(A, B, C, D, E, 25, InternalState.Buffer.L); + r2(E, A, B, C, D, 26, InternalState.Buffer.L); + r2(D, E, A, B, C, 27, InternalState.Buffer.L); + r2(C, D, E, A, B, 28, InternalState.Buffer.L); + r2(B, C, D, E, A, 29, InternalState.Buffer.L); + r2(A, B, C, D, E, 30, 
InternalState.Buffer.L); + r2(E, A, B, C, D, 31, InternalState.Buffer.L); + r2(D, E, A, B, C, 32, InternalState.Buffer.L); + r2(C, D, E, A, B, 33, InternalState.Buffer.L); + r2(B, C, D, E, A, 34, InternalState.Buffer.L); + r2(A, B, C, D, E, 35, InternalState.Buffer.L); + r2(E, A, B, C, D, 36, InternalState.Buffer.L); + r2(D, E, A, B, C, 37, InternalState.Buffer.L); + r2(C, D, E, A, B, 38, InternalState.Buffer.L); + r2(B, C, D, E, A, 39, InternalState.Buffer.L); + + r3(A, B, C, D, E, 40, InternalState.Buffer.L); + r3(E, A, B, C, D, 41, InternalState.Buffer.L); + r3(D, E, A, B, C, 42, InternalState.Buffer.L); + r3(C, D, E, A, B, 43, InternalState.Buffer.L); + r3(B, C, D, E, A, 44, InternalState.Buffer.L); + r3(A, B, C, D, E, 45, InternalState.Buffer.L); + r3(E, A, B, C, D, 46, InternalState.Buffer.L); + r3(D, E, A, B, C, 47, InternalState.Buffer.L); + r3(C, D, E, A, B, 48, InternalState.Buffer.L); + r3(B, C, D, E, A, 49, InternalState.Buffer.L); + r3(A, B, C, D, E, 50, InternalState.Buffer.L); + r3(E, A, B, C, D, 51, InternalState.Buffer.L); + r3(D, E, A, B, C, 52, InternalState.Buffer.L); + r3(C, D, E, A, B, 53, InternalState.Buffer.L); + r3(B, C, D, E, A, 54, InternalState.Buffer.L); + r3(A, B, C, D, E, 55, InternalState.Buffer.L); + r3(E, A, B, C, D, 56, InternalState.Buffer.L); + r3(D, E, A, B, C, 57, InternalState.Buffer.L); + r3(C, D, E, A, B, 58, InternalState.Buffer.L); + r3(B, C, D, E, A, 59, InternalState.Buffer.L); + + r4(A, B, C, D, E, 60, InternalState.Buffer.L); + r4(E, A, B, C, D, 61, InternalState.Buffer.L); + r4(D, E, A, B, C, 62, InternalState.Buffer.L); + r4(C, D, E, A, B, 63, InternalState.Buffer.L); + r4(B, C, D, E, A, 64, InternalState.Buffer.L); + r4(A, B, C, D, E, 65, InternalState.Buffer.L); + r4(E, A, B, C, D, 66, InternalState.Buffer.L); + r4(D, E, A, B, C, 67, InternalState.Buffer.L); + r4(C, D, E, A, B, 68, InternalState.Buffer.L); + r4(B, C, D, E, A, 69, InternalState.Buffer.L); + r4(A, B, C, D, E, 70, InternalState.Buffer.L); + r4(E, A, 
B, C, D, 71, InternalState.Buffer.L); + r4(D, E, A, B, C, 72, InternalState.Buffer.L); + r4(C, D, E, A, B, 73, InternalState.Buffer.L); + r4(B, C, D, E, A, 74, InternalState.Buffer.L); + r4(A, B, C, D, E, 75, InternalState.Buffer.L); + r4(E, A, B, C, D, 76, InternalState.Buffer.L); + r4(D, E, A, B, C, 77, InternalState.Buffer.L); + r4(C, D, E, A, B, 78, InternalState.Buffer.L); + r4(B, C, D, E, A, 79, InternalState.Buffer.L); + + InternalState.State[0] += A; + InternalState.State[1] += B; + InternalState.State[2] += C; + InternalState.State[3] += D; + InternalState.State[4] += E; +} + +void SHA1::addUncounted(uint8_t Data) { +#ifdef SHA_BIG_ENDIAN + InternalState.Buffer.C[InternalState.BufferOffset] = Data; +#else + InternalState.Buffer.C[InternalState.BufferOffset ^ 3] = Data; +#endif + + InternalState.BufferOffset++; + if (InternalState.BufferOffset == BLOCK_LENGTH) { + hashBlock(); + InternalState.BufferOffset = 0; + } +} + +void SHA1::writebyte(uint8_t Data) { + ++InternalState.ByteCount; + addUncounted(Data); +} + +void SHA1::update(ArrayRef<uint8_t> Data) { + for (auto &C : Data) + writebyte(C); +} + +void SHA1::pad() { + // Implement SHA-1 padding (fips180-2 5.1.1) + + // Pad with 0x80 followed by 0x00 until the end of the block + addUncounted(0x80); + while (InternalState.BufferOffset != 56) + addUncounted(0x00); + + // Append length in the last 8 bytes + addUncounted(0); // We're only using 32 bit lengths + addUncounted(0); // But SHA-1 supports 64 bit lengths + addUncounted(0); // So zero pad the top bits + addUncounted(InternalState.ByteCount >> 29); // Shifting to multiply by 8 + addUncounted(InternalState.ByteCount >> + 21); // as SHA-1 supports bitstreams as well as + addUncounted(InternalState.ByteCount >> 13); // byte. 
+ addUncounted(InternalState.ByteCount >> 5); + addUncounted(InternalState.ByteCount << 3); +} + +StringRef SHA1::final() { + // Pad to complete the last block + pad(); + +#ifdef SHA_BIG_ENDIAN + // Just copy the current state + for (int i = 0; i < 5; i++) { + HashResult[i] = InternalState.State[i]; + } +#else + // Swap byte order back + for (int i = 0; i < 5; i++) { + HashResult[i] = (((InternalState.State[i]) << 24) & 0xff000000) | + (((InternalState.State[i]) << 8) & 0x00ff0000) | + (((InternalState.State[i]) >> 8) & 0x0000ff00) | + (((InternalState.State[i]) >> 24) & 0x000000ff); + } +#endif + + // Return pointer to hash (20 characters) + return StringRef((char *)HashResult, HASH_LENGTH); +} + +StringRef SHA1::result() { + auto StateToRestore = InternalState; + + auto Hash = final(); + + // Restore the state + InternalState = StateToRestore; + + // Return pointer to hash (20 characters) + return Hash; +} + +std::array<uint8_t, 20> SHA1::hash(ArrayRef<uint8_t> Data) { + SHA1 Hash; + Hash.update(Data); + StringRef S = Hash.final(); + + std::array<uint8_t, 20> Arr; + memcpy(Arr.data(), S.data(), S.size()); + return Arr; +} diff --git a/llvm/lib/Support/ScaledNumber.cpp b/llvm/lib/Support/ScaledNumber.cpp new file mode 100644 index 0000000000000..54d4cc33410b1 --- /dev/null +++ b/llvm/lib/Support/ScaledNumber.cpp @@ -0,0 +1,323 @@ +//==- lib/Support/ScaledNumber.cpp - Support for scaled numbers -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of some scaled number algorithms. 
/// Return \p N divided by two, rounded up (computed without overflow).
static uint64_t getHalf(uint64_t N) {
  const uint64_t Floor = N >> 1;
  const uint64_t OddBit = N & 1;
  return Floor + OddBit;
}
+ uint64_t Dividend64 = Dividend; + int Shift = 0; + if (int Zeros = countLeadingZeros(Dividend64)) { + Shift -= Zeros; + Dividend64 <<= Zeros; + } + uint64_t Quotient = Dividend64 / Divisor; + uint64_t Remainder = Dividend64 % Divisor; + + // If Quotient needs to be shifted, leave the rounding to getAdjusted(). + if (Quotient > UINT32_MAX) + return getAdjusted<uint32_t>(Quotient, Shift); + + // Round based on the value of the next bit. + return getRounded<uint32_t>(Quotient, Shift, Remainder >= getHalf(Divisor)); +} + +std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend, + uint64_t Divisor) { + assert(Dividend && "expected non-zero dividend"); + assert(Divisor && "expected non-zero divisor"); + + // Minimize size of divisor. + int Shift = 0; + if (int Zeros = countTrailingZeros(Divisor)) { + Shift -= Zeros; + Divisor >>= Zeros; + } + + // Check for powers of two. + if (Divisor == 1) + return std::make_pair(Dividend, Shift); + + // Maximize size of dividend. + if (int Zeros = countLeadingZeros(Dividend)) { + Shift -= Zeros; + Dividend <<= Zeros; + } + + // Start with the result of a divide. + uint64_t Quotient = Dividend / Divisor; + Dividend %= Divisor; + + // Continue building the quotient with long division. + while (!(Quotient >> 63) && Dividend) { + // Shift Dividend and check for overflow. + bool IsOverflow = Dividend >> 63; + Dividend <<= 1; + --Shift; + + // Get the next bit of Quotient. + Quotient <<= 1; + if (IsOverflow || Divisor <= Dividend) { + Quotient |= 1; + Dividend -= Divisor; + } + } + + return getRounded(Quotient, Shift, Dividend >= getHalf(Divisor)); +} + +int ScaledNumbers::compareImpl(uint64_t L, uint64_t R, int ScaleDiff) { + assert(ScaleDiff >= 0 && "wrong argument order"); + assert(ScaleDiff < 64 && "numbers too far apart"); + + uint64_t L_adjusted = L >> ScaleDiff; + if (L_adjusted < R) + return -1; + if (L_adjusted > R) + return 1; + + return L > L_adjusted << ScaleDiff ? 
/// Remove trailing '0' characters from \p Float, keeping one character after
/// a '.' that would otherwise end the string.
///
/// Asserts that \p Float contains at least one character other than '0'.
static std::string stripTrailingZeros(const std::string &Float) {
  const size_t LastKept = Float.find_last_not_of('0');
  assert(LastKept != std::string::npos && "no . in floating point string");

  // If stripping stopped at the decimal point, retain the digit after it.
  const bool EndsAtDot = Float[LastKept] == '.';
  const size_t KeepLen = EndsAtDot ? LastKept + 2 : LastKept + 1;
  return Float.substr(0, KeepLen);
}
+ uint64_t Above0 = 0; + uint64_t Below0 = 0; + uint64_t Extra = 0; + int ExtraShift = 0; + if (E == 0) { + Above0 = D; + } else if (E > 0) { + if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) { + D <<= Shift; + E -= Shift; + + if (!E) + Above0 = D; + } + } else if (E > -64) { + Above0 = D >> -E; + Below0 = D << (64 + E); + } else if (E == -64) { + // Special case: shift by 64 bits is undefined behavior. + Below0 = D; + } else if (E > -120) { + Below0 = D >> (-E - 64); + Extra = D << (128 + E); + ExtraShift = -64 - E; + } + + // Fall back on APFloat for very small and very large numbers. + if (!Above0 && !Below0) + return toStringAPFloat(D, E, Precision); + + // Append the digits before the decimal. + std::string Str; + size_t DigitsOut = 0; + if (Above0) { + appendNumber(Str, Above0); + DigitsOut = Str.size(); + } else + appendDigit(Str, 0); + std::reverse(Str.begin(), Str.end()); + + // Return early if there's nothing after the decimal. + if (!Below0) + return Str + ".0"; + + // Append the decimal and beyond. + Str += '.'; + uint64_t Error = UINT64_C(1) << (64 - Width); + + // We need to shift Below0 to the right to make space for calculating + // digits. Save the precision we're losing in Extra. + Extra = (Below0 & 0xf) << 56 | (Extra >> 8); + Below0 >>= 4; + size_t SinceDot = 0; + size_t AfterDot = Str.size(); + do { + if (ExtraShift) { + --ExtraShift; + Error *= 5; + } else + Error *= 10; + + Below0 *= 10; + Extra *= 10; + Below0 += (Extra >> 60); + Extra = Extra & (UINT64_MAX >> 4); + appendDigit(Str, Below0 >> 60); + Below0 = Below0 & (UINT64_MAX >> 4); + if (DigitsOut || Str.back() != '0') + ++DigitsOut; + ++SinceDot; + } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 && + (!Precision || DigitsOut <= Precision || SinceDot < 2)); + + // Return early for maximum precision. + if (!Precision || DigitsOut <= Precision) + return stripTrailingZeros(Str); + + // Find where to truncate. 
+ size_t Truncate = + std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1); + + // Check if there's anything to truncate. + if (Truncate >= Str.size()) + return stripTrailingZeros(Str); + + bool Carry = doesRoundUp(Str[Truncate]); + if (!Carry) + return stripTrailingZeros(Str.substr(0, Truncate)); + + // Round with the first truncated digit. + for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend(); + I != E; ++I) { + if (*I == '.') + continue; + if (*I == '9') { + *I = '0'; + continue; + } + + ++*I; + Carry = false; + break; + } + + // Add "1" in front if we still need to carry. + return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate)); +} + +raw_ostream &ScaledNumberBase::print(raw_ostream &OS, uint64_t D, int16_t E, + int Width, unsigned Precision) { + return OS << toString(D, E, Width, Precision); +} + +void ScaledNumberBase::dump(uint64_t D, int16_t E, int Width) { + print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E + << "]"; +} diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp new file mode 100644 index 0000000000000..981dfbff520a1 --- /dev/null +++ b/llvm/lib/Support/ScopedPrinter.cpp @@ -0,0 +1,46 @@ +#include "llvm/Support/ScopedPrinter.h" + +#include "llvm/Support/Format.h" +#include <cctype> + +using namespace llvm::support; + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value) { + OS << "0x" << to_hexString(Value.Value); + return OS; +} + +const std::string to_hexString(uint64_t Value, bool UpperCase) { + std::string number; + llvm::raw_string_ostream stream(number); + stream << format_hex_no_prefix(Value, 1, UpperCase); + return stream.str(); +} + +void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str, + ArrayRef<uint8_t> Data, bool Block, + uint32_t StartOffset) { + if (Data.size() > 16) + Block = true; + + if (Block) { + startLine() << Label; + if (!Str.empty()) + OS << ": " << Str; + OS << " (\n"; 
+ if (!Data.empty()) + OS << format_bytes_with_ascii(Data, StartOffset, 16, 4, + (IndentLevel + 1) * 2, true) + << "\n"; + startLine() << ")\n"; + } else { + startLine() << Label << ":"; + if (!Str.empty()) + OS << " " << Str; + OS << " (" << format_bytes(Data, None, Data.size(), 1, 0, true) << ")\n"; + } +} + +} // namespace llvm diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp new file mode 100644 index 0000000000000..173a07f009d23 --- /dev/null +++ b/llvm/lib/Support/Signals.cpp @@ -0,0 +1,224 @@ +//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for dealing with the possibility of +// Unix signals occurring while your program is running. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Signals.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Options.h" +#include <vector> + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. 
+//===----------------------------------------------------------------------===// + +using namespace llvm; + +// Use explicit storage to avoid accessing cl::opt in a signal handler. +static bool DisableSymbolicationFlag = false; +static cl::opt<bool, true> + DisableSymbolication("disable-symbolication", + cl::desc("Disable symbolizing crash backtraces."), + cl::location(DisableSymbolicationFlag), cl::Hidden); + +// Callbacks to run in signal handler must be lock-free because a signal handler +// could be running as we add new callbacks. We don't add unbounded numbers of +// callbacks, an array is therefore sufficient. +struct CallbackAndCookie { + sys::SignalHandlerCallback Callback; + void *Cookie; + enum class Status { Empty, Initializing, Initialized, Executing }; + std::atomic<Status> Flag; +}; +static constexpr size_t MaxSignalHandlerCallbacks = 8; +static CallbackAndCookie CallBacksToRun[MaxSignalHandlerCallbacks]; + +// Signal-safe. +void sys::RunSignalHandlers() { + for (size_t I = 0; I < MaxSignalHandlerCallbacks; ++I) { + auto &RunMe = CallBacksToRun[I]; + auto Expected = CallbackAndCookie::Status::Initialized; + auto Desired = CallbackAndCookie::Status::Executing; + if (!RunMe.Flag.compare_exchange_strong(Expected, Desired)) + continue; + (*RunMe.Callback)(RunMe.Cookie); + RunMe.Callback = nullptr; + RunMe.Cookie = nullptr; + RunMe.Flag.store(CallbackAndCookie::Status::Empty); + } +} + +// Signal-safe. 
+static void insertSignalHandler(sys::SignalHandlerCallback FnPtr, + void *Cookie) { + for (size_t I = 0; I < MaxSignalHandlerCallbacks; ++I) { + auto &SetMe = CallBacksToRun[I]; + auto Expected = CallbackAndCookie::Status::Empty; + auto Desired = CallbackAndCookie::Status::Initializing; + if (!SetMe.Flag.compare_exchange_strong(Expected, Desired)) + continue; + SetMe.Callback = FnPtr; + SetMe.Cookie = Cookie; + SetMe.Flag.store(CallbackAndCookie::Status::Initialized); + return; + } + report_fatal_error("too many signal callbacks already registered"); +} + +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool); + +/// Format a pointer value as hexadecimal. Zero pad it out so its always the +/// same width. +static FormattedNumber format_ptr(void *PC) { + // Each byte is two hex digits plus 2 for the 0x prefix. + unsigned PtrWidth = 2 + 2 * sizeof(void *); + return format_hex((uint64_t)PC, PtrWidth); +} + +/// Helper that launches llvm-symbolizer and symbolizes a backtrace. +LLVM_ATTRIBUTE_USED +static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, + int Depth, llvm::raw_ostream &OS) { + if (DisableSymbolicationFlag) + return false; + + // Don't recursively invoke the llvm-symbolizer binary. + if (Argv0.find("llvm-symbolizer") != std::string::npos) + return false; + + // FIXME: Subtract necessary number from StackTrace entries to turn return addresses + // into actual instruction addresses. + // Use llvm-symbolizer tool to symbolize the stack traces. First look for it + // alongside our binary, then in $PATH. 
+ ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code(); + if (!Argv0.empty()) { + StringRef Parent = llvm::sys::path::parent_path(Argv0); + if (!Parent.empty()) + LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent); + } + if (!LLVMSymbolizerPathOrErr) + LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer"); + if (!LLVMSymbolizerPathOrErr) + return false; + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + + // If we don't know argv0 or the address of main() at this point, try + // to guess it anyway (it's possible on some platforms). + std::string MainExecutableName = + sys::fs::exists(Argv0) ? (std::string)Argv0 + : sys::fs::getMainExecutable(nullptr, nullptr); + BumpPtrAllocator Allocator; + StringSaver StrPool(Allocator); + std::vector<const char *> Modules(Depth, nullptr); + std::vector<intptr_t> Offsets(Depth, 0); + if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(), + MainExecutableName.c_str(), StrPool)) + return false; + int InputFD; + SmallString<32> InputFile, OutputFile; + sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); + sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); + FileRemover InputRemover(InputFile.c_str()); + FileRemover OutputRemover(OutputFile.c_str()); + + { + raw_fd_ostream Input(InputFD, true); + for (int i = 0; i < Depth; i++) { + if (Modules[i]) + Input << Modules[i] << " " << (void*)Offsets[i] << "\n"; + } + } + + Optional<StringRef> Redirects[] = {StringRef(InputFile), + StringRef(OutputFile), StringRef("")}; + StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", +#ifdef _WIN32 + // Pass --relative-address on Windows so that we don't + // have to add ImageBase from PE file. + // FIXME: Make this the default for llvm-symbolizer. 
+ "--relative-address", +#endif + "--demangle"}; + int RunResult = + sys::ExecuteAndWait(LLVMSymbolizerPath, Args, None, Redirects); + if (RunResult != 0) + return false; + + // This report format is based on the sanitizer stack trace printer. See + // sanitizer_stacktrace_printer.cc in compiler-rt. + auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return false; + StringRef Output = OutputBuf.get()->getBuffer(); + SmallVector<StringRef, 32> Lines; + Output.split(Lines, "\n"); + auto CurLine = Lines.begin(); + int frame_no = 0; + for (int i = 0; i < Depth; i++) { + auto PrintLineHeader = [&]() { + OS << right_justify(formatv("#{0}", frame_no++).str(), + std::log10(Depth) + 2) + << ' ' << format_ptr(StackTrace[i]) << ' '; + }; + if (!Modules[i]) { + PrintLineHeader(); + OS << '\n'; + continue; + } + // Read pairs of lines (function name and file/line info) until we + // encounter empty line. + for (;;) { + if (CurLine == Lines.end()) + return false; + StringRef FunctionName = *CurLine++; + if (FunctionName.empty()) + break; + PrintLineHeader(); + if (!FunctionName.startswith("??")) + OS << FunctionName << ' '; + if (CurLine == Lines.end()) + return false; + StringRef FileLineInfo = *CurLine++; + if (!FileLineInfo.startswith("??")) + OS << FileLineInfo; + else + OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")"; + OS << "\n"; + } + } + return true; +} + +// Include the platform-specific parts of this class. 
+#ifdef LLVM_ON_UNIX +#include "Unix/Signals.inc" +#endif +#ifdef _WIN32 +#include "Windows/Signals.inc" +#endif diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp new file mode 100644 index 0000000000000..aa159e1da2ae8 --- /dev/null +++ b/llvm/lib/Support/Signposts.cpp @@ -0,0 +1,121 @@ +//===-- Signposts.cpp - Interval debug annotations ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Signposts.h" +#include "llvm/Support/Timer.h" + +#include "llvm/Config/config.h" +#if LLVM_SUPPORT_XCODE_SIGNPOSTS +#include "llvm/ADT/DenseMap.h" +#include <os/signpost.h> +#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS + +using namespace llvm; + +#if LLVM_SUPPORT_XCODE_SIGNPOSTS +namespace { +os_log_t *LogCreator() { + os_log_t *X = new os_log_t; + *X = os_log_create("org.llvm.signposts", OS_LOG_CATEGORY_POINTS_OF_INTEREST); + return X; +} +void LogDeleter(os_log_t *X) { + os_release(*X); + delete X; +} +} // end anonymous namespace + +namespace llvm { +class SignpostEmitterImpl { + using LogPtrTy = + std::unique_ptr<os_log_t, std::function<void(os_log_t *)>>; + using LogTy = LogPtrTy::element_type; + + LogPtrTy SignpostLog; + DenseMap<const Timer *, os_signpost_id_t> Signposts; + + LogTy &getLogger() const { return *SignpostLog; } + os_signpost_id_t getSignpostForTimer(const Timer *T) { + const auto &I = Signposts.find(T); + if (I != Signposts.end()) + return I->second; + + const auto &Inserted = Signposts.insert( + std::make_pair(T, os_signpost_id_make_with_pointer(getLogger(), T))); + return Inserted.first->second; + } + +public: + SignpostEmitterImpl() : SignpostLog(LogCreator(), LogDeleter), Signposts() {} + + bool isEnabled() const { return os_signpost_enabled(*SignpostLog); } + + void 
startTimerInterval(Timer *T) { + if (isEnabled()) { + // Both strings used here are required to be constant literal strings + os_signpost_interval_begin(getLogger(), getSignpostForTimer(T), + "Pass Timers", "Begin %s", + T->getName().c_str()); + } + } + + void endTimerInterval(Timer *T) { + if (isEnabled()) { + // Both strings used here are required to be constant literal strings + os_signpost_interval_end(getLogger(), getSignpostForTimer(T), + "Pass Timers", "End %s", T->getName().c_str()); + } + } +}; +} // end namespace llvm +#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS + +#if LLVM_SUPPORT_XCODE_SIGNPOSTS +#define HAVE_ANY_SIGNPOST_IMPL 1 +#else +#define HAVE_ANY_SIGNPOST_IMPL 0 +#endif + +SignpostEmitter::SignpostEmitter() { +#if HAVE_ANY_SIGNPOST_IMPL + Impl = new SignpostEmitterImpl(); +#else // if HAVE_ANY_SIGNPOST_IMPL + Impl = nullptr; +#endif // if !HAVE_ANY_SIGNPOST_IMPL +} + +SignpostEmitter::~SignpostEmitter() { +#if HAVE_ANY_SIGNPOST_IMPL + delete Impl; +#endif // if HAVE_ANY_SIGNPOST_IMPL +} + +bool SignpostEmitter::isEnabled() const { +#if HAVE_ANY_SIGNPOST_IMPL + return Impl->isEnabled(); +#else + return false; +#endif // if !HAVE_ANY_SIGNPOST_IMPL +} + +void SignpostEmitter::startTimerInterval(Timer *T) { +#if HAVE_ANY_SIGNPOST_IMPL + if (Impl == nullptr) + return; + return Impl->startTimerInterval(T); +#endif // if !HAVE_ANY_SIGNPOST_IMPL +} + +void SignpostEmitter::endTimerInterval(Timer *T) { +#if HAVE_ANY_SIGNPOST_IMPL + if (Impl == nullptr) + return; + Impl->endTimerInterval(T); +#endif // if !HAVE_ANY_SIGNPOST_IMPL +} diff --git a/llvm/lib/Support/SmallPtrSet.cpp b/llvm/lib/Support/SmallPtrSet.cpp new file mode 100644 index 0000000000000..f60464c8e7561 --- /dev/null +++ b/llvm/lib/Support/SmallPtrSet.cpp @@ -0,0 +1,270 @@ +//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SmallPtrSet class. See SmallPtrSet.h for an +// overview of the algorithm. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstdlib> + +using namespace llvm; + +void SmallPtrSetImplBase::shrink_and_clear() { + assert(!isSmall() && "Can't shrink a small set!"); + free(CurArray); + + // Reduce the number of buckets. + unsigned Size = size(); + CurArraySize = Size > 16 ? 1 << (Log2_32_Ceil(Size) + 1) : 32; + NumNonEmpty = NumTombstones = 0; + + // Install the new array. Clear all the buckets to empty. + CurArray = (const void**)safe_malloc(sizeof(void*) * CurArraySize); + + memset(CurArray, -1, CurArraySize*sizeof(void*)); +} + +std::pair<const void *const *, bool> +SmallPtrSetImplBase::insert_imp_big(const void *Ptr) { + if (LLVM_UNLIKELY(size() * 4 >= CurArraySize * 3)) { + // If more than 3/4 of the array is full, grow. + Grow(CurArraySize < 64 ? 128 : CurArraySize * 2); + } else if (LLVM_UNLIKELY(CurArraySize - NumNonEmpty < CurArraySize / 8)) { + // If fewer of 1/8 of the array is empty (meaning that many are filled with + // tombstones), rehash. + Grow(CurArraySize); + } + + // Okay, we know we have space. Find a hash bucket. + const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr)); + if (*Bucket == Ptr) + return std::make_pair(Bucket, false); // Already inserted, good. + + // Otherwise, insert it! + if (*Bucket == getTombstoneMarker()) + --NumTombstones; + else + ++NumNonEmpty; // Track density. 
+ *Bucket = Ptr; + incrementEpoch(); + return std::make_pair(Bucket, true); +} + +const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const { + unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1); + unsigned ArraySize = CurArraySize; + unsigned ProbeAmt = 1; + const void *const *Array = CurArray; + const void *const *Tombstone = nullptr; + while (true) { + // If we found an empty bucket, the pointer doesn't exist in the set. + // Return a tombstone if we've seen one so far, or the empty bucket if + // not. + if (LLVM_LIKELY(Array[Bucket] == getEmptyMarker())) + return Tombstone ? Tombstone : Array+Bucket; + + // Found Ptr's bucket? + if (LLVM_LIKELY(Array[Bucket] == Ptr)) + return Array+Bucket; + + // If this is a tombstone, remember it. If Ptr ends up not in the set, we + // prefer to return it than something that would require more probing. + if (Array[Bucket] == getTombstoneMarker() && !Tombstone) + Tombstone = Array+Bucket; // Remember the first tombstone found. + + // It's a hash collision or a tombstone. Reprobe. + Bucket = (Bucket + ProbeAmt++) & (ArraySize-1); + } +} + +/// Grow - Allocate a larger backing store for the buckets and move it over. +/// +void SmallPtrSetImplBase::Grow(unsigned NewSize) { + const void **OldBuckets = CurArray; + const void **OldEnd = EndPointer(); + bool WasSmall = isSmall(); + + // Install the new array. Clear all the buckets to empty. + const void **NewBuckets = (const void**) safe_malloc(sizeof(void*) * NewSize); + + // Reset member only if memory was allocated successfully + CurArray = NewBuckets; + CurArraySize = NewSize; + memset(CurArray, -1, NewSize*sizeof(void*)); + + // Copy over all valid entries. + for (const void **BucketPtr = OldBuckets; BucketPtr != OldEnd; ++BucketPtr) { + // Copy over the element if it is valid. 
+ const void *Elt = *BucketPtr; + if (Elt != getTombstoneMarker() && Elt != getEmptyMarker()) + *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt); + } + + if (!WasSmall) + free(OldBuckets); + NumNonEmpty -= NumTombstones; + NumTombstones = 0; +} + +SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage, + const SmallPtrSetImplBase &that) { + SmallArray = SmallStorage; + + // If we're becoming small, prepare to insert into our stack space + if (that.isSmall()) { + CurArray = SmallArray; + // Otherwise, allocate new heap space (unless we were the same size) + } else { + CurArray = (const void**)safe_malloc(sizeof(void*) * that.CurArraySize); + } + + // Copy over the that array. + CopyHelper(that); +} + +SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage, + unsigned SmallSize, + SmallPtrSetImplBase &&that) { + SmallArray = SmallStorage; + MoveHelper(SmallSize, std::move(that)); +} + +void SmallPtrSetImplBase::CopyFrom(const SmallPtrSetImplBase &RHS) { + assert(&RHS != this && "Self-copy should be handled by the caller."); + + if (isSmall() && RHS.isSmall()) + assert(CurArraySize == RHS.CurArraySize && + "Cannot assign sets with different small sizes"); + + // If we're becoming small, prepare to insert into our stack space + if (RHS.isSmall()) { + if (!isSmall()) + free(CurArray); + CurArray = SmallArray; + // Otherwise, allocate new heap space (unless we were the same size) + } else if (CurArraySize != RHS.CurArraySize) { + if (isSmall()) + CurArray = (const void**)safe_malloc(sizeof(void*) * RHS.CurArraySize); + else { + const void **T = (const void**)safe_realloc(CurArray, + sizeof(void*) * RHS.CurArraySize); + CurArray = T; + } + } + + CopyHelper(RHS); +} + +void SmallPtrSetImplBase::CopyHelper(const SmallPtrSetImplBase &RHS) { + // Copy over the new array size + CurArraySize = RHS.CurArraySize; + + // Copy over the contents from the other set + std::copy(RHS.CurArray, RHS.EndPointer(), CurArray); + + NumNonEmpty = 
RHS.NumNonEmpty; + NumTombstones = RHS.NumTombstones; +} + +void SmallPtrSetImplBase::MoveFrom(unsigned SmallSize, + SmallPtrSetImplBase &&RHS) { + if (!isSmall()) + free(CurArray); + MoveHelper(SmallSize, std::move(RHS)); +} + +void SmallPtrSetImplBase::MoveHelper(unsigned SmallSize, + SmallPtrSetImplBase &&RHS) { + assert(&RHS != this && "Self-move should be handled by the caller."); + + if (RHS.isSmall()) { + // Copy a small RHS rather than moving. + CurArray = SmallArray; + std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, CurArray); + } else { + CurArray = RHS.CurArray; + RHS.CurArray = RHS.SmallArray; + } + + // Copy the rest of the trivial members. + CurArraySize = RHS.CurArraySize; + NumNonEmpty = RHS.NumNonEmpty; + NumTombstones = RHS.NumTombstones; + + // Make the RHS small and empty. + RHS.CurArraySize = SmallSize; + assert(RHS.CurArray == RHS.SmallArray); + RHS.NumNonEmpty = 0; + RHS.NumTombstones = 0; +} + +void SmallPtrSetImplBase::swap(SmallPtrSetImplBase &RHS) { + if (this == &RHS) return; + + // We can only avoid copying elements if neither set is small. + if (!this->isSmall() && !RHS.isSmall()) { + std::swap(this->CurArray, RHS.CurArray); + std::swap(this->CurArraySize, RHS.CurArraySize); + std::swap(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap(this->NumTombstones, RHS.NumTombstones); + return; + } + + // FIXME: From here on we assume that both sets have the same small size. + + // If only RHS is small, copy the small elements into LHS and move the pointer + // from LHS to RHS. 
+ if (!this->isSmall() && RHS.isSmall()) { + assert(RHS.CurArray == RHS.SmallArray); + std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, this->SmallArray); + std::swap(RHS.CurArraySize, this->CurArraySize); + std::swap(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap(this->NumTombstones, RHS.NumTombstones); + RHS.CurArray = this->CurArray; + this->CurArray = this->SmallArray; + return; + } + + // If only LHS is small, copy the small elements into RHS and move the pointer + // from RHS to LHS. + if (this->isSmall() && !RHS.isSmall()) { + assert(this->CurArray == this->SmallArray); + std::copy(this->CurArray, this->CurArray + this->NumNonEmpty, + RHS.SmallArray); + std::swap(RHS.CurArraySize, this->CurArraySize); + std::swap(RHS.NumNonEmpty, this->NumNonEmpty); + std::swap(RHS.NumTombstones, this->NumTombstones); + this->CurArray = RHS.CurArray; + RHS.CurArray = RHS.SmallArray; + return; + } + + // Both a small, just swap the small elements. + assert(this->isSmall() && RHS.isSmall()); + unsigned MinNonEmpty = std::min(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap_ranges(this->SmallArray, this->SmallArray + MinNonEmpty, + RHS.SmallArray); + if (this->NumNonEmpty > MinNonEmpty) { + std::copy(this->SmallArray + MinNonEmpty, + this->SmallArray + this->NumNonEmpty, + RHS.SmallArray + MinNonEmpty); + } else { + std::copy(RHS.SmallArray + MinNonEmpty, RHS.SmallArray + RHS.NumNonEmpty, + this->SmallArray + MinNonEmpty); + } + assert(this->CurArraySize == RHS.CurArraySize); + std::swap(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap(this->NumTombstones, RHS.NumTombstones); +} diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp new file mode 100644 index 0000000000000..36f0a81f6b00d --- /dev/null +++ b/llvm/lib/Support/SmallVector.cpp @@ -0,0 +1,65 @@ +//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SmallVector class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +// Check that no bytes are wasted and everything is well-aligned. +namespace { +struct Struct16B { + alignas(16) void *X; +}; +struct Struct32B { + alignas(32) void *X; +}; +} +static_assert(sizeof(SmallVector<void *, 0>) == + sizeof(unsigned) * 2 + sizeof(void *), + "wasted space in SmallVector size 0"); +static_assert(alignof(SmallVector<Struct16B, 0>) >= alignof(Struct16B), + "wrong alignment for 16-byte aligned T"); +static_assert(alignof(SmallVector<Struct32B, 0>) >= alignof(Struct32B), + "wrong alignment for 32-byte aligned T"); +static_assert(sizeof(SmallVector<Struct16B, 0>) >= alignof(Struct16B), + "missing padding for 16-byte aligned T"); +static_assert(sizeof(SmallVector<Struct32B, 0>) >= alignof(Struct32B), + "missing padding for 32-byte aligned T"); +static_assert(sizeof(SmallVector<void *, 1>) == + sizeof(unsigned) * 2 + sizeof(void *) * 2, + "wasted space in SmallVector size 1"); + +/// grow_pod - This is an implementation of the grow() method which only works +/// on POD-like datatypes and is out of line to reduce code duplication. +void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity, + size_t TSize) { + // Ensure we can fit the new capacity in 32 bits. + if (MinCapacity > UINT32_MAX) + report_bad_alloc_error("SmallVector capacity overflow during allocation"); + + size_t NewCapacity = 2 * capacity() + 1; // Always grow. + NewCapacity = + std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX)); + + void *NewElts; + if (BeginX == FirstEl) { + NewElts = safe_malloc(NewCapacity * TSize); + + // Copy the elements over. 
No need to run dtors on PODs. + memcpy(NewElts, this->BeginX, size() * TSize); + } else { + // If this wasn't grown from the inline copy, grow the allocated space. + NewElts = safe_realloc(this->BeginX, NewCapacity * TSize); + } + + this->BeginX = NewElts; + this->Capacity = NewCapacity; +} diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp new file mode 100644 index 0000000000000..2a241f18c3627 --- /dev/null +++ b/llvm/lib/Support/SourceMgr.cpp @@ -0,0 +1,501 @@ +//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceMgr class. This class is used as a simple +// substrate for diagnostics, #include handling, and other low level things for +// simple parsers. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SourceMgr.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Locale.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <limits> +#include <memory> +#include <string> +#include <utility> + +using namespace llvm; + +static const size_t TabStop = 8; + +unsigned SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc, + std::string &IncludedFile) { + IncludedFile = Filename; + ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = + MemoryBuffer::getFile(IncludedFile); + + // If the file didn't exist directly, see if it's in an include path. + for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr; + ++i) { + IncludedFile = + IncludeDirectories[i] + sys::path::get_separator().data() + Filename; + NewBufOrErr = MemoryBuffer::getFile(IncludedFile); + } + + if (!NewBufOrErr) + return 0; + + return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc); +} + +unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { + for (unsigned i = 0, e = Buffers.size(); i != e; ++i) + if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && + // Use <= here so that a pointer to the null at the end of the buffer + // is included as part of the buffer. + Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) + return i + 1; + return 0; +} + +template <typename T> +unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { + + // Ensure OffsetCache is allocated and populated with offsets of all the + // '\n' bytes. 
+ std::vector<T> *Offsets = nullptr; + if (OffsetCache.isNull()) { + Offsets = new std::vector<T>(); + OffsetCache = Offsets; + size_t Sz = Buffer->getBufferSize(); + assert(Sz <= std::numeric_limits<T>::max()); + StringRef S = Buffer->getBuffer(); + for (size_t N = 0; N < Sz; ++N) { + if (S[N] == '\n') { + Offsets->push_back(static_cast<T>(N)); + } + } + } else { + Offsets = OffsetCache.get<std::vector<T> *>(); + } + + const char *BufStart = Buffer->getBufferStart(); + assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); + ptrdiff_t PtrDiff = Ptr - BufStart; + assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max()); + T PtrOffset = static_cast<T>(PtrDiff); + + // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get + // the line number. + return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1; +} + +SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) + : Buffer(std::move(Other.Buffer)), + OffsetCache(Other.OffsetCache), + IncludeLoc(Other.IncludeLoc) { + Other.OffsetCache = nullptr; +} + +SourceMgr::SrcBuffer::~SrcBuffer() { + if (!OffsetCache.isNull()) { + if (OffsetCache.is<std::vector<uint8_t>*>()) + delete OffsetCache.get<std::vector<uint8_t>*>(); + else if (OffsetCache.is<std::vector<uint16_t>*>()) + delete OffsetCache.get<std::vector<uint16_t>*>(); + else if (OffsetCache.is<std::vector<uint32_t>*>()) + delete OffsetCache.get<std::vector<uint32_t>*>(); + else + delete OffsetCache.get<std::vector<uint64_t>*>(); + OffsetCache = nullptr; + } +} + +std::pair<unsigned, unsigned> +SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { + if (!BufferID) + BufferID = FindBufferContainingLoc(Loc); + assert(BufferID && "Invalid Location!"); + + auto &SB = getBufferInfo(BufferID); + const char *Ptr = Loc.getPointer(); + + size_t Sz = SB.Buffer->getBufferSize(); + unsigned LineNo; + if (Sz <= std::numeric_limits<uint8_t>::max()) + LineNo = SB.getLineNumber<uint8_t>(Ptr); + else if 
(Sz <= std::numeric_limits<uint16_t>::max()) + LineNo = SB.getLineNumber<uint16_t>(Ptr); + else if (Sz <= std::numeric_limits<uint32_t>::max()) + LineNo = SB.getLineNumber<uint32_t>(Ptr); + else + LineNo = SB.getLineNumber<uint64_t>(Ptr); + + const char *BufStart = SB.Buffer->getBufferStart(); + size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r"); + if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0; + return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs); +} + +void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { + if (IncludeLoc == SMLoc()) return; // Top of stack. + + unsigned CurBuf = FindBufferContainingLoc(IncludeLoc); + assert(CurBuf && "Invalid or unspecified location!"); + + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); + + OS << "Included from " + << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; +} + +SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, + const Twine &Msg, + ArrayRef<SMRange> Ranges, + ArrayRef<SMFixIt> FixIts) const { + // First thing to do: find the current buffer containing the specified + // location to pull out the source line. + SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges; + std::pair<unsigned, unsigned> LineAndCol; + StringRef BufferID = "<unknown>"; + std::string LineStr; + + if (Loc.isValid()) { + unsigned CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf && "Invalid or unspecified location!"); + + const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf); + BufferID = CurMB->getBufferIdentifier(); + + // Scan backward to find the start of the line. + const char *LineStart = Loc.getPointer(); + const char *BufStart = CurMB->getBufferStart(); + while (LineStart != BufStart && LineStart[-1] != '\n' && + LineStart[-1] != '\r') + --LineStart; + + // Get the end of the line. 
+ const char *LineEnd = Loc.getPointer(); + const char *BufEnd = CurMB->getBufferEnd(); + while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r') + ++LineEnd; + LineStr = std::string(LineStart, LineEnd); + + // Convert any ranges to column ranges that only intersect the line of the + // location. + for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { + SMRange R = Ranges[i]; + if (!R.isValid()) continue; + + // If the line doesn't contain any part of the range, then ignore it. + if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) + continue; + + // Ignore pieces of the range that go onto other lines. + if (R.Start.getPointer() < LineStart) + R.Start = SMLoc::getFromPointer(LineStart); + if (R.End.getPointer() > LineEnd) + R.End = SMLoc::getFromPointer(LineEnd); + + // Translate from SMLoc ranges to column ranges. + // FIXME: Handle multibyte characters. + ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, + R.End.getPointer()-LineStart)); + } + + LineAndCol = getLineAndColumn(Loc, CurBuf); + } + + return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, + LineAndCol.second-1, Kind, Msg.str(), + LineStr, ColRanges, FixIts); +} + +void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, + bool ShowColors) const { + // Report the message with the diagnostic handler if present. 
+ if (DiagHandler) { + DiagHandler(Diagnostic, DiagContext); + return; + } + + if (Diagnostic.getLoc().isValid()) { + unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc()); + assert(CurBuf && "Invalid or unspecified location!"); + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); + } + + Diagnostic.print(nullptr, OS, ShowColors); +} + +void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, + SourceMgr::DiagKind Kind, + const Twine &Msg, ArrayRef<SMRange> Ranges, + ArrayRef<SMFixIt> FixIts, bool ShowColors) const { + PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors); +} + +void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, + const Twine &Msg, ArrayRef<SMRange> Ranges, + ArrayRef<SMFixIt> FixIts, bool ShowColors) const { + PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors); +} + +//===----------------------------------------------------------------------===// +// SMDiagnostic Implementation +//===----------------------------------------------------------------------===// + +SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, + int Line, int Col, SourceMgr::DiagKind Kind, + StringRef Msg, StringRef LineStr, + ArrayRef<std::pair<unsigned,unsigned>> Ranges, + ArrayRef<SMFixIt> Hints) + : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind), + Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()), + FixIts(Hints.begin(), Hints.end()) { + llvm::sort(FixIts); +} + +static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, + ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){ + if (FixIts.empty()) + return; + + const char *LineStart = SourceLine.begin(); + const char *LineEnd = SourceLine.end(); + + size_t PrevHintEndCol = 0; + + for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); + I != E; ++I) { + // If the fixit contains a newline or tab, ignore it. 
+ if (I->getText().find_first_of("\n\r\t") != StringRef::npos) + continue; + + SMRange R = I->getRange(); + + // If the line doesn't contain any part of the range, then ignore it. + if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) + continue; + + // Translate from SMLoc to column. + // Ignore pieces of the range that go onto other lines. + // FIXME: Handle multibyte characters in the source line. + unsigned FirstCol; + if (R.Start.getPointer() < LineStart) + FirstCol = 0; + else + FirstCol = R.Start.getPointer() - LineStart; + + // If we inserted a long previous hint, push this one forwards, and add + // an extra space to show that this is not part of the previous + // completion. This is sort of the best we can do when two hints appear + // to overlap. + // + // Note that if this hint is located immediately after the previous + // hint, no space will be added, since the location is more important. + unsigned HintCol = FirstCol; + if (HintCol < PrevHintEndCol) + HintCol = PrevHintEndCol + 1; + + // FIXME: This assertion is intended to catch unintended use of multibyte + // characters in fixits. If we decide to do this, we'll have to track + // separate byte widths for the source and fixit lines. + assert((size_t)sys::locale::columnWidth(I->getText()) == + I->getText().size()); + + // This relies on one byte per column in our fixit hints. + unsigned LastColumnModified = HintCol + I->getText().size(); + if (LastColumnModified > FixItLine.size()) + FixItLine.resize(LastColumnModified, ' '); + + std::copy(I->getText().begin(), I->getText().end(), + FixItLine.begin() + HintCol); + + PrevHintEndCol = LastColumnModified; + + // For replacements, mark the removal range with '~'. + // FIXME: Handle multibyte characters in the source line. 
+ unsigned LastCol; + if (R.End.getPointer() >= LineEnd) + LastCol = LineEnd - LineStart; + else + LastCol = R.End.getPointer() - LineStart; + + std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~'); + } +} + +static void printSourceLine(raw_ostream &S, StringRef LineContents) { + // Print out the source line one character at a time, so we can expand tabs. + for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { + size_t NextTab = LineContents.find('\t', i); + // If there were no tabs left, print the rest, we are done. + if (NextTab == StringRef::npos) { + S << LineContents.drop_front(i); + break; + } + + // Otherwise, print from i to NextTab. + S << LineContents.slice(i, NextTab); + OutCol += NextTab - i; + i = NextTab; + + // If we have a tab, emit at least one space, then round up to 8 columns. + do { + S << ' '; + ++OutCol; + } while ((OutCol % TabStop) != 0); + } + S << '\n'; +} + +static bool isNonASCII(char c) { + return c & 0x80; +} + +void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, + bool ShowColors, bool ShowKindLabel) const { + { + WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors); + + if (ProgName && ProgName[0]) + S << ProgName << ": "; + + if (!Filename.empty()) { + if (Filename == "-") + S << "<stdin>"; + else + S << Filename; + + if (LineNo != -1) { + S << ':' << LineNo; + if (ColumnNo != -1) + S << ':' << (ColumnNo + 1); + } + S << ": "; + } + } + + if (ShowKindLabel) { + switch (Kind) { + case SourceMgr::DK_Error: + WithColor::error(OS, "", !ShowColors); + break; + case SourceMgr::DK_Warning: + WithColor::warning(OS, "", !ShowColors); + break; + case SourceMgr::DK_Note: + WithColor::note(OS, "", !ShowColors); + break; + case SourceMgr::DK_Remark: + WithColor::remark(OS, "", !ShowColors); + break; + } + } + + WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors) + << Message << '\n'; + + if (LineNo == -1 || ColumnNo == -1) + return; + + // FIXME: If there are multibyte or 
multi-column characters in the source, all + // our ranges will be wrong. To do this properly, we'll need a byte-to-column + // map like Clang's TextDiagnostic. For now, we'll just handle tabs by + // expanding them later, and bail out rather than show incorrect ranges and + // misaligned fixits for any other odd characters. + if (find_if(LineContents, isNonASCII) != LineContents.end()) { + printSourceLine(OS, LineContents); + return; + } + size_t NumColumns = LineContents.size(); + + // Build the line with the caret and ranges. + std::string CaretLine(NumColumns+1, ' '); + + // Expand any ranges. + for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { + std::pair<unsigned, unsigned> R = Ranges[r]; + std::fill(&CaretLine[R.first], + &CaretLine[std::min((size_t)R.second, CaretLine.size())], + '~'); + } + + // Add any fix-its. + // FIXME: Find the beginning of the line properly for multibyte characters. + std::string FixItInsertionLine; + buildFixItLine(CaretLine, FixItInsertionLine, FixIts, + makeArrayRef(Loc.getPointer() - ColumnNo, + LineContents.size())); + + // Finally, plop on the caret. + if (unsigned(ColumnNo) <= NumColumns) + CaretLine[ColumnNo] = '^'; + else + CaretLine[NumColumns] = '^'; + + // ... and remove trailing whitespace so the output doesn't wrap for it. We + // know that the line isn't completely empty because it has the caret in it at + // least. + CaretLine.erase(CaretLine.find_last_not_of(' ')+1); + + printSourceLine(OS, LineContents); + + { + WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors); + + // Print out the caret line, matching tabs in the source line. + for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) { + if (i >= LineContents.size() || LineContents[i] != '\t') { + S << CaretLine[i]; + ++OutCol; + continue; + } + + // Okay, we have a tab. Insert the appropriate number of characters. 
+ do { + S << CaretLine[i]; + ++OutCol; + } while ((OutCol % TabStop) != 0); + } + S << '\n'; + } + + // Print out the replacement line, matching tabs in the source line. + if (FixItInsertionLine.empty()) + return; + + for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { + if (i >= LineContents.size() || LineContents[i] != '\t') { + OS << FixItInsertionLine[i]; + ++OutCol; + continue; + } + + // Okay, we have a tab. Insert the appropriate number of characters. + do { + OS << FixItInsertionLine[i]; + // FIXME: This is trying not to break up replacements, but then to re-sync + // with the tabs between replacements. This will fail, though, if two + // fix-it replacements are exactly adjacent, or if a fix-it contains a + // space. Really we should be precomputing column widths, which we'll + // need anyway for multibyte chars. + if (FixItInsertionLine[i] != ' ') + ++i; + ++OutCol; + } while (((OutCol % TabStop) != 0) && i != e); + } + OS << '\n'; +} diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp new file mode 100644 index 0000000000000..9bd1f18a4ee7d --- /dev/null +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -0,0 +1,231 @@ +//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables, or to instrument some functions or global variables in a specific +// way, based on a user-supplied list. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include <string> +#include <system_error> +#include <utility> + +#include <stdio.h> +namespace llvm { + +bool SpecialCaseList::Matcher::insert(std::string Regexp, + unsigned LineNumber, + std::string &REError) { + if (Regexp.empty()) { + REError = "Supplied regexp was blank"; + return false; + } + + if (Regex::isLiteralERE(Regexp)) { + Strings[Regexp] = LineNumber; + return true; + } + Trigrams.insert(Regexp); + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str(); + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + if (!CheckRE.isValid(REError)) + return false; + + RegExes.emplace_back( + std::make_pair(std::make_unique<Regex>(std::move(CheckRE)), LineNumber)); + return true; +} + +unsigned SpecialCaseList::Matcher::match(StringRef Query) const { + auto It = Strings.find(Query); + if (It != Strings.end()) + return It->second; + if (Trigrams.isDefinitelyOut(Query)) + return false; + for (auto& RegExKV : RegExes) + if (RegExKV.first->match(Query)) + return RegExKV.second; + return 0; +} + +std::unique_ptr<SpecialCaseList> +SpecialCaseList::create(const std::vector<std::string> &Paths, + std::string &Error) { + std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); + if (SCL->createInternal(Paths, Error)) + return SCL; + return nullptr; +} + +std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB, + std::string &Error) { + std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); + if (SCL->createInternal(MB, Error)) + return SCL; + return nullptr; +} + 
+std::unique_ptr<SpecialCaseList> +SpecialCaseList::createOrDie(const std::vector<std::string> &Paths) { + std::string Error; + if (auto SCL = create(Paths, Error)) + return SCL; + report_fatal_error(Error); +} + +bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, + std::string &Error) { + StringMap<size_t> Sections; + for (const auto &Path : Paths) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFile(Path); + if (std::error_code EC = FileOrErr.getError()) { + Error = (Twine("can't open file '") + Path + "': " + EC.message()).str(); + return false; + } + std::string ParseError; + if (!parse(FileOrErr.get().get(), Sections, ParseError)) { + Error = (Twine("error parsing file '") + Path + "': " + ParseError).str(); + return false; + } + } + return true; +} + +bool SpecialCaseList::createInternal(const MemoryBuffer *MB, + std::string &Error) { + StringMap<size_t> Sections; + if (!parse(MB, Sections, Error)) + return false; + return true; +} + +bool SpecialCaseList::parse(const MemoryBuffer *MB, + StringMap<size_t> &SectionsMap, + std::string &Error) { + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + MB->getBuffer().split(Lines, '\n'); + + unsigned LineNo = 1; + StringRef Section = "*"; + + for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) { + *I = I->trim(); + // Ignore empty lines and lines starting with "#" + if (I->empty() || I->startswith("#")) + continue; + + // Save section names + if (I->startswith("[")) { + if (!I->endswith("]")) { + Error = (Twine("malformed section header on line ") + Twine(LineNo) + + ": " + *I).str(); + return false; + } + + Section = I->slice(1, I->size() - 1); + + std::string REError; + Regex CheckRE(Section); + if (!CheckRE.isValid(REError)) { + Error = + (Twine("malformed regex for section ") + Section + ": '" + REError) + .str(); + return false; + } + + continue; + } + + // Get our prefix and unparsed regexp. 
+ std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + if (SplitLine.second.empty()) { + // Missing ':' in the line. + Error = (Twine("malformed line ") + Twine(LineNo) + ": '" + + SplitLine.first + "'").str(); + return false; + } + + std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); + std::string Regexp = SplitRegexp.first; + StringRef Category = SplitRegexp.second; + + // Create this section if it has not been seen before. + if (SectionsMap.find(Section) == SectionsMap.end()) { + std::unique_ptr<Matcher> M = std::make_unique<Matcher>(); + std::string REError; + if (!M->insert(Section, LineNo, REError)) { + Error = (Twine("malformed section ") + Section + ": '" + REError).str(); + return false; + } + + SectionsMap[Section] = Sections.size(); + Sections.emplace_back(std::move(M)); + } + + auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category]; + std::string REError; + if (!Entry.insert(std::move(Regexp), LineNo, REError)) { + Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" + + SplitLine.second + "': " + REError).str(); + return false; + } + } + return true; +} + +SpecialCaseList::~SpecialCaseList() {} + +bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, + StringRef Query, StringRef Category) const { + return inSectionBlame(Section, Prefix, Query, Category); +} + +unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, + StringRef Query, + StringRef Category) const { + for (auto &SectionIter : Sections) + if (SectionIter.SectionMatcher->match(Section)) { + unsigned Blame = + inSectionBlame(SectionIter.Entries, Prefix, Query, Category); + if (Blame) + return Blame; + } + return 0; +} + +unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, + StringRef Prefix, StringRef Query, + StringRef Category) const { + SectionEntries::const_iterator I = Entries.find(Prefix); + if (I == Entries.end()) return 0; + 
StringMap<Matcher>::const_iterator II = I->second.find(Category); + if (II == I->second.end()) return 0; + + return II->getValue().match(Query); +} + +} // namespace llvm diff --git a/llvm/lib/Support/Statistic.cpp b/llvm/lib/Support/Statistic.cpp new file mode 100644 index 0000000000000..8b4177c7fba67 --- /dev/null +++ b/llvm/lib/Support/Statistic.cpp @@ -0,0 +1,265 @@ +//===-- Statistic.cpp - Easy way to expose stats information --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the 'Statistic' class, which is designed to be an easy +// way to expose various success metrics from passes. These statistics are +// printed at the end of a run, when the -stats command line option is enabled +// on the command line. +// +// This is useful for reporting information like the number of instructions +// simplified, optimized or removed by various transformations, like this: +// +// static Statistic NumInstEliminated("GCSE", "Number of instructions killed"); +// +// Later, in the code: ++NumInstEliminated; +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstring> +using namespace llvm; + +/// -stats - Command line option to cause transformations to emit stats about +/// what they did. 
+/// +static cl::opt<bool> Stats( + "stats", + cl::desc("Enable statistics output from program (available with Asserts)"), + cl::Hidden); + +static cl::opt<bool> StatsAsJSON("stats-json", + cl::desc("Display statistics as json data"), + cl::Hidden); + +static bool Enabled; +static bool PrintOnExit; + +namespace { +/// This class is used in a ManagedStatic so that it is created on demand (when +/// the first statistic is bumped) and destroyed only when llvm_shutdown is +/// called. We print statistics from the destructor. +/// This class is also used to look up statistic values from applications that +/// use LLVM. +class StatisticInfo { + std::vector<TrackingStatistic *> Stats; + + friend void llvm::PrintStatistics(); + friend void llvm::PrintStatistics(raw_ostream &OS); + friend void llvm::PrintStatisticsJSON(raw_ostream &OS); + + /// Sort statistics by debugtype,name,description. + void sort(); +public: + using const_iterator = std::vector<TrackingStatistic *>::const_iterator; + + StatisticInfo(); + ~StatisticInfo(); + + void addStatistic(TrackingStatistic *S) { Stats.push_back(S); } + + const_iterator begin() const { return Stats.begin(); } + const_iterator end() const { return Stats.end(); } + iterator_range<const_iterator> statistics() const { + return {begin(), end()}; + } + + void reset(); +}; +} // end anonymous namespace + +static ManagedStatic<StatisticInfo> StatInfo; +static ManagedStatic<sys::SmartMutex<true> > StatLock; + +/// RegisterStatistic - The first time a statistic is bumped, this method is +/// called. +void TrackingStatistic::RegisterStatistic() { + // If stats are enabled, inform StatInfo that this statistic should be + // printed. + // llvm_shutdown calls destructors while holding the ManagedStatic mutex. + // These destructors end up calling PrintStatistics, which takes StatLock. 
+ // Since dereferencing StatInfo and StatLock can require taking the + // ManagedStatic mutex, doing so with StatLock held would lead to a lock + // order inversion. To avoid that, we dereference the ManagedStatics first, + // and only take StatLock afterwards. + if (!Initialized.load(std::memory_order_relaxed)) { + sys::SmartMutex<true> &Lock = *StatLock; + StatisticInfo &SI = *StatInfo; + sys::SmartScopedLock<true> Writer(Lock); + // Check Initialized again after acquiring the lock. + if (Initialized.load(std::memory_order_relaxed)) + return; + if (Stats || Enabled) + SI.addStatistic(this); + + // Remember we have been registered. + Initialized.store(true, std::memory_order_release); + } +} + +StatisticInfo::StatisticInfo() { + // Ensure timergroup lists are created first so they are destructed after us. + TimerGroup::ConstructTimerLists(); +} + +// Print information when destroyed, iff command line option is specified. +StatisticInfo::~StatisticInfo() { + if (::Stats || PrintOnExit) + llvm::PrintStatistics(); +} + +void llvm::EnableStatistics(bool PrintOnExit) { + Enabled = true; + ::PrintOnExit = PrintOnExit; +} + +bool llvm::AreStatisticsEnabled() { + return Enabled || Stats; +} + +void StatisticInfo::sort() { + llvm::stable_sort( + Stats, [](const TrackingStatistic *LHS, const TrackingStatistic *RHS) { + if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType())) + return Cmp < 0; + + if (int Cmp = std::strcmp(LHS->getName(), RHS->getName())) + return Cmp < 0; + + return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0; + }); +} + +void StatisticInfo::reset() { + sys::SmartScopedLock<true> Writer(*StatLock); + + // Tell each statistic that it isn't registered so it has to register + // again. We're holding the lock so it won't be able to do so until we're + // finished. Once we've forced it to re-register (after we return), then zero + // the value. 
+ for (auto *Stat : Stats) { + // Value updates to a statistic that complete before this statement in the + // iteration for that statistic will be lost as intended. + Stat->Initialized = false; + Stat->Value = 0; + } + + // Clear the registration list and release the lock once we're done. Any + // pending updates from other threads will safely take effect after we return. + // That might not be what the user wants if they're measuring a compilation + // but it's their responsibility to prevent concurrent compilations to make + // a single compilation measurable. + Stats.clear(); +} + +void llvm::PrintStatistics(raw_ostream &OS) { + StatisticInfo &Stats = *StatInfo; + + // Figure out how long the biggest Value and Name fields are. + unsigned MaxDebugTypeLen = 0, MaxValLen = 0; + for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) { + MaxValLen = std::max(MaxValLen, + (unsigned)utostr(Stats.Stats[i]->getValue()).size()); + MaxDebugTypeLen = std::max(MaxDebugTypeLen, + (unsigned)std::strlen(Stats.Stats[i]->getDebugType())); + } + + Stats.sort(); + + // Print out the statistics header... + OS << "===" << std::string(73, '-') << "===\n" + << " ... Statistics Collected ...\n" + << "===" << std::string(73, '-') << "===\n\n"; + + // Print all of the statistics. + for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) + OS << format("%*u %-*s - %s\n", + MaxValLen, Stats.Stats[i]->getValue(), + MaxDebugTypeLen, Stats.Stats[i]->getDebugType(), + Stats.Stats[i]->getDesc()); + + OS << '\n'; // Flush the output stream. + OS.flush(); +} + +void llvm::PrintStatisticsJSON(raw_ostream &OS) { + sys::SmartScopedLock<true> Reader(*StatLock); + StatisticInfo &Stats = *StatInfo; + + Stats.sort(); + + // Print all of the statistics. 
+ OS << "{\n"; + const char *delim = ""; + for (const TrackingStatistic *Stat : Stats.Stats) { + OS << delim; + assert(yaml::needsQuotes(Stat->getDebugType()) == yaml::QuotingType::None && + "Statistic group/type name is simple."); + assert(yaml::needsQuotes(Stat->getName()) == yaml::QuotingType::None && + "Statistic name is simple"); + OS << "\t\"" << Stat->getDebugType() << '.' << Stat->getName() << "\": " + << Stat->getValue(); + delim = ",\n"; + } + // Print timers. + TimerGroup::printAllJSONValues(OS, delim); + + OS << "\n}\n"; + OS.flush(); +} + +void llvm::PrintStatistics() { +#if LLVM_ENABLE_STATS + sys::SmartScopedLock<true> Reader(*StatLock); + StatisticInfo &Stats = *StatInfo; + + // Statistics not enabled? + if (Stats.Stats.empty()) return; + + // Get the stream to write to. + std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile(); + if (StatsAsJSON) + PrintStatisticsJSON(*OutStream); + else + PrintStatistics(*OutStream); + +#else + // Check if the -stats option is set instead of checking + // !Stats.Stats.empty(). In release builds, Statistics operators + // do nothing, so stats are never Registered. + if (Stats) { + // Get the stream to write to. + std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile(); + (*OutStream) << "Statistics are disabled. 
" + << "Build with asserts or with -DLLVM_ENABLE_STATS\n"; + } +#endif +} + +const std::vector<std::pair<StringRef, unsigned>> llvm::GetStatistics() { + sys::SmartScopedLock<true> Reader(*StatLock); + std::vector<std::pair<StringRef, unsigned>> ReturnStats; + + for (const auto &Stat : StatInfo->statistics()) + ReturnStats.emplace_back(Stat->getName(), Stat->getValue()); + return ReturnStats; +} + +void llvm::ResetStatistics() { + StatInfo->reset(); +} diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp new file mode 100644 index 0000000000000..af8dd463e125d --- /dev/null +++ b/llvm/lib/Support/StringExtras.cpp @@ -0,0 +1,92 @@ +//===-- StringExtras.cpp - Implement the StringExtras header --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringExtras.h header +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// StrInStrNoCase - Portable version of strcasestr. Locates the first +/// occurrence of string 's1' in string 's2', ignoring case. Returns +/// the offset of s2 in s1 or npos if s2 cannot be found. 
+StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) { + size_t N = s2.size(), M = s1.size(); + if (N > M) + return StringRef::npos; + for (size_t i = 0, e = M - N + 1; i != e; ++i) + if (s1.substr(i, N).equals_lower(s2)) + return i; + return StringRef::npos; +} + +/// getToken - This function extracts one token from source, ignoring any +/// leading characters that appear in the Delimiters string, and ending the +/// token at any of the characters that appear in the Delimiters string. If +/// there are no tokens in the source string, an empty string is returned. +/// The function returns a pair containing the extracted token and the +/// remaining tail string. +std::pair<StringRef, StringRef> llvm::getToken(StringRef Source, + StringRef Delimiters) { + // Figure out where the token starts. + StringRef::size_type Start = Source.find_first_not_of(Delimiters); + + // Find the next occurrence of the delimiter. + StringRef::size_type End = Source.find_first_of(Delimiters, Start); + + return std::make_pair(Source.slice(Start, End), Source.substr(End)); +} + +/// SplitString - Split up the specified string according to the specified +/// delimiters, appending the result fragments to the output list. 
+void llvm::SplitString(StringRef Source, + SmallVectorImpl<StringRef> &OutFragments, + StringRef Delimiters) { + std::pair<StringRef, StringRef> S = getToken(Source, Delimiters); + while (!S.first.empty()) { + OutFragments.push_back(S.first); + S = getToken(S.second, Delimiters); + } +} + +void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + unsigned char C = Name[i]; + if (C == '\\') + Out << '\\' << C; + else if (isPrint(C) && C != '"') + Out << C; + else + Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); + } +} + +void llvm::printHTMLEscaped(StringRef String, raw_ostream &Out) { + for (char C : String) { + if (C == '&') + Out << "&"; + else if (C == '<') + Out << "<"; + else if (C == '>') + Out << ">"; + else if (C == '\"') + Out << """; + else if (C == '\'') + Out << "'"; + else + Out << C; + } +} + +void llvm::printLowerCase(StringRef String, raw_ostream &Out) { + for (const char C : String) + Out << toLower(C); +} diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp new file mode 100644 index 0000000000000..6b5ea020dd46d --- /dev/null +++ b/llvm/lib/Support/StringMap.cpp @@ -0,0 +1,261 @@ +//===--- StringMap.cpp - String Hash table map implementation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringMap class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DJB.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> + +using namespace llvm; + +/// Returns the number of buckets to allocate to ensure that the DenseMap can +/// accommodate \p NumEntries without need to grow(). +static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { + // Ensure that "NumEntries * 4 < NumBuckets * 3" + if (NumEntries == 0) + return 0; + // +1 is required because of the strict equality. + // For example if NumEntries is 48, we need to return 401. + return NextPowerOf2(NumEntries * 4 / 3 + 1); +} + +StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { + ItemSize = itemSize; + + // If a size is specified, initialize the table with that many buckets. + if (InitSize) { + // The table will grow when the number of entries reach 3/4 of the number of + // buckets. To guarantee that "InitSize" number of entries can be inserted + // in the table without growing, we allocate just what is needed here. + init(getMinBucketToReserveForEntries(InitSize)); + return; + } + + // Otherwise, initialize it with zero buckets to avoid the allocation. + TheTable = nullptr; + NumBuckets = 0; + NumItems = 0; + NumTombstones = 0; +} + +void StringMapImpl::init(unsigned InitSize) { + assert((InitSize & (InitSize-1)) == 0 && + "Init Size must be a power of 2 or zero!"); + + unsigned NewNumBuckets = InitSize ? InitSize : 16; + NumItems = 0; + NumTombstones = 0; + + TheTable = static_cast<StringMapEntryBase **>( + safe_calloc(NewNumBuckets+1, + sizeof(StringMapEntryBase **) + sizeof(unsigned))); + + // Set the member only if TheTable was successfully allocated + NumBuckets = NewNumBuckets; + + // Allocate one extra bucket, set it to look filled so the iterators stop at + // end. 
+ TheTable[NumBuckets] = (StringMapEntryBase*)2; +} + +/// LookupBucketFor - Look up the bucket that the specified string should end +/// up in. If it already exists as a key in the map, the Item pointer for the +/// specified bucket will be non-null. Otherwise, it will be null. In either +/// case, the FullHashValue field of the bucket will be set to the hash value +/// of the string. +unsigned StringMapImpl::LookupBucketFor(StringRef Name) { + unsigned HTSize = NumBuckets; + if (HTSize == 0) { // Hash table unallocated so far? + init(16); + HTSize = NumBuckets; + } + unsigned FullHashValue = djbHash(Name, 0); + unsigned BucketNo = FullHashValue & (HTSize-1); + unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); + + unsigned ProbeAmt = 1; + int FirstTombstone = -1; + while (true) { + StringMapEntryBase *BucketItem = TheTable[BucketNo]; + // If we found an empty bucket, this key isn't in the table yet, return it. + if (LLVM_LIKELY(!BucketItem)) { + // If we found a tombstone, we want to reuse the tombstone instead of an + // empty bucket. This reduces probing. + if (FirstTombstone != -1) { + HashTable[FirstTombstone] = FullHashValue; + return FirstTombstone; + } + + HashTable[BucketNo] = FullHashValue; + return BucketNo; + } + + if (BucketItem == getTombstoneVal()) { + // Skip over tombstones. However, remember the first one we see. + if (FirstTombstone == -1) FirstTombstone = BucketNo; + } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + + // Do the comparison like this because Name isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) { + // We found a match! 
+ return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + +/// FindKey - Look up the bucket that contains the specified key. If it exists +/// in the map, return the bucket number of the key. Otherwise return -1. +/// This does not modify the map. +int StringMapImpl::FindKey(StringRef Key) const { + unsigned HTSize = NumBuckets; + if (HTSize == 0) return -1; // Really empty table? + unsigned FullHashValue = djbHash(Key, 0); + unsigned BucketNo = FullHashValue & (HTSize-1); + unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); + + unsigned ProbeAmt = 1; + while (true) { + StringMapEntryBase *BucketItem = TheTable[BucketNo]; + // If we found an empty bucket, this key isn't in the table yet, return. + if (LLVM_LIKELY(!BucketItem)) + return -1; + + if (BucketItem == getTombstoneVal()) { + // Ignore tombstones. + } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + + // Do the comparison like this because NameStart isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. 
+ ++ProbeAmt; + } +} + +/// RemoveKey - Remove the specified StringMapEntry from the table, but do not +/// delete it. This aborts if the value isn't in the table. +void StringMapImpl::RemoveKey(StringMapEntryBase *V) { + const char *VStr = (char*)V + ItemSize; + StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength())); + (void)V2; + assert(V == V2 && "Didn't find key?"); +} + +/// RemoveKey - Remove the StringMapEntry for the specified key from the +/// table, returning it. If the key is not in the table, this returns null. +StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { + int Bucket = FindKey(Key); + if (Bucket == -1) return nullptr; + + StringMapEntryBase *Result = TheTable[Bucket]; + TheTable[Bucket] = getTombstoneVal(); + --NumItems; + ++NumTombstones; + assert(NumItems + NumTombstones <= NumBuckets); + + return Result; +} + +/// RehashTable - Grow the table, redistributing values into the buckets with +/// the appropriate mod-of-hashtable-size. +unsigned StringMapImpl::RehashTable(unsigned BucketNo) { + unsigned NewSize; + unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); + + // If the hash table is now more than 3/4 full, or if fewer than 1/8 of + // the buckets are empty (meaning that many are filled with tombstones), + // grow/rehash the table. + if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) { + NewSize = NumBuckets*2; + } else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <= + NumBuckets / 8)) { + NewSize = NumBuckets; + } else { + return BucketNo; + } + + unsigned NewBucketNo = BucketNo; + // Allocate one extra bucket which will always be non-empty. This allows the + // iterators to stop at end. 
+ auto NewTableArray = static_cast<StringMapEntryBase **>( + safe_calloc(NewSize+1, sizeof(StringMapEntryBase *) + sizeof(unsigned))); + + unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1); + NewTableArray[NewSize] = (StringMapEntryBase*)2; + + // Rehash all the items into their new buckets. Luckily :) we already have + // the hash values available, so we don't have to rehash any strings. + for (unsigned I = 0, E = NumBuckets; I != E; ++I) { + StringMapEntryBase *Bucket = TheTable[I]; + if (Bucket && Bucket != getTombstoneVal()) { + // Fast case, bucket available. + unsigned FullHash = HashTable[I]; + unsigned NewBucket = FullHash & (NewSize-1); + if (!NewTableArray[NewBucket]) { + NewTableArray[FullHash & (NewSize-1)] = Bucket; + NewHashArray[FullHash & (NewSize-1)] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; + continue; + } + + // Otherwise probe for a spot. + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); + } while (NewTableArray[NewBucket]); + + // Finally found a slot. Fill it in. + NewTableArray[NewBucket] = Bucket; + NewHashArray[NewBucket] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; + } + } + + free(TheTable); + + TheTable = NewTableArray; + NumBuckets = NewSize; + NumTombstones = 0; + return NewBucketNo; +} diff --git a/llvm/lib/Support/StringPool.cpp b/llvm/lib/Support/StringPool.cpp new file mode 100644 index 0000000000000..82351017b8cca --- /dev/null +++ b/llvm/lib/Support/StringPool.cpp @@ -0,0 +1,34 @@ +//===-- StringPool.cpp - Interned string pool -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringPool class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringPool.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +StringPool::StringPool() {} + +StringPool::~StringPool() { + assert(InternTable.empty() && "PooledStringPtr leaked!"); +} + +PooledStringPtr StringPool::intern(StringRef Key) { + table_t::iterator I = InternTable.find(Key); + if (I != InternTable.end()) + return PooledStringPtr(&*I); + + entry_t *S = entry_t::Create(Key); + S->getValue().Pool = this; + InternTable.insert(S); + + return PooledStringPtr(S); +} diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp new file mode 100644 index 0000000000000..4bafc4ec71819 --- /dev/null +++ b/llvm/lib/Support/StringRef.cpp @@ -0,0 +1,599 @@ +//===-- StringRef.cpp - Lightweight String References ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/edit_distance.h" +#include <bitset> + +using namespace llvm; + +// MSVC emits references to this into the translation units which reference it. +#ifndef _MSC_VER +const size_t StringRef::npos; +#endif + +// strncasecmp() is not available on non-POSIX systems, so define an +// alternative function here. +static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { + for (size_t I = 0; I < Length; ++I) { + unsigned char LHC = toLower(LHS[I]); + unsigned char RHC = toLower(RHS[I]); + if (LHC != RHC) + return LHC < RHC ? -1 : 1; + } + return 0; +} + +/// compare_lower - Compare strings, ignoring case. 
+int StringRef::compare_lower(StringRef RHS) const { + if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length))) + return Res; + if (Length == RHS.Length) + return 0; + return Length < RHS.Length ? -1 : 1; +} + +/// Check if this string starts with the given \p Prefix, ignoring case. +bool StringRef::startswith_lower(StringRef Prefix) const { + return Length >= Prefix.Length && + ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0; +} + +/// Check if this string ends with the given \p Suffix, ignoring case. +bool StringRef::endswith_lower(StringRef Suffix) const { + return Length >= Suffix.Length && + ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; +} + +size_t StringRef::find_lower(char C, size_t From) const { + char L = toLower(C); + return find_if([L](char D) { return toLower(D) == L; }, From); +} + +/// compare_numeric - Compare strings, handle embedded numbers. +int StringRef::compare_numeric(StringRef RHS) const { + for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { + // Check for sequences of digits. + if (isDigit(Data[I]) && isDigit(RHS.Data[I])) { + // The longer sequence of numbers is considered larger. + // This doesn't really handle prefixed zeros well. + size_t J; + for (J = I + 1; J != E + 1; ++J) { + bool ld = J < Length && isDigit(Data[J]); + bool rd = J < RHS.Length && isDigit(RHS.Data[J]); + if (ld != rd) + return rd ? -1 : 1; + if (!rd) + break; + } + // The two number sequences have the same length (J-I), just memcmp them. + if (int Res = compareMemory(Data + I, RHS.Data + I, J - I)) + return Res < 0 ? -1 : 1; + // Identical number sequences, continue search after the numbers. + I = J - 1; + continue; + } + if (Data[I] != RHS.Data[I]) + return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1; + } + if (Length == RHS.Length) + return 0; + return Length < RHS.Length ? -1 : 1; +} + +// Compute the edit distance between the two given strings. 
+unsigned StringRef::edit_distance(llvm::StringRef Other, + bool AllowReplacements, + unsigned MaxEditDistance) const { + return llvm::ComputeEditDistance( + makeArrayRef(data(), size()), + makeArrayRef(Other.data(), Other.size()), + AllowReplacements, MaxEditDistance); +} + +//===----------------------------------------------------------------------===// +// String Operations +//===----------------------------------------------------------------------===// + +std::string StringRef::lower() const { + std::string Result(size(), char()); + for (size_type i = 0, e = size(); i != e; ++i) { + Result[i] = toLower(Data[i]); + } + return Result; +} + +std::string StringRef::upper() const { + std::string Result(size(), char()); + for (size_type i = 0, e = size(); i != e; ++i) { + Result[i] = toUpper(Data[i]); + } + return Result; +} + +//===----------------------------------------------------------------------===// +// String Searching +//===----------------------------------------------------------------------===// + + +/// find - Search for the first string \arg Str in the string. +/// +/// \return - The index of the first occurrence of \arg Str, or npos if not +/// found. +size_t StringRef::find(StringRef Str, size_t From) const { + if (From > Length) + return npos; + + const char *Start = Data + From; + size_t Size = Length - From; + + const char *Needle = Str.data(); + size_t N = Str.size(); + if (N == 0) + return From; + if (Size < N) + return npos; + if (N == 1) { + const char *Ptr = (const char *)::memchr(Start, Needle[0], Size); + return Ptr == nullptr ? npos : Ptr - Data; + } + + const char *Stop = Start + (Size - N + 1); + + // For short haystacks or unsupported needles fall back to the naive algorithm + if (Size < 16 || N > 255) { + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; + ++Start; + } while (Start < Stop); + return npos; + } + + // Build the bad char heuristic table, with uint8_t to reduce cache thrashing. 
+ uint8_t BadCharSkip[256]; + std::memset(BadCharSkip, N, 256); + for (unsigned i = 0; i != N-1; ++i) + BadCharSkip[(uint8_t)Str[i]] = N-1-i; + + do { + uint8_t Last = Start[N - 1]; + if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1])) + if (std::memcmp(Start, Needle, N - 1) == 0) + return Start - Data; + + // Otherwise skip the appropriate number of bytes. + Start += BadCharSkip[Last]; + } while (Start < Stop); + + return npos; +} + +size_t StringRef::find_lower(StringRef Str, size_t From) const { + StringRef This = substr(From); + while (This.size() >= Str.size()) { + if (This.startswith_lower(Str)) + return From; + This = This.drop_front(); + ++From; + } + return npos; +} + +size_t StringRef::rfind_lower(char C, size_t From) const { + From = std::min(From, Length); + size_t i = From; + while (i != 0) { + --i; + if (toLower(Data[i]) == toLower(C)) + return i; + } + return npos; +} + +/// rfind - Search for the last string \arg Str in the string. +/// +/// \return - The index of the last occurrence of \arg Str, or npos if not +/// found. +size_t StringRef::rfind(StringRef Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = Length - N + 1, e = 0; i != e;) { + --i; + if (substr(i, N).equals(Str)) + return i; + } + return npos; +} + +size_t StringRef::rfind_lower(StringRef Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = Length - N + 1, e = 0; i != e;) { + --i; + if (substr(i, N).equals_lower(Str)) + return i; + } + return npos; +} + +/// find_first_of - Find the first character in the string that is in \arg +/// Chars, or npos if not found. 
+/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_first_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + if (CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// \arg C or npos if not found. +StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// in the string \arg Chars, or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_first_not_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + if (!CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +/// find_last_of - Find the last character in the string that is in \arg C, +/// or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_last_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) + if (CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +/// find_last_not_of - Find the last character in the string that is not +/// \arg C, or npos if not found. 
+StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { + for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_last_not_of - Find the last character in the string that is not in +/// \arg Chars, or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_last_not_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0, e = Chars.size(); i != e; ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) + if (!CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +void StringRef::split(SmallVectorImpl<StringRef> &A, + StringRef Separator, int MaxSplit, + bool KeepEmpty) const { + StringRef S = *this; + + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; + + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + Separator.size(), npos); + } + + // Push the tail. + if (KeepEmpty || !S.empty()) + A.push_back(S); +} + +void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator, + int MaxSplit, bool KeepEmpty) const { + StringRef S = *this; + + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; + + // Push this split. 
+ if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + 1, npos); + } + + // Push the tail. + if (KeepEmpty || !S.empty()) + A.push_back(S); +} + +//===----------------------------------------------------------------------===// +// Helpful Algorithms +//===----------------------------------------------------------------------===// + +/// count - Return the number of non-overlapped occurrences of \arg Str in +/// the string. +size_t StringRef::count(StringRef Str) const { + size_t Count = 0; + size_t N = Str.size(); + if (N > Length) + return 0; + for (size_t i = 0, e = Length - N + 1; i != e; ++i) + if (substr(i, N).equals(Str)) + ++Count; + return Count; +} + +static unsigned GetAutoSenseRadix(StringRef &Str) { + if (Str.empty()) + return 10; + + if (Str.startswith("0x") || Str.startswith("0X")) { + Str = Str.substr(2); + return 16; + } + + if (Str.startswith("0b") || Str.startswith("0B")) { + Str = Str.substr(2); + return 2; + } + + if (Str.startswith("0o")) { + Str = Str.substr(2); + return 8; + } + + if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) { + Str = Str.substr(1); + return 8; + } + + return 10; +} + +bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix, + unsigned long long &Result) { + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Parse all the bytes of the string given this radix. Watch for overflow. + StringRef Str2 = Str; + Result = 0; + while (!Str2.empty()) { + unsigned CharVal; + if (Str2[0] >= '0' && Str2[0] <= '9') + CharVal = Str2[0] - '0'; + else if (Str2[0] >= 'a' && Str2[0] <= 'z') + CharVal = Str2[0] - 'a' + 10; + else if (Str2[0] >= 'A' && Str2[0] <= 'Z') + CharVal = Str2[0] - 'A' + 10; + else + break; + + // If the parsed value is larger than the integer radix, we cannot + // consume any more characters. 
+ if (CharVal >= Radix) + break; + + // Add in this character. + unsigned long long PrevResult = Result; + Result = Result * Radix + CharVal; + + // Check for overflow by shifting back and seeing if bits were lost. + if (Result / Radix < PrevResult) + return true; + + Str2 = Str2.substr(1); + } + + // We consider the operation a failure if no characters were consumed + // successfully. + if (Str.size() == Str2.size()) + return true; + + Str = Str2; + return false; +} + +bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix, + long long &Result) { + unsigned long long ULLVal; + + // Handle positive strings first. + if (Str.empty() || Str.front() != '-') { + if (consumeUnsignedInteger(Str, Radix, ULLVal) || + // Check for value so large it overflows a signed value. + (long long)ULLVal < 0) + return true; + Result = ULLVal; + return false; + } + + // Get the positive part of the value. + StringRef Str2 = Str.drop_front(1); + if (consumeUnsignedInteger(Str2, Radix, ULLVal) || + // Reject values so large they'd overflow as negative signed, but allow + // "-0". This negates the unsigned so that the negative isn't undefined + // on signed overflow. + (long long)-ULLVal > 0) + return true; + + Str = Str2; + Result = -ULLVal; + return false; +} + +/// GetAsUnsignedInteger - Workhorse method that converts a integer character +/// sequence of radix up to 36 to an unsigned long long value. +bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { + if (consumeUnsignedInteger(Str, Radix, Result)) + return true; + + // For getAsUnsignedInteger, we require the whole string to be consumed or + // else we consider it a failure. + return !Str.empty(); +} + +bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, + long long &Result) { + if (consumeSignedInteger(Str, Radix, Result)) + return true; + + // For getAsSignedInteger, we require the whole string to be consumed or else + // we consider it a failure. 
+ return !Str.empty(); +} + +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + assert(Radix > 1 && Radix <= 36); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Skip leading zeroes. This can be a significant improvement if + // it means we don't need > 64 bits. + while (!Str.empty() && Str.front() == '0') + Str = Str.substr(1); + + // If it was nothing but zeroes.... + if (Str.empty()) { + Result = APInt(64, 0); + return false; + } + + // (Over-)estimate the required number of bits. + unsigned Log2Radix = 0; + while ((1U << Log2Radix) < Radix) Log2Radix++; + bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); + + unsigned BitWidth = Log2Radix * Str.size(); + if (BitWidth < Result.getBitWidth()) + BitWidth = Result.getBitWidth(); // don't shrink the result + else if (BitWidth > Result.getBitWidth()) + Result = Result.zext(BitWidth); + + APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix + if (!IsPowerOf2Radix) { + // These must have the same bit-width as Result. + RadixAP = APInt(BitWidth, Radix); + CharAP = APInt(BitWidth, 0); + } + + // Parse all the bytes of the string given this radix. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. 
+ if (IsPowerOf2Radix) { + Result <<= Log2Radix; + Result |= CharVal; + } else { + Result *= RadixAP; + CharAP = CharVal; + Result += CharAP; + } + + Str = Str.substr(1); + } + + return false; +} + +bool StringRef::getAsDouble(double &Result, bool AllowInexact) const { + APFloat F(0.0); + APFloat::opStatus Status = + F.convertFromString(*this, APFloat::rmNearestTiesToEven); + if (Status != APFloat::opOK) { + if (!AllowInexact || !(Status & APFloat::opInexact)) + return true; + } + + Result = F.convertToDouble(); + return false; +} + +// Implementation of StringRef hashing. +hash_code llvm::hash_value(StringRef S) { + return hash_combine_range(S.begin(), S.end()); +} diff --git a/llvm/lib/Support/StringSaver.cpp b/llvm/lib/Support/StringSaver.cpp new file mode 100644 index 0000000000000..f7ccfb97ea798 --- /dev/null +++ b/llvm/lib/Support/StringSaver.cpp @@ -0,0 +1,26 @@ +//===-- StringSaver.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringSaver.h" + +using namespace llvm; + +StringRef StringSaver::save(StringRef S) { + char *P = Alloc.Allocate<char>(S.size() + 1); + if (!S.empty()) + memcpy(P, S.data(), S.size()); + P[S.size()] = '\0'; + return StringRef(P, S.size()); +} + +StringRef UniqueStringSaver::save(StringRef S) { + auto R = Unique.insert(S); + if (R.second) // cache miss, need to actually save the string + *R.first = Strings.save(S); // safe replacement with equal value + return *R.first; +} diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/Support/SymbolRemappingReader.cpp new file mode 100644 index 0000000000000..1caf0947216ea --- /dev/null +++ b/llvm/lib/Support/SymbolRemappingReader.cpp @@ -0,0 +1,80 @@ +//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed for reading and applying symbol +// remapping files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SymbolRemappingReader.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/LineIterator.h" + +using namespace llvm; + +char SymbolRemappingParseError::ID; + +/// Load a set of name remappings from a text file. +/// +/// See the documentation at the top of the file for an explanation of +/// the expected format. 
+Error SymbolRemappingReader::read(MemoryBuffer &B) { + line_iterator LineIt(B, /*SkipBlanks=*/true, '#'); + + auto ReportError = [&](Twine Msg) { + return llvm::make_error<SymbolRemappingParseError>( + B.getBufferIdentifier(), LineIt.line_number(), Msg); + }; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + Line = Line.ltrim(' '); + // line_iterator only detects comments starting in column 1. + if (Line.startswith("#") || Line.empty()) + continue; + + SmallVector<StringRef, 4> Parts; + Line.split(Parts, ' ', /*MaxSplits*/-1, /*KeepEmpty*/false); + + if (Parts.size() != 3) + return ReportError("Expected 'kind mangled_name mangled_name', " + "found '" + Line + "'"); + + using FK = ItaniumManglingCanonicalizer::FragmentKind; + Optional<FK> FragmentKind = StringSwitch<Optional<FK>>(Parts[0]) + .Case("name", FK::Name) + .Case("type", FK::Type) + .Case("encoding", FK::Encoding) + .Default(None); + if (!FragmentKind) + return ReportError("Invalid kind, expected 'name', 'type', or 'encoding'," + " found '" + Parts[0] + "'"); + + using EE = ItaniumManglingCanonicalizer::EquivalenceError; + switch (Canonicalizer.addEquivalence(*FragmentKind, Parts[1], Parts[2])) { + case EE::Success: + break; + + case EE::ManglingAlreadyUsed: + return ReportError("Manglings '" + Parts[1] + "' and '" + Parts[2] + "' " + "have both been used in prior remappings. 
Move this " + "remapping earlier in the file."); + + case EE::InvalidFirstMangling: + return ReportError("Could not demangle '" + Parts[1] + "' " + "as a <" + Parts[0] + ">; invalid mangling?"); + + case EE::InvalidSecondMangling: + return ReportError("Could not demangle '" + Parts[2] + "' " + "as a <" + Parts[0] + ">; invalid mangling?"); + } + } + + return Error::success(); +} diff --git a/llvm/lib/Support/SystemUtils.cpp b/llvm/lib/Support/SystemUtils.cpp new file mode 100644 index 0000000000000..47e0c72ec7c13 --- /dev/null +++ b/llvm/lib/Support/SystemUtils.cpp @@ -0,0 +1,30 @@ +//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains functions used to do a variety of low-level, often +// system-specific, tasks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SystemUtils.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check, + bool print_warning) { + if (stream_to_check.is_displayed()) { + if (print_warning) { + errs() << "WARNING: You're attempting to print out a bitcode file.\n" + "This is inadvisable as it may cause display problems. 
If\n" + "you REALLY want to taste LLVM bitcode first-hand, you\n" + "can force output with the `-f' option.\n\n"; + } + return true; + } + return false; +} diff --git a/llvm/lib/Support/TarWriter.cpp b/llvm/lib/Support/TarWriter.cpp new file mode 100644 index 0000000000000..6136e92197672 --- /dev/null +++ b/llvm/lib/Support/TarWriter.cpp @@ -0,0 +1,200 @@ +//===-- TarWriter.cpp - Tar archive file creator --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TarWriter class provides a feature to create a tar archive file. +// +// I put emphasis on simplicity over comprehensiveness when implementing this +// class because we don't need a full-fledged archive file generator in LLVM +// at the moment. +// +// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames +// are stored using the PAX extension. The PAX header is standardized in +// POSIX.1-2001. +// +// The struct definition of UstarHeader is copied from +// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TarWriter.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" + +using namespace llvm; + +// Each file in an archive must be aligned to this block size. 
+static const int BlockSize = 512; + +struct UstarHeader { + char Name[100]; + char Mode[8]; + char Uid[8]; + char Gid[8]; + char Size[12]; + char Mtime[12]; + char Checksum[8]; + char TypeFlag; + char Linkname[100]; + char Magic[6]; + char Version[2]; + char Uname[32]; + char Gname[32]; + char DevMajor[8]; + char DevMinor[8]; + char Prefix[155]; + char Pad[12]; +}; +static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); + +static UstarHeader makeUstarHeader() { + UstarHeader Hdr = {}; + memcpy(Hdr.Magic, "ustar", 5); // Ustar magic + memcpy(Hdr.Version, "00", 2); // Ustar version + return Hdr; +} + +// A PAX attribute is in the form of "<length> <key>=<value>\n" +// where <length> is the length of the entire string including +// the length field itself. An example string is this. +// +// 25 ctime=1084839148.1212\n +// +// This function create such string. +static std::string formatPax(StringRef Key, StringRef Val) { + int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" + + // We need to compute total size twice because appending + // a length field could change total size by one. + int Total = Len + Twine(Len).str().size(); + Total = Len + Twine(Total).str().size(); + return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); +} + +// Headers in tar files must be aligned to 512 byte boundaries. +// This function forwards the current file position to the next boundary. +static void pad(raw_fd_ostream &OS) { + uint64_t Pos = OS.tell(); + OS.seek(alignTo(Pos, BlockSize)); +} + +// Computes a checksum for a tar header. +static void computeChecksum(UstarHeader &Hdr) { + // Before computing a checksum, checksum field must be + // filled with space characters. + memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); + + // Compute a checksum and set it to the checksum field. 
+ unsigned Chksum = 0; + for (size_t I = 0; I < sizeof(Hdr); ++I) + Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; + snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum); +} + +// Create a tar header and write it to a given output stream. +static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) { + // A PAX header consists of a 512-byte header followed + // by key-value strings. First, create key-value strings. + std::string PaxAttr = formatPax("path", Path); + + // Create a 512-byte header. + UstarHeader Hdr = makeUstarHeader(); + snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size()); + Hdr.TypeFlag = 'x'; // PAX magic + computeChecksum(Hdr); + + // Write them down. + OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); + OS << PaxAttr; + pad(OS); +} + +// Path fits in a Ustar header if +// +// - Path is less than 100 characters long, or +// - Path is in the form of "<prefix>/<name>" where <prefix> is less +// than or equal to 155 characters long and <name> is less than 100 +// characters long. Both <prefix> and <name> can contain extra '/'. +// +// If Path fits in a Ustar header, updates Prefix and Name and returns true. +// Otherwise, returns false. +static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) { + if (Path.size() < sizeof(UstarHeader::Name)) { + Prefix = ""; + Name = Path; + return true; + } + + size_t Sep = Path.rfind('/', sizeof(UstarHeader::Prefix) + 1); + if (Sep == StringRef::npos) + return false; + if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name)) + return false; + + Prefix = Path.substr(0, Sep); + Name = Path.substr(Sep + 1); + return true; +} + +// The PAX header is an extended format, so a PAX header needs +// to be followed by a "real" header. 
+static void writeUstarHeader(raw_fd_ostream &OS, StringRef Prefix, + StringRef Name, size_t Size) { + UstarHeader Hdr = makeUstarHeader(); + memcpy(Hdr.Name, Name.data(), Name.size()); + memcpy(Hdr.Mode, "0000664", 8); + snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size); + memcpy(Hdr.Prefix, Prefix.data(), Prefix.size()); + computeChecksum(Hdr); + OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); +} + +// Creates a TarWriter instance and returns it. +Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, + StringRef BaseDir) { + using namespace sys::fs; + int FD; + if (std::error_code EC = + openFileForWrite(OutputPath, FD, CD_CreateAlways, OF_None)) + return make_error<StringError>("cannot open " + OutputPath, EC); + return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); +} + +TarWriter::TarWriter(int FD, StringRef BaseDir) + : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} + +// Append a given file to an archive. +void TarWriter::append(StringRef Path, StringRef Data) { + // Write Path and Data. + std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(Path); + + // We do not want to include the same file more than once. + if (!Files.insert(Fullpath).second) + return; + + StringRef Prefix; + StringRef Name; + if (splitUstar(Fullpath, Prefix, Name)) { + writeUstarHeader(OS, Prefix, Name, Data.size()); + } else { + writePaxHeader(OS, Fullpath); + writeUstarHeader(OS, "", "", Data.size()); + } + + OS << Data; + pad(OS); + + // POSIX requires tar archives end with two null blocks. + // Here, we write the terminator and then seek back, so that + // the file being output is terminated correctly at any moment. 
+ uint64_t Pos = OS.tell(); + OS << std::string(BlockSize * 2, '\0'); + OS.seek(Pos); + OS.flush(); +} diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp new file mode 100644 index 0000000000000..d213b9a8c6afa --- /dev/null +++ b/llvm/lib/Support/TargetParser.cpp @@ -0,0 +1,208 @@ +//===-- TargetParser - Parser for target features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise hardware features such as +// FPU/CPU/ARCH names as well as specific support such as HDIV, etc. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/TargetParser.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" + +using namespace llvm; +using namespace AMDGPU; + +namespace { + +struct GPUInfo { + StringLiteral Name; + StringLiteral CanonicalName; + AMDGPU::GPUKind Kind; + unsigned Features; +}; + +constexpr GPUInfo R600GPUs[26] = { + // Name Canonical Kind Features + // Name + {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE }, + {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE }, + {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE }, + {{"r630"}, {"r630"}, GK_R630, FEATURE_NONE }, + {{"rs780"}, {"rs880"}, GK_RS880, FEATURE_NONE }, + {{"rs880"}, {"rs880"}, GK_RS880, FEATURE_NONE }, + {{"rv610"}, {"rs880"}, GK_RS880, FEATURE_NONE }, + {{"rv620"}, {"rs880"}, GK_RS880, FEATURE_NONE }, + {{"rv670"}, {"rv670"}, GK_RV670, FEATURE_NONE }, + {{"rv710"}, {"rv710"}, GK_RV710, FEATURE_NONE }, + {{"rv730"}, {"rv730"}, GK_RV730, FEATURE_NONE }, + {{"rv740"}, {"rv770"}, GK_RV770, FEATURE_NONE }, + {{"rv770"}, 
{"rv770"}, GK_RV770, FEATURE_NONE }, + {{"cedar"}, {"cedar"}, GK_CEDAR, FEATURE_NONE }, + {{"palm"}, {"cedar"}, GK_CEDAR, FEATURE_NONE }, + {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA }, + {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA }, + {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE }, + {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE }, + {{"sumo"}, {"sumo"}, GK_SUMO, FEATURE_NONE }, + {{"sumo2"}, {"sumo"}, GK_SUMO, FEATURE_NONE }, + {{"barts"}, {"barts"}, GK_BARTS, FEATURE_NONE }, + {{"caicos"}, {"caicos"}, GK_CAICOS, FEATURE_NONE }, + {{"aruba"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA }, + {{"cayman"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA }, + {{"turks"}, {"turks"}, GK_TURKS, FEATURE_NONE } +}; + +// This table should be sorted by the value of GPUKind +// Don't bother listing the implicitly true features +constexpr GPUInfo AMDGCNGPUs[37] = { + // Name Canonical Kind Features + // Name + {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, + {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, + {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, + {{"hainan"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, + {{"oland"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, + {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, + {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, + {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE}, + {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE}, + {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32}, + {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32}, + {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32}, + {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, + {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, + {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, + {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, + {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, + {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, + {{"carrizo"}, {"gfx801"}, GK_GFX801, 
FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx802"},    {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
+  {{"iceland"},   {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
+  {{"tonga"},     {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
+  {{"gfx803"},    {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
+  {{"fiji"},      {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
+  {{"polaris10"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
+  {{"polaris11"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
+  {{"gfx810"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32},
+  {{"stoney"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32},
+  {{"gfx900"},    {"gfx900"},  GK_GFX900,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx902"},    {"gfx902"},  GK_GFX902,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx904"},    {"gfx904"},  GK_GFX904,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx906"},    {"gfx906"},  GK_GFX906,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx908"},    {"gfx908"},  GK_GFX908,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx909"},    {"gfx909"},  GK_GFX909,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+};
+
+/// Return the first entry of \p Table whose Kind is exactly \p AK, or nullptr
+/// when the table has no such entry. The binary search requires \p Table to be
+/// ordered by Kind.
+const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
+  GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
+
+  auto I = std::lower_bound(Table.begin(), Table.end(), Search,
+    [](const GPUInfo &A, const GPUInfo &B) {
+      return A.Kind < B.Kind;
+    });
+
+  // lower_bound only yields the first entry that is not less than AK; reject
+  // it unless it really is AK, otherwise an unknown kind would silently map
+  // to the next larger one.
+  if (I == Table.end() || I->Kind != AK)
+    return nullptr;
+  return I;
+}
+
+} // namespace
+
+StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
+  if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
+    return Entry->CanonicalName;
+  return "";
+}
+
+StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
+  if (const auto *Entry = getArchEntry(AK, R600GPUs))
+    return Entry->CanonicalName;
+  return "";
+}
+
+AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
+  for (const auto &C : AMDGCNGPUs) {
+    if (CPU == C.Name)
+      return C.Kind;
+  }
+
+  return AMDGPU::GPUKind::GK_NONE;
+}
+
+AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
+  for (const auto &C : R600GPUs) {
+    if (CPU == C.Name)
+      return C.Kind;
+  }
+
+  return AMDGPU::GPUKind::GK_NONE;
+}
+
+unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
+  if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
+    return Entry->Features;
+  return FEATURE_NONE;
+}
+
+unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
+  if (const auto *Entry = getArchEntry(AK, R600GPUs))
+    return Entry->Features;
+  return FEATURE_NONE;
+}
+
+void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
+  // XXX: Should this only report unique canonical names?
+  for (const auto &C : AMDGCNGPUs)
+    Values.push_back(C.Name);
+}
+
+void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
+  for (const auto &C : R600GPUs)
+    Values.push_back(C.Name);
+}
+
+AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
+  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+  if (AK == AMDGPU::GPUKind::GK_NONE) {
+    if (GPU == "generic-hsa")
+      return {7, 0, 0};
+    if (GPU == "generic")
+      return {6, 0, 0};
+    return {0, 0, 0};
+  }
+
+  switch (AK) {
+  case GK_GFX600:  return {6, 0, 0};
+  case GK_GFX601:  return {6, 0, 1};
+  case GK_GFX700:  return {7, 0, 0};
+  case GK_GFX701:  return {7, 0, 1};
+  case GK_GFX702:  return {7, 0, 2};
+  case GK_GFX703:  return {7, 0, 3};
+  case GK_GFX704:  return {7, 0, 4};
+  case GK_GFX801:  return {8, 0, 1};
+  case GK_GFX802:  return {8, 0, 2};
+  case GK_GFX803:  return {8, 0, 3};
+  case GK_GFX810:  return {8, 1, 0};
+  case GK_GFX900:  return {9, 0, 0};
+  case GK_GFX902:  return {9, 0, 2};
+  case GK_GFX904:  return {9, 0, 4};
+  case GK_GFX906:  return {9, 0, 6};
+  case GK_GFX908:  return {9, 0, 8};
+  case GK_GFX909:  return {9, 0, 9};
+  case GK_GFX1010: return {10, 1, 0};
+  case GK_GFX1011: return {10, 1, 1};
+  case GK_GFX1012: return {10, 1, 2};
+  default:         return {0, 0, 0};
+  }
+}
diff --git a/llvm/lib/Support/TargetRegistry.cpp b/llvm/lib/Support/TargetRegistry.cpp
new file mode 100644
index 0000000000000..1f9c3bbf82294
--- /dev/null
+++ b/llvm/lib/Support/TargetRegistry.cpp
@@ -0,0 +1,134 @@
+//===--- TargetRegistry.cpp - Target registration -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <vector>
+using namespace llvm;
+
+// Clients are responsible for avoid race conditions in registration.
+static Target *FirstTarget = nullptr;
+
+iterator_range<TargetRegistry::iterator> TargetRegistry::targets() {
+  return make_range(iterator(FirstTarget), iterator());
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
+                                           Triple &TheTriple,
+                                           std::string &Error) {
+  // Allocate target machine. First, check whether the user has explicitly
+  // specified an architecture to compile for. If so we have to look it up by
+  // name, because it might be a backend that has no mapping to a target triple.
+  const Target *TheTarget = nullptr;
+  if (!ArchName.empty()) {
+    auto I = find_if(targets(),
+                     [&](const Target &T) { return ArchName == T.getName(); });
+
+    if (I == targets().end()) {
+      Error = "error: invalid target '" + ArchName + "'.\n";
+      return nullptr;
+    }
+
+    TheTarget = &*I;
+
+    // Adjust the triple to match (if known), otherwise stick with the
+    // given triple.
+    Triple::ArchType Type = Triple::getArchTypeForLLVMName(ArchName);
+    if (Type != Triple::UnknownArch)
+      TheTriple.setArch(Type);
+  } else {
+    // Get the target specific parser.
+    std::string TempError;
+    TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError);
+    if (!TheTarget) {
+      Error = ": error: unable to get target for '"
+            + TheTriple.getTriple()
+            + "', see --version and --triple.\n";
+      return nullptr;
+    }
+  }
+
+  return TheTarget;
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &TT,
+                                           std::string &Error) {
+  // Provide special warning when no targets are initialized.
+  if (targets().begin() == targets().end()) {
+    Error = "Unable to find target for this triple (no targets are registered)";
+    return nullptr;
+  }
+  Triple::ArchType Arch = Triple(TT).getArch();
+  auto ArchMatch = [&](const Target &T) { return T.ArchMatchFn(Arch); };
+  auto I = find_if(targets(), ArchMatch);
+
+  if (I == targets().end()) {
+    Error = "No available targets are compatible with triple \"" + TT + "\"";
+    return nullptr;
+  }
+
+  auto J = std::find_if(std::next(I), targets().end(), ArchMatch);
+  if (J != targets().end()) {
+    Error = std::string("Cannot choose between targets \"") + I->Name +
+            "\" and \"" + J->Name + "\"";
+    return nullptr;
+  }
+
+  return &*I;
+}
+
+void TargetRegistry::RegisterTarget(Target &T, const char *Name,
+                                    const char *ShortDesc,
+                                    const char *BackendName,
+                                    Target::ArchMatchFnTy ArchMatchFn,
+                                    bool HasJIT) {
+  assert(Name && ShortDesc && ArchMatchFn &&
+         "Missing required target information!");
+
+  // Check if this target has already been initialized, we allow this as a
+  // convenience to some clients.
+  if (T.Name)
+    return;
+
+  // Add to the list of targets.
+  T.Next = FirstTarget;
+  FirstTarget = &T;
+
+  T.Name = Name;
+  T.ShortDesc = ShortDesc;
+  T.BackendName = BackendName;
+  T.ArchMatchFn = ArchMatchFn;
+  T.HasJIT = HasJIT;
+}
+
+static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
+                             const std::pair<StringRef, const Target *> *RHS) {
+  return LHS->first.compare(RHS->first);
+}
+
+void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
+  std::vector<std::pair<StringRef, const Target*> > Targets;
+  size_t Width = 0;
+  for (const auto &T : TargetRegistry::targets()) {
+    Targets.push_back(std::make_pair(T.getName(), &T));
+    Width = std::max(Width, Targets.back().first.size());
+  }
+  array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
+
+  OS << " Registered Targets:\n";
+  for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+    OS << " " << Targets[i].first;
+    OS.indent(Width - Targets[i].first.size()) << " - "
+      << Targets[i].second->getShortDescription() << '\n';
+  }
+  if (Targets.empty())
+    OS << " (none)\n";
+}
diff --git a/llvm/lib/Support/ThreadLocal.cpp b/llvm/lib/Support/ThreadLocal.cpp
new file mode 100644
index 0000000000000..44e6223cf17b6
--- /dev/null
+++ b/llvm/lib/Support/ThreadLocal.cpp
@@ -0,0 +1,47 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ThreadLocal.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Compiler.h" + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +// Define all methods as no-ops if threading is explicitly disabled +namespace llvm { +using namespace sys; +ThreadLocalImpl::ThreadLocalImpl() : data() { } +ThreadLocalImpl::~ThreadLocalImpl() { } +void ThreadLocalImpl::setInstance(const void* d) { + static_assert(sizeof(d) <= sizeof(data), "size too big"); + void **pd = reinterpret_cast<void**>(&data); + *pd = const_cast<void*>(d); +} +void *ThreadLocalImpl::getInstance() { + void **pd = reinterpret_cast<void**>(&data); + return *pd; +} +void ThreadLocalImpl::removeInstance() { + setInstance(nullptr); +} +} +#elif defined(LLVM_ON_UNIX) +#include "Unix/ThreadLocal.inc" +#elif defined( _WIN32) +#include "Windows/ThreadLocal.inc" +#else +#warning Neither LLVM_ON_UNIX nor _WIN32 set in Support/ThreadLocal.cpp +#endif diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp new file mode 100644 index 0000000000000..40982d777914d --- /dev/null +++ b/llvm/lib/Support/ThreadPool.cpp @@ -0,0 +1,145 @@ +//==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a crude C++11 based thread pool. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ThreadPool.h" + +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#if LLVM_ENABLE_THREADS + +// Default to hardware_concurrency +ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {} + +ThreadPool::ThreadPool(unsigned ThreadCount) + : ActiveThreads(0), EnableFlag(true) { + // Create ThreadCount threads that will loop forever, wait on QueueCondition + // for tasks to be queued or the Pool to be destroyed. + Threads.reserve(ThreadCount); + for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) { + Threads.emplace_back([&] { + while (true) { + PackagedTaskTy Task; + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + // Wait for tasks to be pushed in the queue + QueueCondition.wait(LockGuard, + [&] { return !EnableFlag || !Tasks.empty(); }); + // Exit condition + if (!EnableFlag && Tasks.empty()) + return; + // Yeah, we have a task, grab it and release the lock on the queue + + // We first need to signal that we are active before popping the queue + // in order for wait() to properly detect that even if the queue is + // empty, there is still a task in flight. 
+ { + std::unique_lock<std::mutex> LockGuard(CompletionLock); + ++ActiveThreads; + } + Task = std::move(Tasks.front()); + Tasks.pop(); + } + // Run the task we just grabbed + Task(); + + { + // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() + std::unique_lock<std::mutex> LockGuard(CompletionLock); + --ActiveThreads; + } + + // Notify task completion, in case someone waits on ThreadPool::wait() + CompletionCondition.notify_all(); + } + }); + } +} + +void ThreadPool::wait() { + // Wait for all threads to complete and the queue to be empty + std::unique_lock<std::mutex> LockGuard(CompletionLock); + // The order of the checks for ActiveThreads and Tasks.empty() matters because + // any active threads might be modifying the Tasks queue, and this would be a + // race. + CompletionCondition.wait(LockGuard, + [&] { return !ActiveThreads && Tasks.empty(); }); +} + +std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) { + /// Wrap the Task in a packaged_task to return a future object. + PackagedTaskTy PackagedTask(std::move(Task)); + auto Future = PackagedTask.get_future(); + { + // Lock the queue and push the new task + std::unique_lock<std::mutex> LockGuard(QueueLock); + + // Don't allow enqueueing after disabling the pool + assert(EnableFlag && "Queuing a thread during ThreadPool destruction"); + + Tasks.push(std::move(PackagedTask)); + } + QueueCondition.notify_one(); + return Future.share(); +} + +// The destructor joins all threads, waiting for completion. 
+ThreadPool::~ThreadPool() { + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + EnableFlag = false; + } + QueueCondition.notify_all(); + for (auto &Worker : Threads) + Worker.join(); +} + +#else // LLVM_ENABLE_THREADS Disabled + +ThreadPool::ThreadPool() : ThreadPool(0) {} + +// No threads are launched, issue a warning if ThreadCount is not 0 +ThreadPool::ThreadPool(unsigned ThreadCount) + : ActiveThreads(0) { + if (ThreadCount) { + errs() << "Warning: request a ThreadPool with " << ThreadCount + << " threads, but LLVM_ENABLE_THREADS has been turned off\n"; + } +} + +void ThreadPool::wait() { + // Sequential implementation running the tasks + while (!Tasks.empty()) { + auto Task = std::move(Tasks.front()); + Tasks.pop(); + Task(); + } +} + +std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) { + // Get a Future with launch::deferred execution using std::async + auto Future = std::async(std::launch::deferred, std::move(Task)).share(); + // Wrap the future so that both ThreadPool::wait() can operate and the + // returned future can be sync'ed on. + PackagedTaskTy PackagedTask([Future]() { Future.get(); }); + Tasks.push(std::move(PackagedTask)); + return Future; +} + +ThreadPool::~ThreadPool() { + wait(); +} + +#endif diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp new file mode 100644 index 0000000000000..e5899a60f4dbc --- /dev/null +++ b/llvm/lib/Support/Threading.cpp @@ -0,0 +1,95 @@ +//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines helper functions for running LLVM in a multi-threaded +// environment. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Threading.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Host.h" + +#include <cassert> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +bool llvm::llvm_is_multithreaded() { +#if LLVM_ENABLE_THREADS != 0 + return true; +#else + return false; +#endif +} + +#if LLVM_ENABLE_THREADS == 0 || \ + (!defined(_WIN32) && !defined(HAVE_PTHREAD_H)) +// Support for non-Win32, non-pthread implementation. +void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData, + unsigned RequestedStackSize) { + (void)RequestedStackSize; + Fn(UserData); +} + +unsigned llvm::heavyweight_hardware_concurrency() { return 1; } + +unsigned llvm::hardware_concurrency() { return 1; } + +uint64_t llvm::get_threadid() { return 0; } + +uint32_t llvm::get_max_thread_name_length() { return 0; } + +void llvm::set_thread_name(const Twine &Name) {} + +void llvm::get_thread_name(SmallVectorImpl<char> &Name) { Name.clear(); } + +#else + +#include <thread> +unsigned llvm::heavyweight_hardware_concurrency() { + // Since we can't get here unless LLVM_ENABLE_THREADS == 1, it is safe to use + // `std::thread` directly instead of `llvm::thread` (and indeed, doing so + // allows us to not define `thread` in the llvm namespace, which conflicts + // with some platforms such as FreeBSD whose headers also define a struct + // called `thread` in the global namespace which can cause ambiguity due to + // ADL. 
+ int NumPhysical = sys::getHostNumPhysicalCores(); + if (NumPhysical == -1) + return std::thread::hardware_concurrency(); + return NumPhysical; +} + +unsigned llvm::hardware_concurrency() { +#if defined(HAVE_SCHED_GETAFFINITY) && defined(HAVE_CPU_COUNT) + cpu_set_t Set; + if (sched_getaffinity(0, sizeof(Set), &Set)) + return CPU_COUNT(&Set); +#endif + // Guard against std::thread::hardware_concurrency() returning 0. + if (unsigned Val = std::thread::hardware_concurrency()) + return Val; + return 1; +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Threading.inc" +#endif +#ifdef _WIN32 +#include "Windows/Threading.inc" +#endif + +#endif diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp new file mode 100644 index 0000000000000..ca9119e30b65f --- /dev/null +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -0,0 +1,222 @@ +//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements hierarchical time profiler. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TimeProfiler.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include <cassert> +#include <chrono> +#include <string> +#include <vector> + +using namespace std::chrono; + +namespace llvm { + +TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; + +typedef duration<steady_clock::rep, steady_clock::period> DurationType; +typedef time_point<steady_clock> TimePointType; +typedef std::pair<size_t, DurationType> CountAndDurationType; +typedef std::pair<std::string, CountAndDurationType> + NameAndCountAndDurationType; + +struct Entry { + TimePointType Start; + TimePointType End; + std::string Name; + std::string Detail; + + Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) + : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), + Detail(std::move(Dt)){}; + + // Calculate timings for FlameGraph. Cast time points to microsecond precision + // rather than casting duration. This avoid truncation issues causing inner + // scopes overruning outer scopes. + steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { + return (time_point_cast<microseconds>(Start) - + time_point_cast<microseconds>(StartTime)) + .count(); + } + + steady_clock::rep getFlameGraphDurUs() const { + return (time_point_cast<microseconds>(End) - + time_point_cast<microseconds>(Start)) + .count(); + } +}; + +struct TimeTraceProfiler { + TimeTraceProfiler() { + StartTime = steady_clock::now(); + } + + void begin(std::string Name, llvm::function_ref<std::string()> Detail) { + Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), + Detail()); + } + + void end() { + assert(!Stack.empty() && "Must call begin() first"); + auto &E = Stack.back(); + E.End = steady_clock::now(); + + // Check that end times monotonically increase. 
+ assert((Entries.empty() || + (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= + Entries.back().getFlameGraphStartUs(StartTime) + + Entries.back().getFlameGraphDurUs())) && + "TimeProfiler scope ended earlier than previous scope"); + + // Calculate duration at full precision for overall counts. + DurationType Duration = E.End - E.Start; + + // Only include sections longer or equal to TimeTraceGranularity msec. + if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) + Entries.emplace_back(E); + + // Track total time taken by each "name", but only the topmost levels of + // them; e.g. if there's a template instantiation that instantiates other + // templates from within, we only want to add the topmost one. "topmost" + // happens to be the ones that don't have any currently open entries above + // itself. + if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) { + return Val.Name == E.Name; + }) == Stack.rend()) { + auto &CountAndTotal = CountAndTotalPerName[E.Name]; + CountAndTotal.first++; + CountAndTotal.second += Duration; + } + + Stack.pop_back(); + } + + void Write(raw_pwrite_stream &OS) { + assert(Stack.empty() && + "All profiler sections should be ended when calling Write"); + json::OStream J(OS); + J.objectBegin(); + J.attributeBegin("traceEvents"); + J.arrayBegin(); + + // Emit all events for the main flame graph. + for (const auto &E : Entries) { + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); + + J.object([&]{ + J.attribute("pid", 1); + J.attribute("tid", 0); + J.attribute("ph", "X"); + J.attribute("ts", StartUs); + J.attribute("dur", DurUs); + J.attribute("name", E.Name); + J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); + }); + } + + // Emit totals by section name as additional "thread" events, sorted from + // longest one. 
+ int Tid = 1; + std::vector<NameAndCountAndDurationType> SortedTotals; + SortedTotals.reserve(CountAndTotalPerName.size()); + for (const auto &E : CountAndTotalPerName) + SortedTotals.emplace_back(E.getKey(), E.getValue()); + + llvm::sort(SortedTotals.begin(), SortedTotals.end(), + [](const NameAndCountAndDurationType &A, + const NameAndCountAndDurationType &B) { + return A.second.second > B.second.second; + }); + for (const auto &E : SortedTotals) { + auto DurUs = duration_cast<microseconds>(E.second.second).count(); + auto Count = CountAndTotalPerName[E.first].first; + + J.object([&]{ + J.attribute("pid", 1); + J.attribute("tid", Tid); + J.attribute("ph", "X"); + J.attribute("ts", 0); + J.attribute("dur", DurUs); + J.attribute("name", "Total " + E.first); + J.attributeObject("args", [&] { + J.attribute("count", int64_t(Count)); + J.attribute("avg ms", int64_t(DurUs / Count / 1000)); + }); + }); + + ++Tid; + } + + // Emit metadata event with process name. + J.object([&] { + J.attribute("cat", ""); + J.attribute("pid", 1); + J.attribute("tid", 0); + J.attribute("ts", 0); + J.attribute("ph", "M"); + J.attribute("name", "process_name"); + J.attributeObject("args", [&] { J.attribute("name", "clang"); }); + }); + + J.arrayEnd(); + J.attributeEnd(); + J.objectEnd(); + } + + SmallVector<Entry, 16> Stack; + SmallVector<Entry, 128> Entries; + StringMap<CountAndDurationType> CountAndTotalPerName; + TimePointType StartTime; + + // Minimum time granularity (in microseconds) + unsigned TimeTraceGranularity; +}; + +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) { + assert(TimeTraceProfilerInstance == nullptr && + "Profiler should not be initialized"); + TimeTraceProfilerInstance = new TimeTraceProfiler(); + TimeTraceProfilerInstance->TimeTraceGranularity = TimeTraceGranularity; +} + +void timeTraceProfilerCleanup() { + delete TimeTraceProfilerInstance; + TimeTraceProfilerInstance = nullptr; +} + +void timeTraceProfilerWrite(raw_pwrite_stream &OS) { + 
assert(TimeTraceProfilerInstance != nullptr && + "Profiler object can't be null"); + TimeTraceProfilerInstance->Write(OS); +} + +void timeTraceProfilerBegin(StringRef Name, StringRef Detail) { + if (TimeTraceProfilerInstance != nullptr) + TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; }); +} + +void timeTraceProfilerBegin(StringRef Name, + llvm::function_ref<std::string()> Detail) { + if (TimeTraceProfilerInstance != nullptr) + TimeTraceProfilerInstance->begin(Name, Detail); +} + +void timeTraceProfilerEnd() { + if (TimeTraceProfilerInstance != nullptr) + TimeTraceProfilerInstance->end(); +} + +} // namespace llvm diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp new file mode 100644 index 0000000000000..10c9b8e0b329c --- /dev/null +++ b/llvm/lib/Support/Timer.cpp @@ -0,0 +1,442 @@ +//===-- Timer.cpp - Interval Timing Support -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Interval Timing implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Timer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signposts.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <limits> + +using namespace llvm; + +// This ugly hack is brought to you courtesy of constructor/destructor ordering +// being unspecified by C++. 
Basically the problem is that a Statistic object
+// gets destroyed, which ends up calling 'GetLibSupportInfoOutputFile()'
+// (below), which calls this function. LibSupportInfoOutputFilename used to be
+// a global variable, but sometimes it would get destroyed before the Statistic,
+// causing havoc to ensue. We "fix" this by creating the string the first time
+// it is needed and never destroying it.
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+  return *LibSupportInfoOutputFilename;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
+/// Allows llvm::Timer to emit signposts when supported.
+static ManagedStatic<SignpostEmitter> Signposts;
+
+namespace {
+  static cl::opt<bool>
+  TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+                                      "tracking (this may be slow)"),
+             cl::Hidden);
+
+  static cl::opt<std::string, true>
+  InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+                     cl::desc("File to append -stats and -timer output to"),
+                   cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+/// Open the stream that -stats / timer reports are written to: stderr when no
+/// file name was given, stdout for "-", otherwise the named file in append
+/// mode. Falls back to stderr (after printing an error) if the file cannot be
+/// opened.
+std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
+  const std::string &OutputFilename = getLibSupportInfoOutputFilename();
+  if (OutputFilename.empty())
+    return std::make_unique<raw_fd_ostream>(2, false); // stderr.
+  if (OutputFilename == "-")
+    return std::make_unique<raw_fd_ostream>(1, false); // stdout.
+
+  // Append mode is used because the info output file is opened and closed
+  // each time -stats or -time-passes wants to print output to it. To
+  // compensate for this, the test-suite Makefiles have code to delete the
+  // info output file before running commands which write to it.
+  std::error_code EC;
+  auto Result = std::make_unique<raw_fd_ostream>(
+      OutputFilename, EC, sys::fs::OF_Append | sys::fs::OF_Text);
+  if (!EC)
+    return Result;
+
+  // The file name is quoted on both sides in the diagnostic.
+  errs() << "Error opening info-output-file '"
+    << OutputFilename << "' for appending!\n";
+  return std::make_unique<raw_fd_ostream>(2, false); // stderr.
+}
+
+namespace {
+struct CreateDefaultTimerGroup {
+  static void *call() {
+    return new TimerGroup("misc", "Miscellaneous Ungrouped Timers");
+  }
+};
+} // namespace
+static ManagedStatic<TimerGroup, CreateDefaultTimerGroup> DefaultTimerGroup;
+static TimerGroup *getDefaultTimerGroup() { return &*DefaultTimerGroup; }
+
+//===----------------------------------------------------------------------===//
+// Timer Implementation
+//===----------------------------------------------------------------------===//
+
+void Timer::init(StringRef Name, StringRef Description) {
+  init(Name, Description, *getDefaultTimerGroup());
+}
+
+void Timer::init(StringRef Name, StringRef Description, TimerGroup &tg) {
+  assert(!TG && "Timer already initialized");
+  this->Name.assign(Name.begin(), Name.end());
+  this->Description.assign(Description.begin(), Description.end());
+  Running = Triggered = false;
+  TG = &tg;
+  TG->addTimer(*this);
+}
+
+Timer::~Timer() {
+  if (!TG) return; // Never initialized, or already cleared.
+  TG->removeTimer(*this);
+}
+
+static inline size_t getMemUsage() {
+  if (!TrackSpace) return 0;
+  return sys::Process::GetMallocUsage();
+}
+
+/// Sample wall/user/system time and memory usage. For a start sample the
+/// memory reading is taken before the time reading; for an end sample it is
+/// taken after.
+TimeRecord TimeRecord::getCurrentTime(bool Start) {
+  using Seconds = std::chrono::duration<double, std::ratio<1>>;
+  TimeRecord Result;
+  sys::TimePoint<> now;
+  std::chrono::nanoseconds user, sys;
+
+  if (Start) {
+    Result.MemUsed = getMemUsage();
+    sys::Process::GetTimeUsage(now, user, sys);
+  } else {
+    sys::Process::GetTimeUsage(now, user, sys);
+    Result.MemUsed = getMemUsage();
+  }
+
+  Result.WallTime = Seconds(now.time_since_epoch()).count();
+  Result.UserTime = Seconds(user).count();
+  Result.SystemTime = Seconds(sys).count();
+  return Result;
+}
+
+void Timer::startTimer() {
+  assert(!Running && "Cannot start a running timer");
+  Running = Triggered = true;
+  Signposts->startTimerInterval(this);
+  StartTime = TimeRecord::getCurrentTime(true);
+}
+
+void Timer::stopTimer() {
+  assert(Running && "Cannot stop a paused timer");
+  Running = false;
+  Time += TimeRecord::getCurrentTime(false);
+  Time -= StartTime;
+  Signposts->endTimerInterval(this);
+}
+
+void Timer::clear() {
+  Running = Triggered = false;
+  Time = StartTime = TimeRecord();
+}
+
+static void printVal(double Val, double Total, raw_ostream &OS) {
+  if (Total < 1e-7) // Avoid dividing by zero.
+    OS << " ----- ";
+  else
+    OS << format(" %7.4f (%5.1f%%)", Val, Val*100/Total);
+}
+
+void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
+  if (Total.getUserTime())
+    printVal(getUserTime(), Total.getUserTime(), OS);
+  if (Total.getSystemTime())
+    printVal(getSystemTime(), Total.getSystemTime(), OS);
+  if (Total.getProcessTime())
+    printVal(getProcessTime(), Total.getProcessTime(), OS);
+  printVal(getWallTime(), Total.getWallTime(), OS);
+
+  OS << " ";
+
+  if (Total.getMemUsed())
+    OS << format("%9" PRId64 " ", (int64_t)getMemUsed());
+}
+
+
+//===----------------------------------------------------------------------===//
+// NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef StringMap<Timer> Name2TimerMap;
+
+/// Maps a group name to its TimerGroup and the named timers inside it. The
+/// TimerGroups are allocated on first use and deleted by the destructor.
+class Name2PairMap {
+  StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
+public:
+  ~Name2PairMap() {
+    for (auto &GroupAndTimers : Map)
+      delete GroupAndTimers.second.first;
+  }
+
+  Timer &get(StringRef Name, StringRef Description, StringRef GroupName,
+             StringRef GroupDescription) {
+    sys::SmartScopedLock<true> L(*TimerLock);
+
+    std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
+
+    if (!GroupEntry.first)
+      GroupEntry.first = new TimerGroup(GroupName, GroupDescription);
+
+    Timer &T = GroupEntry.second[Name];
+    if (!T.isInitialized())
+      T.init(Name, Description, *GroupEntry.first);
+    return T;
+  }
+};
+
+}
+
+static ManagedStatic<Name2PairMap> NamedGroupedTimers;
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef Description,
+                                   StringRef GroupName,
+                                   StringRef GroupDescription, bool Enabled)
+  : TimeRegion(!Enabled ? nullptr
+                 : &NamedGroupedTimers->get(Name, Description, GroupName,
+                                            GroupDescription)) {}
+
+//===----------------------------------------------------------------------===//
+//   TimerGroup Implementation
+//===----------------------------------------------------------------------===//
+
+/// This is the global list of TimerGroups, maintained by the TimerGroup
+/// ctor/dtor and is protected by the TimerLock lock.
+static TimerGroup *TimerGroupList = nullptr;
+
+TimerGroup::TimerGroup(StringRef Name, StringRef Description)
+  : Name(Name.begin(), Name.end()),
+    Description(Description.begin(), Description.end()) {
+  // Add the group to TimerGroupList.
+  sys::SmartScopedLock<true> L(*TimerLock);
+  if (TimerGroupList)
+    TimerGroupList->Prev = &Next;
+  Next = TimerGroupList;
+  Prev = &TimerGroupList;
+  TimerGroupList = this;
+}
+
+TimerGroup::TimerGroup(StringRef Name, StringRef Description,
+                       const StringMap<TimeRecord> &Records)
+    : TimerGroup(Name, Description) {
+  TimersToPrint.reserve(Records.size());
+  for (const auto &P : Records)
+    TimersToPrint.emplace_back(P.getValue(), P.getKey(), P.getKey());
+  assert(TimersToPrint.size() == Records.size() && "Size mismatch");
+}
+
+TimerGroup::~TimerGroup() {
+  // If the timer group is destroyed before the timers it owns, accumulate and
+  // print the timing data.
+  while (FirstTimer)
+    removeTimer(*FirstTimer);
+
+  // Remove the group from the TimerGroupList.
+  sys::SmartScopedLock<true> L(*TimerLock);
+  *Prev = Next;
+  if (Next)
+    Next->Prev = Prev;
+}
+
+
+void TimerGroup::removeTimer(Timer &T) {
+  sys::SmartScopedLock<true> L(*TimerLock);
+
+  // If the timer was started, move its data to TimersToPrint.
+  if (T.hasTriggered())
+    TimersToPrint.emplace_back(T.Time, T.Name, T.Description);
+
+  T.TG = nullptr;
+
+  // Unlink the timer from our list.
+  *T.Prev = T.Next;
+  if (T.Next)
+    T.Next->Prev = T.Prev;
+
+  // Print the report when all timers in this group are destroyed if some of
+  // them were started.
+  if (FirstTimer || TimersToPrint.empty())
+    return;
+
+  std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
+  PrintQueuedTimers(*OutStream);
+}
+
+void TimerGroup::addTimer(Timer &T) {
+  sys::SmartScopedLock<true> L(*TimerLock);
+
+  // Add the timer to our list.
+  if (FirstTimer)
+    FirstTimer->Prev = &T.Next;
+  T.Next = FirstTimer;
+  T.Prev = &FirstTimer;
+  FirstTimer = &T;
+}
+
+void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
+  // Sort the records with the default ordering; they are printed in reverse
+  // below.
+  llvm::sort(TimersToPrint);
+
+  TimeRecord Total;
+  for (const PrintRecord &Record : TimersToPrint)
+    Total += Record.Time;
+
+  // Print out timing header.
+  OS << "===" << std::string(73, '-') << "===\n";
+  // Figure out how many spaces to indent TimerGroup name.
+  unsigned Padding = (80-Description.length())/2;
+  if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
+  OS.indent(Padding) << Description << '\n';
+  OS << "===" << std::string(73, '-') << "===\n";
+
+  // If this is not an collection of ungrouped times, print the total time.
+  // Ungrouped timers don't really make sense to add up. We still print the
+  // TOTAL line to make the percentages make sense.
+  if (this != getDefaultTimerGroup())
+    OS << format(" Total Execution Time: %5.4f seconds (%5.4f wall clock)\n",
+                 Total.getProcessTime(), Total.getWallTime());
+  OS << '\n';
+
+  if (Total.getUserTime())
+    OS << " ---User Time---";
+  if (Total.getSystemTime())
+    OS << " --System Time--";
+  if (Total.getProcessTime())
+    OS << " --User+System--";
+  OS << " ---Wall Time---";
+  if (Total.getMemUsed())
+    OS << " ---Mem---";
+  OS << " --- Name ---\n";
+
+  // Loop through all of the timing data, printing it out.
+  for (const PrintRecord &Record : make_range(TimersToPrint.rbegin(),
+                                              TimersToPrint.rend())) {
+    Record.Time.print(Total, OS);
+    OS << Record.Description << '\n';
+  }
+
+  Total.print(Total, OS);
+  OS << "Total\n\n";
+  OS.flush();
+
+  TimersToPrint.clear();
+}
+
+void TimerGroup::prepareToPrintList(bool ResetTime) {
+  // See if any of our timers were started, if so add them to TimersToPrint.
+  for (Timer *T = FirstTimer; T; T = T->Next) {
+    if (!T->hasTriggered()) continue;
+    bool WasRunning = T->isRunning();
+    if (WasRunning)
+      T->stopTimer();
+
+    TimersToPrint.emplace_back(T->Time, T->Name, T->Description);
+
+    if (ResetTime)
+      T->clear();
+
+    if (WasRunning)
+      T->startTimer();
+  }
+}
+
+void TimerGroup::print(raw_ostream &OS, bool ResetAfterPrint) {
+  {
+    // After preparing the timers we can free the lock
+    sys::SmartScopedLock<true> L(*TimerLock);
+    prepareToPrintList(ResetAfterPrint);
+  }
+
+  // If any timers were started, print the group.
+  if (!TimersToPrint.empty())
+    PrintQueuedTimers(OS);
+}
+
+void TimerGroup::clear() {
+  sys::SmartScopedLock<true> L(*TimerLock);
+  for (Timer *T = FirstTimer; T; T = T->Next)
+    T->clear();
+}
+
+void TimerGroup::printAll(raw_ostream &OS) {
+  sys::SmartScopedLock<true> L(*TimerLock);
+
+  for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+    TG->print(OS);
+}
+
+void TimerGroup::clearAll() {
+  sys::SmartScopedLock<true> L(*TimerLock);
+  for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+    TG->clear();
+}
+
+void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R,
+                                const char *suffix, double Value) {
+  assert(yaml::needsQuotes(Name) == yaml::QuotingType::None &&
+         "TimerGroup name should not need quotes");
+  assert(yaml::needsQuotes(R.Name) == yaml::QuotingType::None &&
+         "Timer name should not need quotes");
+  constexpr auto max_digits10 = std::numeric_limits<double>::max_digits10;
+  OS << "\t\"time." << Name << '.' << R.Name << suffix
+     << "\": " << format("%.*e", max_digits10 - 1, Value);
+}
+
+const char *TimerGroup::printJSONValues(raw_ostream &OS, const char *delim) {
+  sys::SmartScopedLock<true> L(*TimerLock);
+
+  prepareToPrintList(false);
+  for (const PrintRecord &R : TimersToPrint) {
+    OS << delim;
+    delim = ",\n";
+
+    const TimeRecord &T = R.Time;
+    printJSONValue(OS, R, ".wall", T.getWallTime());
+    OS << delim;
+    printJSONValue(OS, R, ".user", T.getUserTime());
+    OS << delim;
+    printJSONValue(OS, R, ".sys", T.getSystemTime());
+    if (T.getMemUsed()) {
+      OS << delim;
+      printJSONValue(OS, R, ".mem", T.getMemUsed());
+    }
+  }
+  TimersToPrint.clear();
+  return delim;
+}
+
+const char *TimerGroup::printAllJSONValues(raw_ostream &OS, const char *delim) {
+  sys::SmartScopedLock<true> L(*TimerLock);
+  for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+    delim = TG->printJSONValues(OS, delim);
+  return delim;
+}
+
+void TimerGroup::ConstructTimerLists() {
+  (void)*NamedGroupedTimers;
+}
diff --git a/llvm/lib/Support/ToolOutputFile.cpp b/llvm/lib/Support/ToolOutputFile.cpp
new file mode 100644
index 0000000000000..ed3a247f01155
--- /dev/null
+++ b/llvm/lib/Support/ToolOutputFile.cpp
@@ -0,0 +1,45 @@
+//===--- ToolOutputFile.cpp - Implement the ToolOutputFile class --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ToolOutputFile class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Signals.h" +using namespace llvm; + +ToolOutputFile::CleanupInstaller::CleanupInstaller(StringRef Filename) + : Filename(Filename), Keep(false) { + // Arrange for the file to be deleted if the process is killed. + if (Filename != "-") + sys::RemoveFileOnSignal(Filename); +} + +ToolOutputFile::CleanupInstaller::~CleanupInstaller() { + // Delete the file if the client hasn't told us not to. + if (!Keep && Filename != "-") + sys::fs::remove(Filename); + + // Ok, the file is successfully written and closed, or deleted. There's no + // further need to clean it up on signals. + if (Filename != "-") + sys::DontRemoveFileOnSignal(Filename); +} + +ToolOutputFile::ToolOutputFile(StringRef Filename, std::error_code &EC, + sys::fs::OpenFlags Flags) + : Installer(Filename), OS(Filename, EC, Flags) { + // If open fails, no cleanup is needed. + if (EC) + Installer.Keep = true; +} + +ToolOutputFile::ToolOutputFile(StringRef Filename, int FD) + : Installer(Filename), OS(FD, true) {} diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp new file mode 100644 index 0000000000000..94810b56db8ec --- /dev/null +++ b/llvm/lib/Support/TrigramIndex.cpp @@ -0,0 +1,110 @@ +//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TrigramIndex implements a heuristic for SpecialCaseList that allows to +// filter out ~99% incoming queries when all regular expressions in the +// SpecialCaseList are simple wildcards with '*' and '.'. 
If rules are more +// complicated, the check is defeated and it will always pass the queries to a +// full regex. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TrigramIndex.h" +#include "llvm/ADT/SmallVector.h" + +#include <set> +#include <string> +#include <unordered_map> + +using namespace llvm; + +static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}"; + +static bool isAdvancedMetachar(unsigned Char) { + return strchr(RegexAdvancedMetachars, Char) != nullptr; +} + +void TrigramIndex::insert(std::string Regex) { + if (Defeated) return; + std::set<unsigned> Was; + unsigned Cnt = 0; + unsigned Tri = 0; + unsigned Len = 0; + bool Escaped = false; + for (unsigned Char : Regex) { + if (!Escaped) { + // Regular expressions allow escaping symbols by preceding it with '\'. + if (Char == '\\') { + Escaped = true; + continue; + } + if (isAdvancedMetachar(Char)) { + // This is a more complicated regex than we can handle here. + Defeated = true; + return; + } + if (Char == '.' || Char == '*') { + Tri = 0; + Len = 0; + continue; + } + } + if (Escaped && Char >= '1' && Char <= '9') { + Defeated = true; + return; + } + // We have already handled escaping and can reset the flag. + Escaped = false; + Tri = ((Tri << 8) + Char) & 0xFFFFFF; + Len++; + if (Len < 3) + continue; + // We don't want the index to grow too much for the popular trigrams, + // as they are weak signals. It's ok to still require them for the + // rules we have already processed. It's just a small additional + // computational cost. + if (Index[Tri].size() >= 4) + continue; + Cnt++; + if (!Was.count(Tri)) { + // Adding the current rule to the index. + Index[Tri].push_back(Counts.size()); + Was.insert(Tri); + } + } + if (!Cnt) { + // This rule does not have remarkable trigrams to rely on. + // We have to always call the full regex chain. 
+ Defeated = true; + return; + } + Counts.push_back(Cnt); +} + +bool TrigramIndex::isDefinitelyOut(StringRef Query) const { + if (Defeated) + return false; + std::vector<unsigned> CurCounts(Counts.size()); + unsigned Tri = 0; + for (size_t I = 0; I < Query.size(); I++) { + Tri = ((Tri << 8) + Query[I]) & 0xFFFFFF; + if (I < 2) + continue; + const auto &II = Index.find(Tri); + if (II == Index.end()) + continue; + for (size_t J : II->second) { + CurCounts[J]++; + // If we have reached a desired limit, we have to look at the query + // more closely by running a full regex. + if (CurCounts[J] >= Counts[J]) + return false; + } + } + return true; +} diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp new file mode 100644 index 0000000000000..d419463e6a5e6 --- /dev/null +++ b/llvm/lib/Support/Triple.cpp @@ -0,0 +1,1656 @@ +//===--- Triple.cpp - Target triple helper class --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetParser.h" +#include <cstring> +using namespace llvm; + +StringRef Triple::getArchTypeName(ArchType Kind) { + switch (Kind) { + case UnknownArch: return "unknown"; + + case aarch64: return "aarch64"; + case aarch64_be: return "aarch64_be"; + case aarch64_32: return "aarch64_32"; + case arm: return "arm"; + case armeb: return "armeb"; + case arc: return "arc"; + case avr: return "avr"; + case bpfel: return "bpfel"; + case bpfeb: return "bpfeb"; + case hexagon: return "hexagon"; + case mips: return "mips"; + case mipsel: return "mipsel"; + case mips64: return "mips64"; + case mips64el: return "mips64el"; + case msp430: return "msp430"; + case ppc64: return "powerpc64"; + case ppc64le: return "powerpc64le"; + case ppc: return "powerpc"; + case r600: return "r600"; + case amdgcn: return "amdgcn"; + case riscv32: return "riscv32"; + case riscv64: return "riscv64"; + case sparc: return "sparc"; + case sparcv9: return "sparcv9"; + case sparcel: return "sparcel"; + case systemz: return "s390x"; + case tce: return "tce"; + case tcele: return "tcele"; + case thumb: return "thumb"; + case thumbeb: return "thumbeb"; + case x86: return "i386"; + case x86_64: return "x86_64"; + case xcore: return "xcore"; + case nvptx: return "nvptx"; + case nvptx64: return "nvptx64"; + case le32: return "le32"; + case le64: return "le64"; + case amdil: return "amdil"; + case amdil64: return "amdil64"; + case hsail: return "hsail"; + case hsail64: return "hsail64"; + case spir: return "spir"; + case spir64: return "spir64"; + case kalimba: return "kalimba"; + case lanai: return "lanai"; + case shave: return "shave"; + 
case wasm32: return "wasm32"; + case wasm64: return "wasm64"; + case renderscript32: return "renderscript32"; + case renderscript64: return "renderscript64"; + } + + llvm_unreachable("Invalid ArchType!"); +} + +StringRef Triple::getArchTypePrefix(ArchType Kind) { + switch (Kind) { + default: + return StringRef(); + + case aarch64: + case aarch64_be: + case aarch64_32: return "aarch64"; + + case arc: return "arc"; + + case arm: + case armeb: + case thumb: + case thumbeb: return "arm"; + + case avr: return "avr"; + + case ppc64: + case ppc64le: + case ppc: return "ppc"; + + case mips: + case mipsel: + case mips64: + case mips64el: return "mips"; + + case hexagon: return "hexagon"; + + case amdgcn: return "amdgcn"; + case r600: return "r600"; + + case bpfel: + case bpfeb: return "bpf"; + + case sparcv9: + case sparcel: + case sparc: return "sparc"; + + case systemz: return "s390"; + + case x86: + case x86_64: return "x86"; + + case xcore: return "xcore"; + + // NVPTX intrinsics are namespaced under nvvm. 
+ case nvptx: return "nvvm"; + case nvptx64: return "nvvm"; + + case le32: return "le32"; + case le64: return "le64"; + + case amdil: + case amdil64: return "amdil"; + + case hsail: + case hsail64: return "hsail"; + + case spir: + case spir64: return "spir"; + case kalimba: return "kalimba"; + case lanai: return "lanai"; + case shave: return "shave"; + case wasm32: + case wasm64: return "wasm"; + + case riscv32: + case riscv64: return "riscv"; + } +} + +StringRef Triple::getVendorTypeName(VendorType Kind) { + switch (Kind) { + case UnknownVendor: return "unknown"; + + case Apple: return "apple"; + case PC: return "pc"; + case SCEI: return "scei"; + case BGP: return "bgp"; + case BGQ: return "bgq"; + case Freescale: return "fsl"; + case IBM: return "ibm"; + case ImaginationTechnologies: return "img"; + case MipsTechnologies: return "mti"; + case NVIDIA: return "nvidia"; + case CSR: return "csr"; + case Myriad: return "myriad"; + case AMD: return "amd"; + case Mesa: return "mesa"; + case SUSE: return "suse"; + case OpenEmbedded: return "oe"; + } + + llvm_unreachable("Invalid VendorType!"); +} + +StringRef Triple::getOSTypeName(OSType Kind) { + switch (Kind) { + case UnknownOS: return "unknown"; + + case Ananas: return "ananas"; + case CloudABI: return "cloudabi"; + case Darwin: return "darwin"; + case DragonFly: return "dragonfly"; + case FreeBSD: return "freebsd"; + case Fuchsia: return "fuchsia"; + case IOS: return "ios"; + case KFreeBSD: return "kfreebsd"; + case Linux: return "linux"; + case Lv2: return "lv2"; + case MacOSX: return "macosx"; + case NetBSD: return "netbsd"; + case OpenBSD: return "openbsd"; + case Solaris: return "solaris"; + case Win32: return "windows"; + case Haiku: return "haiku"; + case Minix: return "minix"; + case RTEMS: return "rtems"; + case NaCl: return "nacl"; + case CNK: return "cnk"; + case AIX: return "aix"; + case CUDA: return "cuda"; + case NVCL: return "nvcl"; + case AMDHSA: return "amdhsa"; + case PS4: return "ps4"; + case 
ELFIAMCU: return "elfiamcu"; + case TvOS: return "tvos"; + case WatchOS: return "watchos"; + case Mesa3D: return "mesa3d"; + case Contiki: return "contiki"; + case AMDPAL: return "amdpal"; + case HermitCore: return "hermit"; + case Hurd: return "hurd"; + case WASI: return "wasi"; + case Emscripten: return "emscripten"; + } + + llvm_unreachable("Invalid OSType"); +} + +StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { + switch (Kind) { + case UnknownEnvironment: return "unknown"; + case GNU: return "gnu"; + case GNUABIN32: return "gnuabin32"; + case GNUABI64: return "gnuabi64"; + case GNUEABIHF: return "gnueabihf"; + case GNUEABI: return "gnueabi"; + case GNUX32: return "gnux32"; + case CODE16: return "code16"; + case EABI: return "eabi"; + case EABIHF: return "eabihf"; + case ELFv1: return "elfv1"; + case ELFv2: return "elfv2"; + case Android: return "android"; + case Musl: return "musl"; + case MuslEABI: return "musleabi"; + case MuslEABIHF: return "musleabihf"; + case MSVC: return "msvc"; + case Itanium: return "itanium"; + case Cygnus: return "cygnus"; + case CoreCLR: return "coreclr"; + case Simulator: return "simulator"; + case MacABI: return "macabi"; + } + + llvm_unreachable("Invalid EnvironmentType!"); +} + +static Triple::ArchType parseBPFArch(StringRef ArchName) { + if (ArchName.equals("bpf")) { + if (sys::IsLittleEndianHost) + return Triple::bpfel; + else + return Triple::bpfeb; + } else if (ArchName.equals("bpf_be") || ArchName.equals("bpfeb")) { + return Triple::bpfeb; + } else if (ArchName.equals("bpf_le") || ArchName.equals("bpfel")) { + return Triple::bpfel; + } else { + return Triple::UnknownArch; + } +} + +Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { + Triple::ArchType BPFArch(parseBPFArch(Name)); + return StringSwitch<Triple::ArchType>(Name) + .Case("aarch64", aarch64) + .Case("aarch64_be", aarch64_be) + .Case("aarch64_32", aarch64_32) + .Case("arc", arc) + .Case("arm64", aarch64) // "arm64" is an alias for 
"aarch64" + .Case("arm64_32", aarch64_32) + .Case("arm", arm) + .Case("armeb", armeb) + .Case("avr", avr) + .StartsWith("bpf", BPFArch) + .Case("mips", mips) + .Case("mipsel", mipsel) + .Case("mips64", mips64) + .Case("mips64el", mips64el) + .Case("msp430", msp430) + .Case("ppc64", ppc64) + .Case("ppc32", ppc) + .Case("ppc", ppc) + .Case("ppc64le", ppc64le) + .Case("r600", r600) + .Case("amdgcn", amdgcn) + .Case("riscv32", riscv32) + .Case("riscv64", riscv64) + .Case("hexagon", hexagon) + .Case("sparc", sparc) + .Case("sparcel", sparcel) + .Case("sparcv9", sparcv9) + .Case("systemz", systemz) + .Case("tce", tce) + .Case("tcele", tcele) + .Case("thumb", thumb) + .Case("thumbeb", thumbeb) + .Case("x86", x86) + .Case("x86-64", x86_64) + .Case("xcore", xcore) + .Case("nvptx", nvptx) + .Case("nvptx64", nvptx64) + .Case("le32", le32) + .Case("le64", le64) + .Case("amdil", amdil) + .Case("amdil64", amdil64) + .Case("hsail", hsail) + .Case("hsail64", hsail64) + .Case("spir", spir) + .Case("spir64", spir64) + .Case("kalimba", kalimba) + .Case("lanai", lanai) + .Case("shave", shave) + .Case("wasm32", wasm32) + .Case("wasm64", wasm64) + .Case("renderscript32", renderscript32) + .Case("renderscript64", renderscript64) + .Default(UnknownArch); +} + +static Triple::ArchType parseARMArch(StringRef ArchName) { + ARM::ISAKind ISA = ARM::parseArchISA(ArchName); + ARM::EndianKind ENDIAN = ARM::parseArchEndian(ArchName); + + Triple::ArchType arch = Triple::UnknownArch; + switch (ENDIAN) { + case ARM::EndianKind::LITTLE: { + switch (ISA) { + case ARM::ISAKind::ARM: + arch = Triple::arm; + break; + case ARM::ISAKind::THUMB: + arch = Triple::thumb; + break; + case ARM::ISAKind::AARCH64: + arch = Triple::aarch64; + break; + case ARM::ISAKind::INVALID: + break; + } + break; + } + case ARM::EndianKind::BIG: { + switch (ISA) { + case ARM::ISAKind::ARM: + arch = Triple::armeb; + break; + case ARM::ISAKind::THUMB: + arch = Triple::thumbeb; + break; + case ARM::ISAKind::AARCH64: + arch = 
Triple::aarch64_be; + break; + case ARM::ISAKind::INVALID: + break; + } + break; + } + case ARM::EndianKind::INVALID: { + break; + } + } + + ArchName = ARM::getCanonicalArchName(ArchName); + if (ArchName.empty()) + return Triple::UnknownArch; + + // Thumb only exists in v4+ + if (ISA == ARM::ISAKind::THUMB && + (ArchName.startswith("v2") || ArchName.startswith("v3"))) + return Triple::UnknownArch; + + // Thumb only for v6m + ARM::ProfileKind Profile = ARM::parseArchProfile(ArchName); + unsigned Version = ARM::parseArchVersion(ArchName); + if (Profile == ARM::ProfileKind::M && Version == 6) { + if (ENDIAN == ARM::EndianKind::BIG) + return Triple::thumbeb; + else + return Triple::thumb; + } + + return arch; +} + +static Triple::ArchType parseArch(StringRef ArchName) { + auto AT = StringSwitch<Triple::ArchType>(ArchName) + .Cases("i386", "i486", "i586", "i686", Triple::x86) + // FIXME: Do we need to support these? + .Cases("i786", "i886", "i986", Triple::x86) + .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) + .Cases("powerpc", "ppc", "ppc32", Triple::ppc) + .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64) + .Cases("powerpc64le", "ppc64le", Triple::ppc64le) + .Case("xscale", Triple::arm) + .Case("xscaleeb", Triple::armeb) + .Case("aarch64", Triple::aarch64) + .Case("aarch64_be", Triple::aarch64_be) + .Case("aarch64_32", Triple::aarch64_32) + .Case("arc", Triple::arc) + .Case("arm64", Triple::aarch64) + .Case("arm64_32", Triple::aarch64_32) + .Case("arm", Triple::arm) + .Case("armeb", Triple::armeb) + .Case("thumb", Triple::thumb) + .Case("thumbeb", Triple::thumbeb) + .Case("avr", Triple::avr) + .Case("msp430", Triple::msp430) + .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6", + "mipsr6", Triple::mips) + .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el", + Triple::mipsel) + .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6", + "mips64r6", "mipsn32r6", Triple::mips64) + .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el", + 
"mipsn32r6el", Triple::mips64el) + .Case("r600", Triple::r600) + .Case("amdgcn", Triple::amdgcn) + .Case("riscv32", Triple::riscv32) + .Case("riscv64", Triple::riscv64) + .Case("hexagon", Triple::hexagon) + .Cases("s390x", "systemz", Triple::systemz) + .Case("sparc", Triple::sparc) + .Case("sparcel", Triple::sparcel) + .Cases("sparcv9", "sparc64", Triple::sparcv9) + .Case("tce", Triple::tce) + .Case("tcele", Triple::tcele) + .Case("xcore", Triple::xcore) + .Case("nvptx", Triple::nvptx) + .Case("nvptx64", Triple::nvptx64) + .Case("le32", Triple::le32) + .Case("le64", Triple::le64) + .Case("amdil", Triple::amdil) + .Case("amdil64", Triple::amdil64) + .Case("hsail", Triple::hsail) + .Case("hsail64", Triple::hsail64) + .Case("spir", Triple::spir) + .Case("spir64", Triple::spir64) + .StartsWith("kalimba", Triple::kalimba) + .Case("lanai", Triple::lanai) + .Case("shave", Triple::shave) + .Case("wasm32", Triple::wasm32) + .Case("wasm64", Triple::wasm64) + .Case("renderscript32", Triple::renderscript32) + .Case("renderscript64", Triple::renderscript64) + .Default(Triple::UnknownArch); + + // Some architectures require special parsing logic just to compute the + // ArchType result. 
+ if (AT == Triple::UnknownArch) { + if (ArchName.startswith("arm") || ArchName.startswith("thumb") || + ArchName.startswith("aarch64")) + return parseARMArch(ArchName); + if (ArchName.startswith("bpf")) + return parseBPFArch(ArchName); + } + + return AT; +} + +static Triple::VendorType parseVendor(StringRef VendorName) { + return StringSwitch<Triple::VendorType>(VendorName) + .Case("apple", Triple::Apple) + .Case("pc", Triple::PC) + .Case("scei", Triple::SCEI) + .Case("bgp", Triple::BGP) + .Case("bgq", Triple::BGQ) + .Case("fsl", Triple::Freescale) + .Case("ibm", Triple::IBM) + .Case("img", Triple::ImaginationTechnologies) + .Case("mti", Triple::MipsTechnologies) + .Case("nvidia", Triple::NVIDIA) + .Case("csr", Triple::CSR) + .Case("myriad", Triple::Myriad) + .Case("amd", Triple::AMD) + .Case("mesa", Triple::Mesa) + .Case("suse", Triple::SUSE) + .Case("oe", Triple::OpenEmbedded) + .Default(Triple::UnknownVendor); +} + +static Triple::OSType parseOS(StringRef OSName) { + return StringSwitch<Triple::OSType>(OSName) + .StartsWith("ananas", Triple::Ananas) + .StartsWith("cloudabi", Triple::CloudABI) + .StartsWith("darwin", Triple::Darwin) + .StartsWith("dragonfly", Triple::DragonFly) + .StartsWith("freebsd", Triple::FreeBSD) + .StartsWith("fuchsia", Triple::Fuchsia) + .StartsWith("ios", Triple::IOS) + .StartsWith("kfreebsd", Triple::KFreeBSD) + .StartsWith("linux", Triple::Linux) + .StartsWith("lv2", Triple::Lv2) + .StartsWith("macos", Triple::MacOSX) + .StartsWith("netbsd", Triple::NetBSD) + .StartsWith("openbsd", Triple::OpenBSD) + .StartsWith("solaris", Triple::Solaris) + .StartsWith("win32", Triple::Win32) + .StartsWith("windows", Triple::Win32) + .StartsWith("haiku", Triple::Haiku) + .StartsWith("minix", Triple::Minix) + .StartsWith("rtems", Triple::RTEMS) + .StartsWith("nacl", Triple::NaCl) + .StartsWith("cnk", Triple::CNK) + .StartsWith("aix", Triple::AIX) + .StartsWith("cuda", Triple::CUDA) + .StartsWith("nvcl", Triple::NVCL) + .StartsWith("amdhsa", 
Triple::AMDHSA) + .StartsWith("ps4", Triple::PS4) + .StartsWith("elfiamcu", Triple::ELFIAMCU) + .StartsWith("tvos", Triple::TvOS) + .StartsWith("watchos", Triple::WatchOS) + .StartsWith("mesa3d", Triple::Mesa3D) + .StartsWith("contiki", Triple::Contiki) + .StartsWith("amdpal", Triple::AMDPAL) + .StartsWith("hermit", Triple::HermitCore) + .StartsWith("hurd", Triple::Hurd) + .StartsWith("wasi", Triple::WASI) + .StartsWith("emscripten", Triple::Emscripten) + .Default(Triple::UnknownOS); +} + +static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { + return StringSwitch<Triple::EnvironmentType>(EnvironmentName) + .StartsWith("eabihf", Triple::EABIHF) + .StartsWith("eabi", Triple::EABI) + .StartsWith("elfv1", Triple::ELFv1) + .StartsWith("elfv2", Triple::ELFv2) + .StartsWith("gnuabin32", Triple::GNUABIN32) + .StartsWith("gnuabi64", Triple::GNUABI64) + .StartsWith("gnueabihf", Triple::GNUEABIHF) + .StartsWith("gnueabi", Triple::GNUEABI) + .StartsWith("gnux32", Triple::GNUX32) + .StartsWith("code16", Triple::CODE16) + .StartsWith("gnu", Triple::GNU) + .StartsWith("android", Triple::Android) + .StartsWith("musleabihf", Triple::MuslEABIHF) + .StartsWith("musleabi", Triple::MuslEABI) + .StartsWith("musl", Triple::Musl) + .StartsWith("msvc", Triple::MSVC) + .StartsWith("itanium", Triple::Itanium) + .StartsWith("cygnus", Triple::Cygnus) + .StartsWith("coreclr", Triple::CoreCLR) + .StartsWith("simulator", Triple::Simulator) + .StartsWith("macabi", Triple::MacABI) + .Default(Triple::UnknownEnvironment); +} + +static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) { + return StringSwitch<Triple::ObjectFormatType>(EnvironmentName) + // "xcoff" must come before "coff" because of the order-dependendent + // pattern matching. 
+ .EndsWith("xcoff", Triple::XCOFF) + .EndsWith("coff", Triple::COFF) + .EndsWith("elf", Triple::ELF) + .EndsWith("macho", Triple::MachO) + .EndsWith("wasm", Triple::Wasm) + .Default(Triple::UnknownObjectFormat); +} + +static Triple::SubArchType parseSubArch(StringRef SubArchName) { + if (SubArchName.startswith("mips") && + (SubArchName.endswith("r6el") || SubArchName.endswith("r6"))) + return Triple::MipsSubArch_r6; + + StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); + + // For now, this is the small part. Early return. + if (ARMSubArch.empty()) + return StringSwitch<Triple::SubArchType>(SubArchName) + .EndsWith("kalimba3", Triple::KalimbaSubArch_v3) + .EndsWith("kalimba4", Triple::KalimbaSubArch_v4) + .EndsWith("kalimba5", Triple::KalimbaSubArch_v5) + .Default(Triple::NoSubArch); + + // ARM sub arch. + switch(ARM::parseArch(ARMSubArch)) { + case ARM::ArchKind::ARMV4: + return Triple::NoSubArch; + case ARM::ArchKind::ARMV4T: + return Triple::ARMSubArch_v4t; + case ARM::ArchKind::ARMV5T: + return Triple::ARMSubArch_v5; + case ARM::ArchKind::ARMV5TE: + case ARM::ArchKind::IWMMXT: + case ARM::ArchKind::IWMMXT2: + case ARM::ArchKind::XSCALE: + case ARM::ArchKind::ARMV5TEJ: + return Triple::ARMSubArch_v5te; + case ARM::ArchKind::ARMV6: + return Triple::ARMSubArch_v6; + case ARM::ArchKind::ARMV6K: + case ARM::ArchKind::ARMV6KZ: + return Triple::ARMSubArch_v6k; + case ARM::ArchKind::ARMV6T2: + return Triple::ARMSubArch_v6t2; + case ARM::ArchKind::ARMV6M: + return Triple::ARMSubArch_v6m; + case ARM::ArchKind::ARMV7A: + case ARM::ArchKind::ARMV7R: + return Triple::ARMSubArch_v7; + case ARM::ArchKind::ARMV7VE: + return Triple::ARMSubArch_v7ve; + case ARM::ArchKind::ARMV7K: + return Triple::ARMSubArch_v7k; + case ARM::ArchKind::ARMV7M: + return Triple::ARMSubArch_v7m; + case ARM::ArchKind::ARMV7S: + return Triple::ARMSubArch_v7s; + case ARM::ArchKind::ARMV7EM: + return Triple::ARMSubArch_v7em; + case ARM::ArchKind::ARMV8A: + return Triple::ARMSubArch_v8; + 
case ARM::ArchKind::ARMV8_1A: + return Triple::ARMSubArch_v8_1a; + case ARM::ArchKind::ARMV8_2A: + return Triple::ARMSubArch_v8_2a; + case ARM::ArchKind::ARMV8_3A: + return Triple::ARMSubArch_v8_3a; + case ARM::ArchKind::ARMV8_4A: + return Triple::ARMSubArch_v8_4a; + case ARM::ArchKind::ARMV8_5A: + return Triple::ARMSubArch_v8_5a; + case ARM::ArchKind::ARMV8R: + return Triple::ARMSubArch_v8r; + case ARM::ArchKind::ARMV8MBaseline: + return Triple::ARMSubArch_v8m_baseline; + case ARM::ArchKind::ARMV8MMainline: + return Triple::ARMSubArch_v8m_mainline; + case ARM::ArchKind::ARMV8_1MMainline: + return Triple::ARMSubArch_v8_1m_mainline; + default: + return Triple::NoSubArch; + } +} + +static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) { + switch (Kind) { + case Triple::UnknownObjectFormat: return ""; + case Triple::COFF: return "coff"; + case Triple::ELF: return "elf"; + case Triple::MachO: return "macho"; + case Triple::Wasm: return "wasm"; + case Triple::XCOFF: return "xcoff"; + } + llvm_unreachable("unknown object format type"); +} + +static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { + switch (T.getArch()) { + case Triple::UnknownArch: + case Triple::aarch64: + case Triple::aarch64_32: + case Triple::arm: + case Triple::thumb: + case Triple::x86: + case Triple::x86_64: + if (T.isOSDarwin()) + return Triple::MachO; + else if (T.isOSWindows()) + return Triple::COFF; + return Triple::ELF; + + case Triple::aarch64_be: + case Triple::arc: + case Triple::amdgcn: + case Triple::amdil: + case Triple::amdil64: + case Triple::armeb: + case Triple::avr: + case Triple::bpfeb: + case Triple::bpfel: + case Triple::hexagon: + case Triple::lanai: + case Triple::hsail: + case Triple::hsail64: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::mips: + case Triple::mips64: + case Triple::mips64el: + case Triple::mipsel: + case Triple::msp430: + case Triple::nvptx: + case Triple::nvptx64: + case Triple::ppc64le: + case 
Triple::r600: + case Triple::renderscript32: + case Triple::renderscript64: + case Triple::riscv32: + case Triple::riscv64: + case Triple::shave: + case Triple::sparc: + case Triple::sparcel: + case Triple::sparcv9: + case Triple::spir: + case Triple::spir64: + case Triple::systemz: + case Triple::tce: + case Triple::tcele: + case Triple::thumbeb: + case Triple::xcore: + return Triple::ELF; + + case Triple::ppc: + case Triple::ppc64: + if (T.isOSDarwin()) + return Triple::MachO; + else if (T.isOSAIX()) + return Triple::XCOFF; + return Triple::ELF; + + case Triple::wasm32: + case Triple::wasm64: + return Triple::Wasm; + } + llvm_unreachable("unknown architecture"); +} + +/// Construct a triple from the string representation provided. +/// +/// This stores the string representation and parses the various pieces into +/// enum members. +Triple::Triple(const Twine &Str) + : Data(Str.str()), Arch(UnknownArch), SubArch(NoSubArch), + Vendor(UnknownVendor), OS(UnknownOS), Environment(UnknownEnvironment), + ObjectFormat(UnknownObjectFormat) { + // Do minimal parsing by hand here. 
+ SmallVector<StringRef, 4> Components; + StringRef(Data).split(Components, '-', /*MaxSplit*/ 3); + if (Components.size() > 0) { + Arch = parseArch(Components[0]); + SubArch = parseSubArch(Components[0]); + if (Components.size() > 1) { + Vendor = parseVendor(Components[1]); + if (Components.size() > 2) { + OS = parseOS(Components[2]); + if (Components.size() > 3) { + Environment = parseEnvironment(Components[3]); + ObjectFormat = parseFormat(Components[3]); + } + } + } else { + Environment = + StringSwitch<Triple::EnvironmentType>(Components[0]) + .StartsWith("mipsn32", Triple::GNUABIN32) + .StartsWith("mips64", Triple::GNUABI64) + .StartsWith("mipsisa64", Triple::GNUABI64) + .StartsWith("mipsisa32", Triple::GNU) + .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU) + .Default(UnknownEnvironment); + } + } + if (ObjectFormat == UnknownObjectFormat) + ObjectFormat = getDefaultFormat(*this); +} + +/// Construct a triple from string representations of the architecture, +/// vendor, and OS. +/// +/// This joins each argument into a canonical string representation and parses +/// them into enum members. It leaves the environment unknown and omits it from +/// the string representation. +Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr) + : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()), + Arch(parseArch(ArchStr.str())), + SubArch(parseSubArch(ArchStr.str())), + Vendor(parseVendor(VendorStr.str())), + OS(parseOS(OSStr.str())), + Environment(), ObjectFormat(Triple::UnknownObjectFormat) { + ObjectFormat = getDefaultFormat(*this); +} + +/// Construct a triple from string representations of the architecture, +/// vendor, OS, and environment. +/// +/// This joins each argument into a canonical string representation and parses +/// them into enum members. 
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr, + const Twine &EnvironmentStr) + : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') + + EnvironmentStr).str()), + Arch(parseArch(ArchStr.str())), + SubArch(parseSubArch(ArchStr.str())), + Vendor(parseVendor(VendorStr.str())), + OS(parseOS(OSStr.str())), + Environment(parseEnvironment(EnvironmentStr.str())), + ObjectFormat(parseFormat(EnvironmentStr.str())) { + if (ObjectFormat == Triple::UnknownObjectFormat) + ObjectFormat = getDefaultFormat(*this); +} + +std::string Triple::normalize(StringRef Str) { + bool IsMinGW32 = false; + bool IsCygwin = false; + + // Parse into components. + SmallVector<StringRef, 4> Components; + Str.split(Components, '-'); + + // If the first component corresponds to a known architecture, preferentially + // use it for the architecture. If the second component corresponds to a + // known vendor, preferentially use it for the vendor, etc. This avoids silly + // component movement when a component parses as (eg) both a valid arch and a + // valid os. + ArchType Arch = UnknownArch; + if (Components.size() > 0) + Arch = parseArch(Components[0]); + VendorType Vendor = UnknownVendor; + if (Components.size() > 1) + Vendor = parseVendor(Components[1]); + OSType OS = UnknownOS; + if (Components.size() > 2) { + OS = parseOS(Components[2]); + IsCygwin = Components[2].startswith("cygwin"); + IsMinGW32 = Components[2].startswith("mingw"); + } + EnvironmentType Environment = UnknownEnvironment; + if (Components.size() > 3) + Environment = parseEnvironment(Components[3]); + ObjectFormatType ObjectFormat = UnknownObjectFormat; + if (Components.size() > 4) + ObjectFormat = parseFormat(Components[4]); + + // Note which components are already in their final position. These will not + // be moved. 
+ bool Found[4]; + Found[0] = Arch != UnknownArch; + Found[1] = Vendor != UnknownVendor; + Found[2] = OS != UnknownOS; + Found[3] = Environment != UnknownEnvironment; + + // If they are not there already, permute the components into their canonical + // positions by seeing if they parse as a valid architecture, and if so moving + // the component to the architecture position etc. + for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) { + if (Found[Pos]) + continue; // Already in the canonical position. + + for (unsigned Idx = 0; Idx != Components.size(); ++Idx) { + // Do not reparse any components that already matched. + if (Idx < array_lengthof(Found) && Found[Idx]) + continue; + + // Does this component parse as valid for the target position? + bool Valid = false; + StringRef Comp = Components[Idx]; + switch (Pos) { + default: llvm_unreachable("unexpected component type!"); + case 0: + Arch = parseArch(Comp); + Valid = Arch != UnknownArch; + break; + case 1: + Vendor = parseVendor(Comp); + Valid = Vendor != UnknownVendor; + break; + case 2: + OS = parseOS(Comp); + IsCygwin = Comp.startswith("cygwin"); + IsMinGW32 = Comp.startswith("mingw"); + Valid = OS != UnknownOS || IsCygwin || IsMinGW32; + break; + case 3: + Environment = parseEnvironment(Comp); + Valid = Environment != UnknownEnvironment; + if (!Valid) { + ObjectFormat = parseFormat(Comp); + Valid = ObjectFormat != UnknownObjectFormat; + } + break; + } + if (!Valid) + continue; // Nope, try the next component. + + // Move the component to the target position, pushing any non-fixed + // components that are in the way to the right. This tends to give + // good results in the common cases of a forgotten vendor component + // or a wrongly positioned environment. + if (Pos < Idx) { + // Insert left, pushing the existing components to the right. For + // example, a-b-i386 -> i386-a-b when moving i386 to the front. + StringRef CurrentComponent(""); // The empty component. 
+ // Replace the component we are moving with an empty component. + std::swap(CurrentComponent, Components[Idx]); + // Insert the component being moved at Pos, displacing any existing + // components to the right. + for (unsigned i = Pos; !CurrentComponent.empty(); ++i) { + // Skip over any fixed components. + while (i < array_lengthof(Found) && Found[i]) + ++i; + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + } + } else if (Pos > Idx) { + // Push right by inserting empty components until the component at Idx + // reaches the target position Pos. For example, pc-a -> -pc-a when + // moving pc to the second position. + do { + // Insert one empty component at Idx. + StringRef CurrentComponent(""); // The empty component. + for (unsigned i = Idx; i < Components.size();) { + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + // If it was placed on top of an empty component then we are done. + if (CurrentComponent.empty()) + break; + // Advance to the next component, skipping any fixed components. + while (++i < array_lengthof(Found) && Found[i]) + ; + } + // The last component was pushed off the end - append it. + if (!CurrentComponent.empty()) + Components.push_back(CurrentComponent); + + // Advance Idx to the component's new position. + while (++Idx < array_lengthof(Found) && Found[Idx]) + ; + } while (Idx < Pos); // Add more until the final position is reached. + } + assert(Pos < Components.size() && Components[Pos] == Comp && + "Component moved wrong!"); + Found[Pos] = true; + break; + } + } + + // Replace empty components with "unknown" value. + for (unsigned i = 0, e = Components.size(); i < e; ++i) { + if (Components[i].empty()) + Components[i] = "unknown"; + } + + // Special case logic goes here. 
At this point Arch, Vendor and OS have the + // correct values for the computed components. + std::string NormalizedEnvironment; + if (Environment == Triple::Android && Components[3].startswith("androideabi")) { + StringRef AndroidVersion = Components[3].drop_front(strlen("androideabi")); + if (AndroidVersion.empty()) { + Components[3] = "android"; + } else { + NormalizedEnvironment = Twine("android", AndroidVersion).str(); + Components[3] = NormalizedEnvironment; + } + } + + // SUSE uses "gnueabi" to mean "gnueabihf" + if (Vendor == Triple::SUSE && Environment == llvm::Triple::GNUEABI) + Components[3] = "gnueabihf"; + + if (OS == Triple::Win32) { + Components.resize(4); + Components[2] = "windows"; + if (Environment == UnknownEnvironment) { + if (ObjectFormat == UnknownObjectFormat || ObjectFormat == Triple::COFF) + Components[3] = "msvc"; + else + Components[3] = getObjectFormatTypeName(ObjectFormat); + } + } else if (IsMinGW32) { + Components.resize(4); + Components[2] = "windows"; + Components[3] = "gnu"; + } else if (IsCygwin) { + Components.resize(4); + Components[2] = "windows"; + Components[3] = "cygnus"; + } + if (IsMinGW32 || IsCygwin || + (OS == Triple::Win32 && Environment != UnknownEnvironment)) { + if (ObjectFormat != UnknownObjectFormat && ObjectFormat != Triple::COFF) { + Components.resize(5); + Components[4] = getObjectFormatTypeName(ObjectFormat); + } + } + + // Stick the corrected components back together to form the normalized string. 
+ std::string Normalized; + for (unsigned i = 0, e = Components.size(); i != e; ++i) { + if (i) Normalized += '-'; + Normalized += Components[i]; + } + return Normalized; +} + +StringRef Triple::getArchName() const { + return StringRef(Data).split('-').first; // Isolate first component +} + +StringRef Triple::getVendorName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').first; // Isolate second component +} + +StringRef Triple::getOSName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').first; // Isolate third component +} + +StringRef Triple::getEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').second; // Strip third component +} + +StringRef Triple::getOSAndEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').second; // Strip second component +} + +static unsigned EatNumber(StringRef &Str) { + assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number"); + unsigned Result = 0; + + do { + // Consume the leading digit. + Result = Result*10 + (Str[0] - '0'); + + // Eat the digit. + Str = Str.substr(1); + } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9'); + + return Result; +} + +static void parseVersionFromName(StringRef Name, unsigned &Major, + unsigned &Minor, unsigned &Micro) { + // Any unset version defaults to 0. + Major = Minor = Micro = 0; + + // Parse up to three components. + unsigned *Components[3] = {&Major, &Minor, &Micro}; + for (unsigned i = 0; i != 3; ++i) { + if (Name.empty() || Name[0] < '0' || Name[0] > '9') + break; + + // Consume the leading number. + *Components[i] = EatNumber(Name); + + // Consume the separator, if present. 
+ if (Name.startswith(".")) + Name = Name.substr(1); + } +} + +void Triple::getEnvironmentVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + StringRef EnvironmentName = getEnvironmentName(); + StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment()); + if (EnvironmentName.startswith(EnvironmentTypeName)) + EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size()); + + parseVersionFromName(EnvironmentName, Major, Minor, Micro); +} + +void Triple::getOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + StringRef OSName = getOSName(); + // Assume that the OS portion of the triple starts with the canonical name. + StringRef OSTypeName = getOSTypeName(getOS()); + if (OSName.startswith(OSTypeName)) + OSName = OSName.substr(OSTypeName.size()); + else if (getOS() == MacOSX) + OSName.consume_front("macos"); + + parseVersionFromName(OSName, Major, Minor, Micro); +} + +bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + getOSVersion(Major, Minor, Micro); + + switch (getOS()) { + default: llvm_unreachable("unexpected OS for Darwin triple"); + case Darwin: + // Default to darwin8, i.e., MacOSX 10.4. + if (Major == 0) + Major = 8; + // Darwin version numbers are skewed from OS X versions. + if (Major < 4) + return false; + Micro = 0; + Minor = Major - 4; + Major = 10; + break; + case MacOSX: + // Default to 10.4. + if (Major == 0) { + Major = 10; + Minor = 4; + } + if (Major != 10) + return false; + break; + case IOS: + case TvOS: + case WatchOS: + // Ignore the version from the triple. This is only handled because the + // the clang driver combines OS X and IOS support into a common Darwin + // toolchain that wants to know the OS X version number even when targeting + // IOS. 
+ Major = 10; + Minor = 4; + Micro = 0; + break; + } + return true; +} + +void Triple::getiOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + switch (getOS()) { + default: llvm_unreachable("unexpected OS for Darwin triple"); + case Darwin: + case MacOSX: + // Ignore the version from the triple. This is only handled because the + // the clang driver combines OS X and IOS support into a common Darwin + // toolchain that wants to know the iOS version number even when targeting + // OS X. + Major = 5; + Minor = 0; + Micro = 0; + break; + case IOS: + case TvOS: + getOSVersion(Major, Minor, Micro); + // Default to 5.0 (or 7.0 for arm64). + if (Major == 0) + Major = (getArch() == aarch64) ? 7 : 5; + break; + case WatchOS: + llvm_unreachable("conflicting triple info"); + } +} + +void Triple::getWatchOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + switch (getOS()) { + default: llvm_unreachable("unexpected OS for Darwin triple"); + case Darwin: + case MacOSX: + // Ignore the version from the triple. This is only handled because the + // the clang driver combines OS X and IOS support into a common Darwin + // toolchain that wants to know the iOS version number even when targeting + // OS X. 
+ Major = 2; + Minor = 0; + Micro = 0; + break; + case WatchOS: + getOSVersion(Major, Minor, Micro); + if (Major == 0) + Major = 2; + break; + case IOS: + llvm_unreachable("conflicting triple info"); + } +} + +void Triple::setTriple(const Twine &Str) { + *this = Triple(Str); +} + +void Triple::setArch(ArchType Kind) { + setArchName(getArchTypeName(Kind)); +} + +void Triple::setVendor(VendorType Kind) { + setVendorName(getVendorTypeName(Kind)); +} + +void Triple::setOS(OSType Kind) { + setOSName(getOSTypeName(Kind)); +} + +void Triple::setEnvironment(EnvironmentType Kind) { + if (ObjectFormat == getDefaultFormat(*this)) + return setEnvironmentName(getEnvironmentTypeName(Kind)); + + setEnvironmentName((getEnvironmentTypeName(Kind) + Twine("-") + + getObjectFormatTypeName(ObjectFormat)).str()); +} + +void Triple::setObjectFormat(ObjectFormatType Kind) { + if (Environment == UnknownEnvironment) + return setEnvironmentName(getObjectFormatTypeName(Kind)); + + setEnvironmentName((getEnvironmentTypeName(Environment) + Twine("-") + + getObjectFormatTypeName(Kind)).str()); +} + +void Triple::setArchName(StringRef Str) { + // Work around a miscompilation bug for Twines in gcc 4.0.3. 
+ SmallString<64> Triple; + Triple += Str; + Triple += "-"; + Triple += getVendorName(); + Triple += "-"; + Triple += getOSAndEnvironmentName(); + setTriple(Triple); +} + +void Triple::setVendorName(StringRef Str) { + setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName()); +} + +void Triple::setOSName(StringRef Str) { + if (hasEnvironment()) + setTriple(getArchName() + "-" + getVendorName() + "-" + Str + + "-" + getEnvironmentName()); + else + setTriple(getArchName() + "-" + getVendorName() + "-" + Str); +} + +void Triple::setEnvironmentName(StringRef Str) { + setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() + + "-" + Str); +} + +void Triple::setOSAndEnvironmentName(StringRef Str) { + setTriple(getArchName() + "-" + getVendorName() + "-" + Str); +} + +static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { + switch (Arch) { + case llvm::Triple::UnknownArch: + return 0; + + case llvm::Triple::avr: + case llvm::Triple::msp430: + return 16; + + case llvm::Triple::aarch64_32: + case llvm::Triple::arc: + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::hexagon: + case llvm::Triple::le32: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::nvptx: + case llvm::Triple::ppc: + case llvm::Triple::r600: + case llvm::Triple::riscv32: + case llvm::Triple::sparc: + case llvm::Triple::sparcel: + case llvm::Triple::tce: + case llvm::Triple::tcele: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + case llvm::Triple::x86: + case llvm::Triple::xcore: + case llvm::Triple::amdil: + case llvm::Triple::hsail: + case llvm::Triple::spir: + case llvm::Triple::kalimba: + case llvm::Triple::lanai: + case llvm::Triple::shave: + case llvm::Triple::wasm32: + case llvm::Triple::renderscript32: + return 32; + + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::amdgcn: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: + case llvm::Triple::le64: + case 
llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::nvptx64: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + case llvm::Triple::riscv64: + case llvm::Triple::sparcv9: + case llvm::Triple::systemz: + case llvm::Triple::x86_64: + case llvm::Triple::amdil64: + case llvm::Triple::hsail64: + case llvm::Triple::spir64: + case llvm::Triple::wasm64: + case llvm::Triple::renderscript64: + return 64; + } + llvm_unreachable("Invalid architecture value"); +} + +bool Triple::isArch64Bit() const { + return getArchPointerBitWidth(getArch()) == 64; +} + +bool Triple::isArch32Bit() const { + return getArchPointerBitWidth(getArch()) == 32; +} + +bool Triple::isArch16Bit() const { + return getArchPointerBitWidth(getArch()) == 16; +} + +Triple Triple::get32BitArchVariant() const { + Triple T(*this); + switch (getArch()) { + case Triple::UnknownArch: + case Triple::amdgcn: + case Triple::avr: + case Triple::bpfel: + case Triple::bpfeb: + case Triple::msp430: + case Triple::systemz: + case Triple::ppc64le: + T.setArch(UnknownArch); + break; + + case Triple::aarch64_32: + case Triple::amdil: + case Triple::hsail: + case Triple::spir: + case Triple::arc: + case Triple::arm: + case Triple::armeb: + case Triple::hexagon: + case Triple::kalimba: + case Triple::le32: + case Triple::mips: + case Triple::mipsel: + case Triple::nvptx: + case Triple::ppc: + case Triple::r600: + case Triple::riscv32: + case Triple::sparc: + case Triple::sparcel: + case Triple::tce: + case Triple::tcele: + case Triple::thumb: + case Triple::thumbeb: + case Triple::x86: + case Triple::xcore: + case Triple::lanai: + case Triple::shave: + case Triple::wasm32: + case Triple::renderscript32: + // Already 32-bit. 
+ break; + + case Triple::aarch64: T.setArch(Triple::arm); break; + case Triple::aarch64_be: T.setArch(Triple::armeb); break; + case Triple::le64: T.setArch(Triple::le32); break; + case Triple::mips64: T.setArch(Triple::mips); break; + case Triple::mips64el: T.setArch(Triple::mipsel); break; + case Triple::nvptx64: T.setArch(Triple::nvptx); break; + case Triple::ppc64: T.setArch(Triple::ppc); break; + case Triple::sparcv9: T.setArch(Triple::sparc); break; + case Triple::riscv64: T.setArch(Triple::riscv32); break; + case Triple::x86_64: T.setArch(Triple::x86); break; + case Triple::amdil64: T.setArch(Triple::amdil); break; + case Triple::hsail64: T.setArch(Triple::hsail); break; + case Triple::spir64: T.setArch(Triple::spir); break; + case Triple::wasm64: T.setArch(Triple::wasm32); break; + case Triple::renderscript64: T.setArch(Triple::renderscript32); break; + } + return T; +} + +Triple Triple::get64BitArchVariant() const { + Triple T(*this); + switch (getArch()) { + case Triple::UnknownArch: + case Triple::arc: + case Triple::avr: + case Triple::hexagon: + case Triple::kalimba: + case Triple::lanai: + case Triple::msp430: + case Triple::r600: + case Triple::tce: + case Triple::tcele: + case Triple::xcore: + case Triple::sparcel: + case Triple::shave: + T.setArch(UnknownArch); + break; + + case Triple::aarch64: + case Triple::aarch64_be: + case Triple::bpfel: + case Triple::bpfeb: + case Triple::le64: + case Triple::amdil64: + case Triple::amdgcn: + case Triple::hsail64: + case Triple::spir64: + case Triple::mips64: + case Triple::mips64el: + case Triple::nvptx64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::riscv64: + case Triple::sparcv9: + case Triple::systemz: + case Triple::x86_64: + case Triple::wasm64: + case Triple::renderscript64: + // Already 64-bit. 
+ break; + + case Triple::aarch64_32: T.setArch(Triple::aarch64); break; + case Triple::arm: T.setArch(Triple::aarch64); break; + case Triple::armeb: T.setArch(Triple::aarch64_be); break; + case Triple::le32: T.setArch(Triple::le64); break; + case Triple::mips: T.setArch(Triple::mips64); break; + case Triple::mipsel: T.setArch(Triple::mips64el); break; + case Triple::nvptx: T.setArch(Triple::nvptx64); break; + case Triple::ppc: T.setArch(Triple::ppc64); break; + case Triple::sparc: T.setArch(Triple::sparcv9); break; + case Triple::riscv32: T.setArch(Triple::riscv64); break; + case Triple::x86: T.setArch(Triple::x86_64); break; + case Triple::amdil: T.setArch(Triple::amdil64); break; + case Triple::hsail: T.setArch(Triple::hsail64); break; + case Triple::spir: T.setArch(Triple::spir64); break; + case Triple::thumb: T.setArch(Triple::aarch64); break; + case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; + case Triple::wasm32: T.setArch(Triple::wasm64); break; + case Triple::renderscript32: T.setArch(Triple::renderscript64); break; + } + return T; +} + +Triple Triple::getBigEndianArchVariant() const { + Triple T(*this); + // Already big endian. + if (!isLittleEndian()) + return T; + switch (getArch()) { + case Triple::UnknownArch: + case Triple::amdgcn: + case Triple::amdil64: + case Triple::amdil: + case Triple::avr: + case Triple::hexagon: + case Triple::hsail64: + case Triple::hsail: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: + case Triple::r600: + case Triple::riscv32: + case Triple::riscv64: + case Triple::shave: + case Triple::spir64: + case Triple::spir: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: + case Triple::x86_64: + case Triple::xcore: + case Triple::renderscript32: + case Triple::renderscript64: + + // ARM is intentionally unsupported here, changing the architecture would + // drop any arch suffixes. 
+ case Triple::arm: + case Triple::thumb: + T.setArch(UnknownArch); + break; + + case Triple::tcele: T.setArch(Triple::tce); break; + case Triple::aarch64: T.setArch(Triple::aarch64_be); break; + case Triple::bpfel: T.setArch(Triple::bpfeb); break; + case Triple::mips64el:T.setArch(Triple::mips64); break; + case Triple::mipsel: T.setArch(Triple::mips); break; + case Triple::ppc64le: T.setArch(Triple::ppc64); break; + case Triple::sparcel: T.setArch(Triple::sparc); break; + default: + llvm_unreachable("getBigEndianArchVariant: unknown triple."); + } + return T; +} + +Triple Triple::getLittleEndianArchVariant() const { + Triple T(*this); + if (isLittleEndian()) + return T; + + switch (getArch()) { + case Triple::UnknownArch: + case Triple::lanai: + case Triple::ppc: + case Triple::sparcv9: + case Triple::systemz: + + // ARM is intentionally unsupported here, changing the architecture would + // drop any arch suffixes. + case Triple::armeb: + case Triple::thumbeb: + T.setArch(UnknownArch); + break; + + case Triple::tce: T.setArch(Triple::tcele); break; + case Triple::aarch64_be: T.setArch(Triple::aarch64); break; + case Triple::bpfeb: T.setArch(Triple::bpfel); break; + case Triple::mips64: T.setArch(Triple::mips64el); break; + case Triple::mips: T.setArch(Triple::mipsel); break; + case Triple::ppc64: T.setArch(Triple::ppc64le); break; + case Triple::sparc: T.setArch(Triple::sparcel); break; + default: + llvm_unreachable("getLittleEndianArchVariant: unknown triple."); + } + return T; +} + +bool Triple::isLittleEndian() const { + switch (getArch()) { + case Triple::aarch64: + case Triple::aarch64_32: + case Triple::amdgcn: + case Triple::amdil64: + case Triple::amdil: + case Triple::arm: + case Triple::avr: + case Triple::bpfel: + case Triple::hexagon: + case Triple::hsail64: + case Triple::hsail: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::mips64el: + case Triple::mipsel: + case Triple::msp430: + case Triple::nvptx64: + case 
Triple::nvptx: + case Triple::ppc64le: + case Triple::r600: + case Triple::riscv32: + case Triple::riscv64: + case Triple::shave: + case Triple::sparcel: + case Triple::spir64: + case Triple::spir: + case Triple::thumb: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: + case Triple::x86_64: + case Triple::xcore: + case Triple::tcele: + case Triple::renderscript32: + case Triple::renderscript64: + return true; + default: + return false; + } +} + +bool Triple::isCompatibleWith(const Triple &Other) const { + // ARM and Thumb triples are compatible, if subarch, vendor and OS match. + if ((getArch() == Triple::thumb && Other.getArch() == Triple::arm) || + (getArch() == Triple::arm && Other.getArch() == Triple::thumb) || + (getArch() == Triple::thumbeb && Other.getArch() == Triple::armeb) || + (getArch() == Triple::armeb && Other.getArch() == Triple::thumbeb)) { + if (getVendor() == Triple::Apple) + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + else + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS() && + getEnvironment() == Other.getEnvironment() && + getObjectFormat() == Other.getObjectFormat(); + } + + // If vendor is apple, ignore the version number. + if (getVendor() == Triple::Apple) + return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + + return *this == Other; +} + +std::string Triple::merge(const Triple &Other) const { + // If vendor is apple, pick the triple with the larger version number. + if (getVendor() == Triple::Apple) + if (Other.isOSVersionLT(*this)) + return str(); + + return Other.str(); +} + +StringRef Triple::getARMCPUForArch(StringRef MArch) const { + if (MArch.empty()) + MArch = getArchName(); + MArch = ARM::getCanonicalArchName(MArch); + + // Some defaults are forced. 
+ switch (getOS()) { + case llvm::Triple::FreeBSD: + case llvm::Triple::NetBSD: + if (!MArch.empty() && MArch == "v6") + return "arm1176jzf-s"; + break; + case llvm::Triple::Win32: + // FIXME: this is invalid for WindowsCE + return "cortex-a9"; + case llvm::Triple::MacOSX: + case llvm::Triple::IOS: + case llvm::Triple::WatchOS: + case llvm::Triple::TvOS: + if (MArch == "v7k") + return "cortex-a7"; + break; + default: + break; + } + + if (MArch.empty()) + return StringRef(); + + StringRef CPU = ARM::getDefaultCPU(MArch); + if (!CPU.empty() && !CPU.equals("invalid")) + return CPU; + + // If no specific architecture version is requested, return the minimum CPU + // required by the OS and environment. + switch (getOS()) { + case llvm::Triple::NetBSD: + switch (getEnvironment()) { + case llvm::Triple::GNUEABIHF: + case llvm::Triple::GNUEABI: + case llvm::Triple::EABIHF: + case llvm::Triple::EABI: + return "arm926ej-s"; + default: + return "strongarm"; + } + case llvm::Triple::NaCl: + case llvm::Triple::OpenBSD: + return "cortex-a8"; + default: + switch (getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABIHF: + return "arm1176jzf-s"; + default: + return "arm7tdmi"; + } + } + + llvm_unreachable("invalid arch name"); +} diff --git a/llvm/lib/Support/Twine.cpp b/llvm/lib/Support/Twine.cpp new file mode 100644 index 0000000000000..fbbcd8848f1cd --- /dev/null +++ b/llvm/lib/Support/Twine.cpp @@ -0,0 +1,184 @@ +//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +std::string Twine::str() const { + // If we're storing only a std::string, just return it. + if (LHSKind == StdStringKind && RHSKind == EmptyKind) + return *LHS.stdString; + + // If we're storing a formatv_object, we can avoid an extra copy by formatting + // it immediately and returning the result. + if (LHSKind == FormatvObjectKind && RHSKind == EmptyKind) + return LHS.formatvObject->str(); + + // Otherwise, flatten and copy the contents first. + SmallString<256> Vec; + return toStringRef(Vec).str(); +} + +void Twine::toVector(SmallVectorImpl<char> &Out) const { + raw_svector_ostream OS(Out); + print(OS); +} + +StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const { + if (isUnary()) { + switch (getLHSKind()) { + case CStringKind: + // Already null terminated, yay! 
+ return StringRef(LHS.cString); + case StdStringKind: { + const std::string *str = LHS.stdString; + return StringRef(str->c_str(), str->size()); + } + default: + break; + } + } + toVector(Out); + Out.push_back(0); + Out.pop_back(); + return StringRef(Out.data(), Out.size()); +} + +void Twine::printOneChild(raw_ostream &OS, Child Ptr, + NodeKind Kind) const { + switch (Kind) { + case Twine::NullKind: break; + case Twine::EmptyKind: break; + case Twine::TwineKind: + Ptr.twine->print(OS); + break; + case Twine::CStringKind: + OS << Ptr.cString; + break; + case Twine::StdStringKind: + OS << *Ptr.stdString; + break; + case Twine::StringRefKind: + OS << *Ptr.stringRef; + break; + case Twine::SmallStringKind: + OS << *Ptr.smallString; + break; + case Twine::FormatvObjectKind: + OS << *Ptr.formatvObject; + break; + case Twine::CharKind: + OS << Ptr.character; + break; + case Twine::DecUIKind: + OS << Ptr.decUI; + break; + case Twine::DecIKind: + OS << Ptr.decI; + break; + case Twine::DecULKind: + OS << *Ptr.decUL; + break; + case Twine::DecLKind: + OS << *Ptr.decL; + break; + case Twine::DecULLKind: + OS << *Ptr.decULL; + break; + case Twine::DecLLKind: + OS << *Ptr.decLL; + break; + case Twine::UHexKind: + OS.write_hex(*Ptr.uHex); + break; + } +} + +void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr, + NodeKind Kind) const { + switch (Kind) { + case Twine::NullKind: + OS << "null"; break; + case Twine::EmptyKind: + OS << "empty"; break; + case Twine::TwineKind: + OS << "rope:"; + Ptr.twine->printRepr(OS); + break; + case Twine::CStringKind: + OS << "cstring:\"" + << Ptr.cString << "\""; + break; + case Twine::StdStringKind: + OS << "std::string:\"" + << Ptr.stdString << "\""; + break; + case Twine::StringRefKind: + OS << "stringref:\"" + << Ptr.stringRef << "\""; + break; + case Twine::SmallStringKind: + OS << "smallstring:\"" << *Ptr.smallString << "\""; + break; + case Twine::FormatvObjectKind: + OS << "formatv:\"" << *Ptr.formatvObject << "\""; + break; + case 
Twine::CharKind: + OS << "char:\"" << Ptr.character << "\""; + break; + case Twine::DecUIKind: + OS << "decUI:\"" << Ptr.decUI << "\""; + break; + case Twine::DecIKind: + OS << "decI:\"" << Ptr.decI << "\""; + break; + case Twine::DecULKind: + OS << "decUL:\"" << *Ptr.decUL << "\""; + break; + case Twine::DecLKind: + OS << "decL:\"" << *Ptr.decL << "\""; + break; + case Twine::DecULLKind: + OS << "decULL:\"" << *Ptr.decULL << "\""; + break; + case Twine::DecLLKind: + OS << "decLL:\"" << *Ptr.decLL << "\""; + break; + case Twine::UHexKind: + OS << "uhex:\"" << Ptr.uHex << "\""; + break; + } +} + +void Twine::print(raw_ostream &OS) const { + printOneChild(OS, LHS, getLHSKind()); + printOneChild(OS, RHS, getRHSKind()); +} + +void Twine::printRepr(raw_ostream &OS) const { + OS << "(Twine "; + printOneChildRepr(OS, LHS, getLHSKind()); + OS << " "; + printOneChildRepr(OS, RHS, getRHSKind()); + OS << ")"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Twine::dump() const { + print(dbgs()); +} + +LLVM_DUMP_METHOD void Twine::dumpRepr() const { + printRepr(dbgs()); +} +#endif diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp new file mode 100644 index 0000000000000..4d195069682bb --- /dev/null +++ b/llvm/lib/Support/Unicode.cpp @@ -0,0 +1,366 @@ +//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements functions that allow querying certain properties of +// Unicode characters. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Unicode.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/UnicodeCharRanges.h" + +namespace llvm { +namespace sys { +namespace unicode { + +bool isPrintable(int UCS) { + // Sorted list of non-overlapping intervals of code points that are not + // supposed to be printable. + static const UnicodeCharRange NonPrintableRanges[] = { + { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F }, + { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B }, + { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 }, + { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 }, + { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF }, + { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D }, + { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C }, + { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F }, + { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F }, + { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF }, + { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 }, + { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 }, + { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB }, + { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 }, + { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 }, + { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E }, + { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 }, + { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B }, + { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A }, + { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D }, + { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 }, + { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 }, + { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB }, + { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF }, + { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 
0x0B00 }, + { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 }, + { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 }, + { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A }, + { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E }, + { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 }, + { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 }, + { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 }, + { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD }, + { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF }, + { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 }, + { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 }, + { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C }, + { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 }, + { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 }, + { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 }, + { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 }, + { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 }, + { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD }, + { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 }, + { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D }, + { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 }, + { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F }, + { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 }, + { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 }, + { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 }, + { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 }, + { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E }, + { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 }, + { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 }, + { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 }, + { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC }, + { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 }, + { 
0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB }, + { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 }, + { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD }, + { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC }, + { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 }, + { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 }, + { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F }, + { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF }, + { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 }, + { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C }, + { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF }, + { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D }, + { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F }, + { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F }, + { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF }, + { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F }, + { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF }, + { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F }, + { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F }, + { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF }, + { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F }, + { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F }, + { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F }, + { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C }, + { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF }, + { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F }, + { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 }, + { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E }, + { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 }, + { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 }, + { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F }, + { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 }, + { 0x208F, 0x208F 
}, { 0x209D, 0x209F }, { 0x20BB, 0x20CF }, + { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF }, + { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 }, + { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F }, + { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 }, + { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E }, + { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 }, + { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF }, + { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 }, + { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A }, + { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF }, + { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 }, + { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F }, + { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F }, + { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF }, + { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F }, + { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F }, + { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F }, + { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD }, + { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E }, + { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD }, + { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F }, + { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA }, + { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 }, + { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF }, + { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF }, + { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F }, + { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C }, + { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F }, + { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 }, + { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF }, + { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F }, + { 0xFE53, 0xFE53 }, { 0xFE67, 
0xFE67 }, { 0xFE6C, 0xFE6F }, + { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 }, + { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 }, + { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF }, + { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF }, + { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B }, + { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F }, + { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 }, + { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F }, + { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F }, + { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E }, + { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F }, + { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 }, + { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E }, + { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E }, + { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD }, + { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B }, + { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 }, + { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F }, + { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 }, + { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F }, + { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F }, + { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF }, + { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F }, + { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF }, + { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F }, + { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF }, + { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF }, + { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 }, + { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF }, + { 0x1D357, 0x1D35F 
}, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 }, + { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 }, + { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA }, + { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 }, + { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D }, + { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 }, + { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 }, + { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 }, + { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 }, + { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 }, + { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 }, + { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C }, + { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 }, + { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C }, + { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 }, + { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 }, + { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F }, + { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 }, + { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF }, + { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 }, + { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF }, + { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F }, + { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F }, + { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F }, + { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F }, + { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF }, + { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 }, + { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F }, + { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F }, + { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 
0x2A6D7, 0x2A6FF }, + { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 }, + { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF } + }; + static const UnicodeCharSet NonPrintables(NonPrintableRanges); + + return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS); +} + +/// Gets the number of positions a character is likely to occupy when output +/// on a terminal ("character width"). This depends on the implementation of the +/// terminal, and there's no standard definition of character width. +/// The implementation defines it in a way that is expected to be compatible +/// with a generic Unicode-capable terminal. +/// \return Character width: +/// * ErrorNonPrintableCharacter (-1) for non-printable characters (as +/// identified by isPrintable); +/// * 0 for non-spacing and enclosing combining marks; +/// * 2 for CJK characters excluding halfwidth forms; +/// * 1 for all remaining characters. +static inline int charWidth(int UCS) +{ + if (!isPrintable(UCS)) + return ErrorNonPrintableCharacter; + + // Sorted list of non-spacing and enclosing combining mark intervals as + // defined in "3.6 Combination" of + // http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf + static const UnicodeCharRange CombiningCharacterRanges[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, + { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, + { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, { 0x064B, 0x065F }, + { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, + { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, + { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, + { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, + { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE }, + { 0x0900, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C }, + { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 }, + { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, + { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 
0x09E2, 0x09E3 }, + { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, + { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, + { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, + { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, + { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, + { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, + { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 }, + { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, + { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, + { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, + { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, + { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, + { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, + { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, + { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, + { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, + { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, + { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, + { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, + { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, + { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, + { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, + { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 }, + { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C 
}, + { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, + { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB }, + { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, + { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 }, + { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 }, + { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 }, + { 0x1DFC, 0x1DFF }, { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 }, + { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, + { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D }, + { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, + { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, + { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D }, + { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, + { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E }, + { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, + { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, + { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 }, + { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 }, + { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E }, + { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, { 0x101FD, 0x101FD }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 }, + { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 }, + { 0x110B9, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B }, + { 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, + { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 }, + { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 }, { 0x1D167, 0x1D169 }, + { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0100, 0xE01EF }, + }; + static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges); + + if (CombiningCharacters.contains(UCS)) + return 0; + + static const 
UnicodeCharRange DoubleWidthCharacterRanges[] = { + // Hangul Jamo + { 0x1100, 0x11FF }, + // Deprecated fullwidth angle brackets + { 0x2329, 0x232A }, + // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi + // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE) + { 0x2E80, 0x303E }, { 0x3040, 0xA4CF }, + // Hangul + { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB }, + // CJK Unified Ideographs + { 0xF900, 0xFAFF }, + // Vertical forms + { 0xFE10, 0xFE19 }, + // CJK Compatibility Forms + Small Form Variants + { 0xFE30, 0xFE6F }, + // Fullwidth forms + { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 }, + // CJK Unified Ideographs + { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F } + }; + static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges); + + if (DoubleWidthCharacters.contains(UCS)) + return 2; + return 1; +} + +int columnWidthUTF8(StringRef Text) { + unsigned ColumnWidth = 0; + unsigned Length; + for (size_t i = 0, e = Text.size(); i < e; i += Length) { + Length = getNumBytesForUTF8(Text[i]); + if (Length <= 0 || i + Length > Text.size()) + return ErrorInvalidUTF8; + UTF32 buf[1]; + const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i); + UTF32 *Target = &buf[0]; + if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target, + Target + 1, strictConversion)) + return ErrorInvalidUTF8; + int Width = charWidth(buf[0]); + if (Width < 0) + return ErrorNonPrintableCharacter; + ColumnWidth += Width; + } + return ColumnWidth; +} + +} // namespace unicode +} // namespace sys +} // namespace llvm + diff --git a/llvm/lib/Support/UnicodeCaseFold.cpp b/llvm/lib/Support/UnicodeCaseFold.cpp new file mode 100644 index 0000000000000..b18d49dbafb00 --- /dev/null +++ b/llvm/lib/Support/UnicodeCaseFold.cpp @@ -0,0 +1,742 @@ +//===---------- Support/UnicodeCaseFold.cpp -------------------------------===// +// +// This file was generated by utils/unicode-case-fold.py from the Unicode +// case folding database at +// 
http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt +// +// To regenerate this file, run: +// utils/unicode-case-fold.py \ +// "http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt" \ +// > lib/Support/UnicodeCaseFold.cpp +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Unicode.h" + +int llvm::sys::unicode::foldCharSimple(int C) { + if (C < 0x0041) + return C; + // 26 characters + if (C <= 0x005a) + return C + 32; + // MICRO SIGN + if (C == 0x00b5) + return 0x03bc; + if (C < 0x00c0) + return C; + // 23 characters + if (C <= 0x00d6) + return C + 32; + if (C < 0x00d8) + return C; + // 7 characters + if (C <= 0x00de) + return C + 32; + if (C < 0x0100) + return C; + // 24 characters + if (C <= 0x012e) + return C | 1; + if (C < 0x0132) + return C; + // 3 characters + if (C <= 0x0136) + return C | 1; + if (C < 0x0139) + return C; + // 8 characters + if (C <= 0x0147 && C % 2 == 1) + return C + 1; + if (C < 0x014a) + return C; + // 23 characters + if (C <= 0x0176) + return C | 1; + // LATIN CAPITAL LETTER Y WITH DIAERESIS + if (C == 0x0178) + return 0x00ff; + if (C < 0x0179) + return C; + // 3 characters + if (C <= 0x017d && C % 2 == 1) + return C + 1; + // LATIN SMALL LETTER LONG S + if (C == 0x017f) + return 0x0073; + // LATIN CAPITAL LETTER B WITH HOOK + if (C == 0x0181) + return 0x0253; + if (C < 0x0182) + return C; + // 2 characters + if (C <= 0x0184) + return C | 1; + // LATIN CAPITAL LETTER OPEN O + if (C == 0x0186) + return 0x0254; + // LATIN CAPITAL LETTER C WITH HOOK + if (C == 0x0187) + return 0x0188; + if (C < 0x0189) + return C; + // 2 characters + if (C <= 0x018a) + return C + 205; + // LATIN CAPITAL LETTER D WITH TOPBAR + if (C == 0x018b) + return 0x018c; + // LATIN CAPITAL LETTER REVERSED E + if (C == 0x018e) + return 0x01dd; + // LATIN CAPITAL LETTER SCHWA + if (C == 0x018f) + return 0x0259; + // LATIN CAPITAL LETTER OPEN E + if (C == 0x0190) + return 0x025b; + // LATIN CAPITAL LETTER F 
WITH HOOK + if (C == 0x0191) + return 0x0192; + // LATIN CAPITAL LETTER G WITH HOOK + if (C == 0x0193) + return 0x0260; + // LATIN CAPITAL LETTER GAMMA + if (C == 0x0194) + return 0x0263; + // LATIN CAPITAL LETTER IOTA + if (C == 0x0196) + return 0x0269; + // LATIN CAPITAL LETTER I WITH STROKE + if (C == 0x0197) + return 0x0268; + // LATIN CAPITAL LETTER K WITH HOOK + if (C == 0x0198) + return 0x0199; + // LATIN CAPITAL LETTER TURNED M + if (C == 0x019c) + return 0x026f; + // LATIN CAPITAL LETTER N WITH LEFT HOOK + if (C == 0x019d) + return 0x0272; + // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + if (C == 0x019f) + return 0x0275; + if (C < 0x01a0) + return C; + // 3 characters + if (C <= 0x01a4) + return C | 1; + // LATIN LETTER YR + if (C == 0x01a6) + return 0x0280; + // LATIN CAPITAL LETTER TONE TWO + if (C == 0x01a7) + return 0x01a8; + // LATIN CAPITAL LETTER ESH + if (C == 0x01a9) + return 0x0283; + // LATIN CAPITAL LETTER T WITH HOOK + if (C == 0x01ac) + return 0x01ad; + // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + if (C == 0x01ae) + return 0x0288; + // LATIN CAPITAL LETTER U WITH HORN + if (C == 0x01af) + return 0x01b0; + if (C < 0x01b1) + return C; + // 2 characters + if (C <= 0x01b2) + return C + 217; + if (C < 0x01b3) + return C; + // 2 characters + if (C <= 0x01b5 && C % 2 == 1) + return C + 1; + // LATIN CAPITAL LETTER EZH + if (C == 0x01b7) + return 0x0292; + if (C < 0x01b8) + return C; + // 2 characters + if (C <= 0x01bc && C % 4 == 0) + return C + 1; + // LATIN CAPITAL LETTER DZ WITH CARON + if (C == 0x01c4) + return 0x01c6; + // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + if (C == 0x01c5) + return 0x01c6; + // LATIN CAPITAL LETTER LJ + if (C == 0x01c7) + return 0x01c9; + // LATIN CAPITAL LETTER L WITH SMALL LETTER J + if (C == 0x01c8) + return 0x01c9; + // LATIN CAPITAL LETTER NJ + if (C == 0x01ca) + return 0x01cc; + if (C < 0x01cb) + return C; + // 9 characters + if (C <= 0x01db && C % 2 == 1) + return C + 1; + if (C < 0x01de) + return 
C; + // 9 characters + if (C <= 0x01ee) + return C | 1; + // LATIN CAPITAL LETTER DZ + if (C == 0x01f1) + return 0x01f3; + if (C < 0x01f2) + return C; + // 2 characters + if (C <= 0x01f4) + return C | 1; + // LATIN CAPITAL LETTER HWAIR + if (C == 0x01f6) + return 0x0195; + // LATIN CAPITAL LETTER WYNN + if (C == 0x01f7) + return 0x01bf; + if (C < 0x01f8) + return C; + // 20 characters + if (C <= 0x021e) + return C | 1; + // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + if (C == 0x0220) + return 0x019e; + if (C < 0x0222) + return C; + // 9 characters + if (C <= 0x0232) + return C | 1; + // LATIN CAPITAL LETTER A WITH STROKE + if (C == 0x023a) + return 0x2c65; + // LATIN CAPITAL LETTER C WITH STROKE + if (C == 0x023b) + return 0x023c; + // LATIN CAPITAL LETTER L WITH BAR + if (C == 0x023d) + return 0x019a; + // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + if (C == 0x023e) + return 0x2c66; + // LATIN CAPITAL LETTER GLOTTAL STOP + if (C == 0x0241) + return 0x0242; + // LATIN CAPITAL LETTER B WITH STROKE + if (C == 0x0243) + return 0x0180; + // LATIN CAPITAL LETTER U BAR + if (C == 0x0244) + return 0x0289; + // LATIN CAPITAL LETTER TURNED V + if (C == 0x0245) + return 0x028c; + if (C < 0x0246) + return C; + // 5 characters + if (C <= 0x024e) + return C | 1; + // COMBINING GREEK YPOGEGRAMMENI + if (C == 0x0345) + return 0x03b9; + if (C < 0x0370) + return C; + // 2 characters + if (C <= 0x0372) + return C | 1; + // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + if (C == 0x0376) + return 0x0377; + // GREEK CAPITAL LETTER YOT + if (C == 0x037f) + return 0x03f3; + // GREEK CAPITAL LETTER ALPHA WITH TONOS + if (C == 0x0386) + return 0x03ac; + if (C < 0x0388) + return C; + // 3 characters + if (C <= 0x038a) + return C + 37; + // GREEK CAPITAL LETTER OMICRON WITH TONOS + if (C == 0x038c) + return 0x03cc; + if (C < 0x038e) + return C; + // 2 characters + if (C <= 0x038f) + return C + 63; + if (C < 0x0391) + return C; + // 17 characters + if (C <= 0x03a1) + return C + 32; + if (C < 
0x03a3) + return C; + // 9 characters + if (C <= 0x03ab) + return C + 32; + // GREEK SMALL LETTER FINAL SIGMA + if (C == 0x03c2) + return 0x03c3; + // GREEK CAPITAL KAI SYMBOL + if (C == 0x03cf) + return 0x03d7; + // GREEK BETA SYMBOL + if (C == 0x03d0) + return 0x03b2; + // GREEK THETA SYMBOL + if (C == 0x03d1) + return 0x03b8; + // GREEK PHI SYMBOL + if (C == 0x03d5) + return 0x03c6; + // GREEK PI SYMBOL + if (C == 0x03d6) + return 0x03c0; + if (C < 0x03d8) + return C; + // 12 characters + if (C <= 0x03ee) + return C | 1; + // GREEK KAPPA SYMBOL + if (C == 0x03f0) + return 0x03ba; + // GREEK RHO SYMBOL + if (C == 0x03f1) + return 0x03c1; + // GREEK CAPITAL THETA SYMBOL + if (C == 0x03f4) + return 0x03b8; + // GREEK LUNATE EPSILON SYMBOL + if (C == 0x03f5) + return 0x03b5; + // GREEK CAPITAL LETTER SHO + if (C == 0x03f7) + return 0x03f8; + // GREEK CAPITAL LUNATE SIGMA SYMBOL + if (C == 0x03f9) + return 0x03f2; + // GREEK CAPITAL LETTER SAN + if (C == 0x03fa) + return 0x03fb; + if (C < 0x03fd) + return C; + // 3 characters + if (C <= 0x03ff) + return C + -130; + if (C < 0x0400) + return C; + // 16 characters + if (C <= 0x040f) + return C + 80; + if (C < 0x0410) + return C; + // 32 characters + if (C <= 0x042f) + return C + 32; + if (C < 0x0460) + return C; + // 17 characters + if (C <= 0x0480) + return C | 1; + if (C < 0x048a) + return C; + // 27 characters + if (C <= 0x04be) + return C | 1; + // CYRILLIC LETTER PALOCHKA + if (C == 0x04c0) + return 0x04cf; + if (C < 0x04c1) + return C; + // 7 characters + if (C <= 0x04cd && C % 2 == 1) + return C + 1; + if (C < 0x04d0) + return C; + // 48 characters + if (C <= 0x052e) + return C | 1; + if (C < 0x0531) + return C; + // 38 characters + if (C <= 0x0556) + return C + 48; + if (C < 0x10a0) + return C; + // 38 characters + if (C <= 0x10c5) + return C + 7264; + if (C < 0x10c7) + return C; + // 2 characters + if (C <= 0x10cd && C % 6 == 5) + return C + 7264; + if (C < 0x13f8) + return C; + // 6 characters + if (C <= 
0x13fd) + return C + -8; + // CYRILLIC SMALL LETTER ROUNDED VE + if (C == 0x1c80) + return 0x0432; + // CYRILLIC SMALL LETTER LONG-LEGGED DE + if (C == 0x1c81) + return 0x0434; + // CYRILLIC SMALL LETTER NARROW O + if (C == 0x1c82) + return 0x043e; + if (C < 0x1c83) + return C; + // 2 characters + if (C <= 0x1c84) + return C + -6210; + // CYRILLIC SMALL LETTER THREE-LEGGED TE + if (C == 0x1c85) + return 0x0442; + // CYRILLIC SMALL LETTER TALL HARD SIGN + if (C == 0x1c86) + return 0x044a; + // CYRILLIC SMALL LETTER TALL YAT + if (C == 0x1c87) + return 0x0463; + // CYRILLIC SMALL LETTER UNBLENDED UK + if (C == 0x1c88) + return 0xa64b; + if (C < 0x1e00) + return C; + // 75 characters + if (C <= 0x1e94) + return C | 1; + // LATIN SMALL LETTER LONG S WITH DOT ABOVE + if (C == 0x1e9b) + return 0x1e61; + // LATIN CAPITAL LETTER SHARP S + if (C == 0x1e9e) + return 0x00df; + if (C < 0x1ea0) + return C; + // 48 characters + if (C <= 0x1efe) + return C | 1; + if (C < 0x1f08) + return C; + // 8 characters + if (C <= 0x1f0f) + return C + -8; + if (C < 0x1f18) + return C; + // 6 characters + if (C <= 0x1f1d) + return C + -8; + if (C < 0x1f28) + return C; + // 8 characters + if (C <= 0x1f2f) + return C + -8; + if (C < 0x1f38) + return C; + // 8 characters + if (C <= 0x1f3f) + return C + -8; + if (C < 0x1f48) + return C; + // 6 characters + if (C <= 0x1f4d) + return C + -8; + if (C < 0x1f59) + return C; + // 4 characters + if (C <= 0x1f5f && C % 2 == 1) + return C + -8; + if (C < 0x1f68) + return C; + // 8 characters + if (C <= 0x1f6f) + return C + -8; + if (C < 0x1f88) + return C; + // 8 characters + if (C <= 0x1f8f) + return C + -8; + if (C < 0x1f98) + return C; + // 8 characters + if (C <= 0x1f9f) + return C + -8; + if (C < 0x1fa8) + return C; + // 8 characters + if (C <= 0x1faf) + return C + -8; + if (C < 0x1fb8) + return C; + // 2 characters + if (C <= 0x1fb9) + return C + -8; + if (C < 0x1fba) + return C; + // 2 characters + if (C <= 0x1fbb) + return C + -74; + // GREEK 
CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + if (C == 0x1fbc) + return 0x1fb3; + // GREEK PROSGEGRAMMENI + if (C == 0x1fbe) + return 0x03b9; + if (C < 0x1fc8) + return C; + // 4 characters + if (C <= 0x1fcb) + return C + -86; + // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + if (C == 0x1fcc) + return 0x1fc3; + if (C < 0x1fd8) + return C; + // 2 characters + if (C <= 0x1fd9) + return C + -8; + if (C < 0x1fda) + return C; + // 2 characters + if (C <= 0x1fdb) + return C + -100; + if (C < 0x1fe8) + return C; + // 2 characters + if (C <= 0x1fe9) + return C + -8; + if (C < 0x1fea) + return C; + // 2 characters + if (C <= 0x1feb) + return C + -112; + // GREEK CAPITAL LETTER RHO WITH DASIA + if (C == 0x1fec) + return 0x1fe5; + if (C < 0x1ff8) + return C; + // 2 characters + if (C <= 0x1ff9) + return C + -128; + if (C < 0x1ffa) + return C; + // 2 characters + if (C <= 0x1ffb) + return C + -126; + // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + if (C == 0x1ffc) + return 0x1ff3; + // OHM SIGN + if (C == 0x2126) + return 0x03c9; + // KELVIN SIGN + if (C == 0x212a) + return 0x006b; + // ANGSTROM SIGN + if (C == 0x212b) + return 0x00e5; + // TURNED CAPITAL F + if (C == 0x2132) + return 0x214e; + if (C < 0x2160) + return C; + // 16 characters + if (C <= 0x216f) + return C + 16; + // ROMAN NUMERAL REVERSED ONE HUNDRED + if (C == 0x2183) + return 0x2184; + if (C < 0x24b6) + return C; + // 26 characters + if (C <= 0x24cf) + return C + 26; + if (C < 0x2c00) + return C; + // 47 characters + if (C <= 0x2c2e) + return C + 48; + // LATIN CAPITAL LETTER L WITH DOUBLE BAR + if (C == 0x2c60) + return 0x2c61; + // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + if (C == 0x2c62) + return 0x026b; + // LATIN CAPITAL LETTER P WITH STROKE + if (C == 0x2c63) + return 0x1d7d; + // LATIN CAPITAL LETTER R WITH TAIL + if (C == 0x2c64) + return 0x027d; + if (C < 0x2c67) + return C; + // 3 characters + if (C <= 0x2c6b && C % 2 == 1) + return C + 1; + // LATIN CAPITAL LETTER ALPHA + if (C == 0x2c6d) + 
return 0x0251; + // LATIN CAPITAL LETTER M WITH HOOK + if (C == 0x2c6e) + return 0x0271; + // LATIN CAPITAL LETTER TURNED A + if (C == 0x2c6f) + return 0x0250; + // LATIN CAPITAL LETTER TURNED ALPHA + if (C == 0x2c70) + return 0x0252; + if (C < 0x2c72) + return C; + // 2 characters + if (C <= 0x2c75 && C % 3 == 2) + return C + 1; + if (C < 0x2c7e) + return C; + // 2 characters + if (C <= 0x2c7f) + return C + -10815; + if (C < 0x2c80) + return C; + // 50 characters + if (C <= 0x2ce2) + return C | 1; + if (C < 0x2ceb) + return C; + // 2 characters + if (C <= 0x2ced && C % 2 == 1) + return C + 1; + if (C < 0x2cf2) + return C; + // 2 characters + if (C <= 0xa640 && C % 31054 == 11506) + return C + 1; + if (C < 0xa642) + return C; + // 22 characters + if (C <= 0xa66c) + return C | 1; + if (C < 0xa680) + return C; + // 14 characters + if (C <= 0xa69a) + return C | 1; + if (C < 0xa722) + return C; + // 7 characters + if (C <= 0xa72e) + return C | 1; + if (C < 0xa732) + return C; + // 31 characters + if (C <= 0xa76e) + return C | 1; + if (C < 0xa779) + return C; + // 2 characters + if (C <= 0xa77b && C % 2 == 1) + return C + 1; + // LATIN CAPITAL LETTER INSULAR G + if (C == 0xa77d) + return 0x1d79; + if (C < 0xa77e) + return C; + // 5 characters + if (C <= 0xa786) + return C | 1; + // LATIN CAPITAL LETTER SALTILLO + if (C == 0xa78b) + return 0xa78c; + // LATIN CAPITAL LETTER TURNED H + if (C == 0xa78d) + return 0x0265; + if (C < 0xa790) + return C; + // 2 characters + if (C <= 0xa792) + return C | 1; + if (C < 0xa796) + return C; + // 10 characters + if (C <= 0xa7a8) + return C | 1; + // LATIN CAPITAL LETTER H WITH HOOK + if (C == 0xa7aa) + return 0x0266; + // LATIN CAPITAL LETTER REVERSED OPEN E + if (C == 0xa7ab) + return 0x025c; + // LATIN CAPITAL LETTER SCRIPT G + if (C == 0xa7ac) + return 0x0261; + // LATIN CAPITAL LETTER L WITH BELT + if (C == 0xa7ad) + return 0x026c; + // LATIN CAPITAL LETTER SMALL CAPITAL I + if (C == 0xa7ae) + return 0x026a; + // LATIN CAPITAL 
LETTER TURNED K + if (C == 0xa7b0) + return 0x029e; + // LATIN CAPITAL LETTER TURNED T + if (C == 0xa7b1) + return 0x0287; + // LATIN CAPITAL LETTER J WITH CROSSED-TAIL + if (C == 0xa7b2) + return 0x029d; + // LATIN CAPITAL LETTER CHI + if (C == 0xa7b3) + return 0xab53; + if (C < 0xa7b4) + return C; + // 2 characters + if (C <= 0xa7b6) + return C | 1; + if (C < 0xab70) + return C; + // 80 characters + if (C <= 0xabbf) + return C + -38864; + if (C < 0xff21) + return C; + // 26 characters + if (C <= 0xff3a) + return C + 32; + if (C < 0x10400) + return C; + // 40 characters + if (C <= 0x10427) + return C + 40; + if (C < 0x104b0) + return C; + // 36 characters + if (C <= 0x104d3) + return C + 40; + if (C < 0x10c80) + return C; + // 51 characters + if (C <= 0x10cb2) + return C + 64; + if (C < 0x118a0) + return C; + // 32 characters + if (C <= 0x118bf) + return C + 32; + if (C < 0x1e900) + return C; + // 34 characters + if (C <= 0x1e921) + return C + 34; + + return C; +} diff --git a/llvm/lib/Support/Unix/COM.inc b/llvm/lib/Support/Unix/COM.inc new file mode 100644 index 0000000000000..03a690ac37667 --- /dev/null +++ b/llvm/lib/Support/Unix/COM.inc @@ -0,0 +1,26 @@ +//===- llvm/Support/Unix/COM.inc - Unix COM Implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix portion of COM support. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. 
+//===----------------------------------------------------------------------===// + +namespace llvm { +namespace sys { + +InitializeCOMRAII::InitializeCOMRAII(COMThreadingMode Threading, + bool SpeedOverMemory) {} + +InitializeCOMRAII::~InitializeCOMRAII() {} +} +} diff --git a/llvm/lib/Support/Unix/DynamicLibrary.inc b/llvm/lib/Support/Unix/DynamicLibrary.inc new file mode 100644 index 0000000000000..a2a379963de03 --- /dev/null +++ b/llvm/lib/Support/Unix/DynamicLibrary.inc @@ -0,0 +1,134 @@ +//===- Unix/DynamicLibrary.cpp - Unix DL Implementation ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the UNIX specific implementation of DynamicLibrary. +// +//===----------------------------------------------------------------------===// + +#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) +#include <dlfcn.h> + +DynamicLibrary::HandleSet::~HandleSet() { + // Close the libraries in reverse order. + for (void *Handle : llvm::reverse(Handles)) + ::dlclose(Handle); + if (Process) + ::dlclose(Process); + + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; +} + +void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { + void *Handle = ::dlopen(File, RTLD_LAZY|RTLD_GLOBAL); + if (!Handle) { + if (Err) *Err = ::dlerror(); + return &DynamicLibrary::Invalid; + } + +#ifdef __CYGWIN__ + // Cygwin searches symbols only in the main + // with the handle of dlopen(NULL, RTLD_GLOBAL). 
+ if (!File) + Handle = RTLD_DEFAULT; +#endif + + return Handle; +} + +void DynamicLibrary::HandleSet::DLClose(void *Handle) { + ::dlclose(Handle); +} + +void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) { + return ::dlsym(Handle, Symbol); +} + +#else // !HAVE_DLOPEN + +DynamicLibrary::HandleSet::~HandleSet() {} + +void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { + if (Err) *Err = "dlopen() not supported on this platform"; + return &Invalid; +} + +void DynamicLibrary::HandleSet::DLClose(void *Handle) { +} + +void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) { + return nullptr; +} + +#endif + +// Must declare the symbols in the global namespace. +static void *DoSearch(const char* SymbolName) { +#define EXPLICIT_SYMBOL(SYM) \ + extern void *SYM; if (!strcmp(SymbolName, #SYM)) return (void*)&SYM + + // If this is darwin, it has some funky issues, try to solve them here. Some + // important symbols are marked 'private external' which doesn't allow + // SearchForAddressOfSymbol to find them. As such, we special case them here, + // there is only a small handful of them. + +#ifdef __APPLE__ + { + // __eprintf is sometimes used for assert() handling on x86. + // + // FIXME: Currently disabled when using Clang, as we don't always have our + // runtime support libraries available. +#ifndef __clang__ +#ifdef __i386__ + EXPLICIT_SYMBOL(__eprintf); +#endif +#endif + } +#endif + +#ifdef __CYGWIN__ + { + EXPLICIT_SYMBOL(_alloca); + EXPLICIT_SYMBOL(__main); + } +#endif + +#undef EXPLICIT_SYMBOL + +// This macro returns the address of a well-known, explicit symbol +#define EXPLICIT_SYMBOL(SYM) \ + if (!strcmp(SymbolName, #SYM)) return &SYM + +// Under glibc we have a weird situation. The stderr/out/in symbols are both +// macros and global variables because of standards requirements. So, we +// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. 
+#if defined(__GLIBC__) + { + EXPLICIT_SYMBOL(stderr); + EXPLICIT_SYMBOL(stdout); + EXPLICIT_SYMBOL(stdin); + } +#else + // For everything else, we want to check to make sure the symbol isn't defined + // as a macro before using EXPLICIT_SYMBOL. + { +#ifndef stdin + EXPLICIT_SYMBOL(stdin); +#endif +#ifndef stdout + EXPLICIT_SYMBOL(stdout); +#endif +#ifndef stderr + EXPLICIT_SYMBOL(stderr); +#endif + } +#endif +#undef EXPLICIT_SYMBOL + + return nullptr; +} diff --git a/llvm/lib/Support/Unix/Host.inc b/llvm/lib/Support/Unix/Host.inc new file mode 100644 index 0000000000000..17d78dc18be75 --- /dev/null +++ b/llvm/lib/Support/Unix/Host.inc @@ -0,0 +1,84 @@ +//===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the UNIX Host support. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. +//===----------------------------------------------------------------------===// + +#include "Unix.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/config.h" +#include <cctype> +#include <string> +#include <sys/utsname.h> + +using namespace llvm; + +static std::string getOSVersion() { + struct utsname info; + + if (uname(&info)) + return ""; + + return info.release; +} + +static std::string updateTripleOSVersion(std::string TargetTripleString) { + // On darwin, we want to update the version to match that of the target. 
+ std::string::size_type DarwinDashIdx = TargetTripleString.find("-darwin"); + if (DarwinDashIdx != std::string::npos) { + TargetTripleString.resize(DarwinDashIdx + strlen("-darwin")); + TargetTripleString += getOSVersion(); + return TargetTripleString; + } + std::string::size_type MacOSDashIdx = TargetTripleString.find("-macos"); + if (MacOSDashIdx != std::string::npos) { + TargetTripleString.resize(MacOSDashIdx); + // Reset the OS to darwin as the OS version from `uname` doesn't use the + // macOS version scheme. + TargetTripleString += "-darwin"; + TargetTripleString += getOSVersion(); + } + // On AIX, the AIX version and release should be that of the current host + // unless if the version has already been specified. + if (Triple(LLVM_HOST_TRIPLE).getOS() == Triple::AIX) { + Triple TT(TargetTripleString); + if (TT.getOS() == Triple::AIX && !TT.getOSMajorVersion()) { + struct utsname name; + if (uname(&name) != -1) { + std::string NewOSName = Triple::getOSTypeName(Triple::AIX); + NewOSName += name.version; + NewOSName += '.'; + NewOSName += name.release; + NewOSName += ".0.0"; + TT.setOSName(NewOSName); + return TT.str(); + } + } + } + return TargetTripleString; +} + +std::string sys::getDefaultTargetTriple() { + std::string TargetTripleString = + updateTripleOSVersion(LLVM_DEFAULT_TARGET_TRIPLE); + + // Override the default target with an environment variable named by + // LLVM_TARGET_TRIPLE_ENV. +#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + TargetTripleString = EnvTriple; +#endif + + return TargetTripleString; +} diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc new file mode 100644 index 0000000000000..05f8e32896fa0 --- /dev/null +++ b/llvm/lib/Support/Unix/Memory.inc @@ -0,0 +1,268 @@ +//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some functions for various memory management utilities. +// +//===----------------------------------------------------------------------===// + +#include "Unix.h" +#include "llvm/Config/config.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Process.h" + +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#ifdef __APPLE__ +#include <mach/mach.h> +#endif + +#ifdef __Fuchsia__ +#include <zircon/syscalls.h> +#endif + +#if defined(__mips__) +# if defined(__OpenBSD__) +# include <mips64/sysarch.h> +# elif !defined(__FreeBSD__) +# include <sys/cachectl.h> +# endif +#endif + +#if defined(__APPLE__) +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +#else +extern "C" void __clear_cache(void *, void*); +#endif + +namespace { + +int getPosixProtectionFlags(unsigned Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { + case llvm::sys::Memory::MF_READ: + return PROT_READ; + case llvm::sys::Memory::MF_WRITE: + return PROT_WRITE; + case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_WRITE: + return PROT_READ | PROT_WRITE; + case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC: + return PROT_READ | PROT_EXEC; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE | + llvm::sys::Memory::MF_EXEC: + return PROT_READ | PROT_WRITE | PROT_EXEC; + case llvm::sys::Memory::MF_EXEC: +#if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC)) + // On PowerPC, having an executable page that has no read permission + // can have unintended consequences. The function InvalidateInstruction- + // Cache uses instructions dcbf and icbi, both of which are treated by + // the processor as loads. 
If the page has no read permissions, + // executing these instructions will result in a segmentation fault. + return PROT_READ | PROT_EXEC; +#else + return PROT_EXEC; +#endif + default: + llvm_unreachable("Illegal memory protection flag specified!"); + } + // Provide a default return value as required by some compilers. + return PROT_NONE; +} + +} // anonymous namespace + +namespace llvm { +namespace sys { + +MemoryBlock +Memory::allocateMappedMemory(size_t NumBytes, + const MemoryBlock *const NearBlock, + unsigned PFlags, + std::error_code &EC) { + EC = std::error_code(); + if (NumBytes == 0) + return MemoryBlock(); + + // On platforms that have it, we can use MAP_ANON to get a memory-mapped + // page without file backing, but we need a fallback of opening /dev/zero + // for strictly POSIX platforms instead. + int fd; +#if defined(MAP_ANON) + fd = -1; +#else + fd = open("/dev/zero", O_RDWR); + if (fd == -1) { + EC = std::error_code(errno, std::generic_category()); + return MemoryBlock(); + } +#endif + + int MMFlags = MAP_PRIVATE; +#if defined(MAP_ANON) + MMFlags |= MAP_ANON; +#endif + int Protect = getPosixProtectionFlags(PFlags); + +#if defined(__NetBSD__) && defined(PROT_MPROTECT) + Protect |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + + // Use any near hint and the page size to set a page-aligned starting address + uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) + + NearBlock->allocatedSize() : 0; + static const size_t PageSize = Process::getPageSizeEstimate(); + const size_t NumPages = (NumBytes+PageSize-1)/PageSize; + + if (Start && Start % PageSize) + Start += PageSize - Start % PageSize; + + // FIXME: Handle huge page requests (MF_HUGE_HINT). 
+ void *Addr = ::mmap(reinterpret_cast<void *>(Start), PageSize*NumPages, Protect, + MMFlags, fd, 0); + if (Addr == MAP_FAILED) { + if (NearBlock) { //Try again without a near hint +#if !defined(MAP_ANON) + close(fd); +#endif + return allocateMappedMemory(NumBytes, nullptr, PFlags, EC); + } + + EC = std::error_code(errno, std::generic_category()); +#if !defined(MAP_ANON) + close(fd); +#endif + return MemoryBlock(); + } + +#if !defined(MAP_ANON) + close(fd); +#endif + + MemoryBlock Result; + Result.Address = Addr; + Result.AllocatedSize = PageSize*NumPages; + Result.Flags = PFlags; + + // Rely on protectMappedMemory to invalidate instruction cache. + if (PFlags & MF_EXEC) { + EC = Memory::protectMappedMemory (Result, PFlags); + if (EC != std::error_code()) + return MemoryBlock(); + } + + return Result; +} + +std::error_code +Memory::releaseMappedMemory(MemoryBlock &M) { + if (M.Address == nullptr || M.AllocatedSize == 0) + return std::error_code(); + + if (0 != ::munmap(M.Address, M.AllocatedSize)) + return std::error_code(errno, std::generic_category()); + + M.Address = nullptr; + M.AllocatedSize = 0; + + return std::error_code(); +} + +std::error_code +Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { + static const Align PageSize = Align(Process::getPageSizeEstimate()); + if (M.Address == nullptr || M.AllocatedSize == 0) + return std::error_code(); + + if (!Flags) + return std::error_code(EINVAL, std::generic_category()); + + int Protect = getPosixProtectionFlags(Flags); + uintptr_t Start = alignAddr((const uint8_t *)M.Address - PageSize.value() + 1, PageSize); + uintptr_t End = alignAddr((const uint8_t *)M.Address + M.AllocatedSize, PageSize); + + bool InvalidateCache = (Flags & MF_EXEC); + +#if defined(__arm__) || defined(__aarch64__) + // Certain ARM implementations treat icache clear instruction as a memory read, + // and CPU segfaults on trying to clear cache on !PROT_READ page. 
Therefore we need + // to temporarily add PROT_READ for the sake of flushing the instruction caches. + if (InvalidateCache && !(Protect & PROT_READ)) { + int Result = ::mprotect((void *)Start, End - Start, Protect | PROT_READ); + if (Result != 0) + return std::error_code(errno, std::generic_category()); + + Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize); + InvalidateCache = false; + } +#endif + + int Result = ::mprotect((void *)Start, End - Start, Protect); + + if (Result != 0) + return std::error_code(errno, std::generic_category()); + + if (InvalidateCache) + Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize); + + return std::error_code(); +} + +/// InvalidateInstructionCache - Before the JIT can run a block of code +/// that has been emitted it must invalidate the instruction cache on some +/// platforms. +void Memory::InvalidateInstructionCache(const void *Addr, + size_t Len) { + +// icache invalidation for PPC and ARM. +#if defined(__APPLE__) + +# if (defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(__arm__) || \ + defined(__arm64__)) + sys_icache_invalidate(const_cast<void *>(Addr), Len); +# endif + +#elif defined(__Fuchsia__) + + zx_status_t Status = zx_cache_flush(Addr, Len, ZX_CACHE_FLUSH_INSN); + assert(Status == ZX_OK && "cannot invalidate instruction cache"); + +#else + +# if (defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__) + const size_t LineSize = 32; + + const intptr_t Mask = ~(LineSize - 1); + const intptr_t StartLine = ((intptr_t) Addr) & Mask; + const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask; + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("dcbf 0, %0" : : "r"(Line)); + asm volatile("sync"); + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +# elif 
(defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ + defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? + const char *Start = static_cast<const char *>(Addr); + const char *End = Start + Len; + __clear_cache(const_cast<char *>(Start), const_cast<char *>(End)); +# endif + +#endif // end apple + + ValgrindDiscardTranslations(Addr, Len); +} + +} // namespace sys +} // namespace llvm diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc new file mode 100644 index 0000000000000..a617eca3566a3 --- /dev/null +++ b/llvm/lib/Support/Unix/Path.inc @@ -0,0 +1,1213 @@ +//===- llvm/Support/Unix/Path.inc - Unix Path Implementation ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix specific implementation of the Path API. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. 
+//===----------------------------------------------------------------------===// + +#include "Unix.h" +#include <limits.h> +#include <stdio.h> +#if HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#if HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#include <dirent.h> +#include <pwd.h> + +#ifdef __APPLE__ +#include <mach-o/dyld.h> +#include <sys/attr.h> +#include <copyfile.h> +#elif defined(__DragonFly__) +#include <sys/mount.h> +#endif + +// Both stdio.h and cstdio are included via different paths and +// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros +// either. +#undef ferror +#undef feof + +// For GNU Hurd +#if defined(__GNU__) && !defined(PATH_MAX) +# define PATH_MAX 4096 +# define MAXPATHLEN 4096 +#endif + +#include <sys/types.h> +#if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && \ + !defined(__linux__) && !defined(__FreeBSD_kernel__) && !defined(_AIX) +#include <sys/statvfs.h> +#define STATVFS statvfs +#define FSTATVFS fstatvfs +#define STATVFS_F_FRSIZE(vfs) vfs.f_frsize +#else +#if defined(__OpenBSD__) || defined(__FreeBSD__) +#include <sys/mount.h> +#include <sys/param.h> +#elif defined(__linux__) +#if defined(HAVE_LINUX_MAGIC_H) +#include <linux/magic.h> +#else +#if defined(HAVE_LINUX_NFS_FS_H) +#include <linux/nfs_fs.h> +#endif +#if defined(HAVE_LINUX_SMB_H) +#include <linux/smb.h> +#endif +#endif +#include <sys/vfs.h> +#elif defined(_AIX) +#include <sys/statfs.h> + +// <sys/vmount.h> depends on `uint` to be a typedef from <sys/types.h> to +// `uint_t`; however, <sys/types.h> does not always declare `uint`. We provide +// the typedef prior to including <sys/vmount.h> to work around this issue. 
+typedef uint_t uint; +#include <sys/vmount.h> +#else +#include <sys/mount.h> +#endif +#define STATVFS statfs +#define FSTATVFS fstatfs +#define STATVFS_F_FRSIZE(vfs) static_cast<uint64_t>(vfs.f_bsize) +#endif + +#if defined(__NetBSD__) || defined(__DragonFly__) || defined(__GNU__) +#define STATVFS_F_FLAG(vfs) (vfs).f_flag +#else +#define STATVFS_F_FLAG(vfs) (vfs).f_flags +#endif + +using namespace llvm; + +namespace llvm { +namespace sys { +namespace fs { + +const file_t kInvalidFile = -1; + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__minix) || defined(__FreeBSD_kernel__) || defined(__linux__) || \ + defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__) +static int +test_dir(char ret[PATH_MAX], const char *dir, const char *bin) +{ + struct stat sb; + char fullpath[PATH_MAX]; + + int chars = snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin); + // We cannot write PATH_MAX characters because the string will be terminated + // with a null character. Fail if truncation happened. + if (chars >= PATH_MAX) + return 1; + if (!realpath(fullpath, ret)) + return 1; + if (stat(fullpath, &sb) != 0) + return 1; + + return 0; +} + +static char * +getprogpath(char ret[PATH_MAX], const char *bin) +{ + /* First approach: absolute path. */ + if (bin[0] == '/') { + if (test_dir(ret, "/", bin) == 0) + return ret; + return nullptr; + } + + /* Second approach: relative path. 
*/ + if (strchr(bin, '/')) { + char cwd[PATH_MAX]; + if (!getcwd(cwd, PATH_MAX)) + return nullptr; + if (test_dir(ret, cwd, bin) == 0) + return ret; + return nullptr; + } + + /* Third approach: $PATH */ + char *pv; + if ((pv = getenv("PATH")) == nullptr) + return nullptr; + char *s = strdup(pv); + if (!s) + return nullptr; + char *state; + for (char *t = strtok_r(s, ":", &state); t != nullptr; + t = strtok_r(nullptr, ":", &state)) { + if (test_dir(ret, t, bin) == 0) { + free(s); + return ret; + } + } + free(s); + return nullptr; +} +#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__ + +/// GetMainExecutable - Return the path to the main executable, given the +/// value of argv[0] from program startup. +std::string getMainExecutable(const char *argv0, void *MainAddr) { +#if defined(__APPLE__) + // On OS X the executable path is saved to the stack by dyld. Reading it + // from there is much faster than calling dladdr, especially for large + // binaries with symbols. + char exe_path[MAXPATHLEN]; + uint32_t size = sizeof(exe_path); + if (_NSGetExecutablePath(exe_path, &size) == 0) { + char link_path[MAXPATHLEN]; + if (realpath(exe_path, link_path)) + return link_path; + } +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__minix) || defined(__DragonFly__) || \ + defined(__FreeBSD_kernel__) || defined(_AIX) + const char *curproc = "/proc/curproc/file"; + char exe_path[PATH_MAX]; + // /proc is not mounted by default under FreeBSD, but gives more accurate + // information than argv[0] when it is. + if (sys::fs::exists(curproc)) { + ssize_t len = readlink(curproc, exe_path, sizeof(exe_path)); + if (len > 0) { + // Null terminate the string for realpath. readlink never null + // terminates its output. 
+ len = std::min(len, ssize_t(sizeof(exe_path) - 1)); + exe_path[len] = '\0'; + return exe_path; + } + } + // If we don't have procfs mounted, fall back to argv[0] + if (getprogpath(exe_path, argv0) != NULL) + return exe_path; +#elif defined(__linux__) || defined(__CYGWIN__) + char exe_path[MAXPATHLEN]; + const char *aPath = "/proc/self/exe"; + if (sys::fs::exists(aPath)) { + // /proc is not always mounted under Linux (chroot for example). + ssize_t len = readlink(aPath, exe_path, sizeof(exe_path)); + if (len < 0) + return ""; + + // Null terminate the string for realpath. readlink never null + // terminates its output. + len = std::min(len, ssize_t(sizeof(exe_path) - 1)); + exe_path[len] = '\0'; + + // On Linux, /proc/self/exe always looks through symlinks. However, on + // GNU/Hurd, /proc/self/exe is a symlink to the path that was used to start + // the program, and not the eventual binary file. Therefore, call realpath + // so this behaves the same on all platforms. +#if _POSIX_VERSION >= 200112 || defined(__GLIBC__) + if (char *real_path = realpath(exe_path, NULL)) { + std::string ret = std::string(real_path); + free(real_path); + return ret; + } +#else + char real_path[MAXPATHLEN]; + if (realpath(exe_path, real_path)) + return std::string(real_path); +#endif + } + // Fall back to the classical detection. + if (getprogpath(exe_path, argv0)) + return exe_path; +#elif defined(HAVE_DLFCN_H) && defined(HAVE_DLADDR) + // Use dladdr to get executable path if available. + Dl_info DLInfo; + int err = dladdr(MainAddr, &DLInfo); + if (err == 0) + return ""; + + // If the filename is a symlink, we need to resolve and return the location of + // the actual executable. + char link_path[MAXPATHLEN]; + if (realpath(DLInfo.dli_fname, link_path)) + return link_path; +#else +#error GetMainExecutable is not implemented on this host yet. 
+#endif + return ""; +} + +TimePoint<> basic_file_status::getLastAccessedTime() const { + return toTimePoint(fs_st_atime, fs_st_atime_nsec); +} + +TimePoint<> basic_file_status::getLastModificationTime() const { + return toTimePoint(fs_st_mtime, fs_st_mtime_nsec); +} + +UniqueID file_status::getUniqueID() const { + return UniqueID(fs_st_dev, fs_st_ino); +} + +uint32_t file_status::getLinkCount() const { + return fs_st_nlinks; +} + +ErrorOr<space_info> disk_space(const Twine &Path) { + struct STATVFS Vfs; + if (::STATVFS(const_cast<char *>(Path.str().c_str()), &Vfs)) + return std::error_code(errno, std::generic_category()); + auto FrSize = STATVFS_F_FRSIZE(Vfs); + space_info SpaceInfo; + SpaceInfo.capacity = static_cast<uint64_t>(Vfs.f_blocks) * FrSize; + SpaceInfo.free = static_cast<uint64_t>(Vfs.f_bfree) * FrSize; + SpaceInfo.available = static_cast<uint64_t>(Vfs.f_bavail) * FrSize; + return SpaceInfo; +} + +std::error_code current_path(SmallVectorImpl<char> &result) { + result.clear(); + + const char *pwd = ::getenv("PWD"); + llvm::sys::fs::file_status PWDStatus, DotStatus; + if (pwd && llvm::sys::path::is_absolute(pwd) && + !llvm::sys::fs::status(pwd, PWDStatus) && + !llvm::sys::fs::status(".", DotStatus) && + PWDStatus.getUniqueID() == DotStatus.getUniqueID()) { + result.append(pwd, pwd + strlen(pwd)); + return std::error_code(); + } + +#ifdef MAXPATHLEN + result.reserve(MAXPATHLEN); +#else +// For GNU Hurd + result.reserve(1024); +#endif + + while (true) { + if (::getcwd(result.data(), result.capacity()) == nullptr) { + // See if there was a real error. + if (errno != ENOMEM) + return std::error_code(errno, std::generic_category()); + // Otherwise there just wasn't enough space. 
+ result.reserve(result.capacity() * 2); + } else + break; + } + + result.set_size(strlen(result.data())); + return std::error_code(); +} + +std::error_code set_current_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + + if (::chdir(p.begin()) == -1) + return std::error_code(errno, std::generic_category()); + + return std::error_code(); +} + +std::error_code create_directory(const Twine &path, bool IgnoreExisting, + perms Perms) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + + if (::mkdir(p.begin(), Perms) == -1) { + if (errno != EEXIST || !IgnoreExisting) + return std::error_code(errno, std::generic_category()); + } + + return std::error_code(); +} + +// Note that we are using symbolic link because hard links are not supported by +// all filesystems (SMB doesn't). +std::error_code create_link(const Twine &to, const Twine &from) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); + + if (::symlink(t.begin(), f.begin()) == -1) + return std::error_code(errno, std::generic_category()); + + return std::error_code(); +} + +std::error_code create_hard_link(const Twine &to, const Twine &from) { + // Get arguments. 
+ SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); + + if (::link(t.begin(), f.begin()) == -1) + return std::error_code(errno, std::generic_category()); + + return std::error_code(); +} + +std::error_code remove(const Twine &path, bool IgnoreNonExisting) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + + struct stat buf; + if (lstat(p.begin(), &buf) != 0) { + if (errno != ENOENT || !IgnoreNonExisting) + return std::error_code(errno, std::generic_category()); + return std::error_code(); + } + + // Note: this check catches strange situations. In all cases, LLVM should + // only be involved in the creation and deletion of regular files. This + // check ensures that what we're trying to erase is a regular file. It + // effectively prevents LLVM from erasing things like /dev/null, any block + // special file, or other things that aren't "regular" files. + if (!S_ISREG(buf.st_mode) && !S_ISDIR(buf.st_mode) && !S_ISLNK(buf.st_mode)) + return make_error_code(errc::operation_not_permitted); + + if (::remove(p.begin()) == -1) { + if (errno != ENOENT || !IgnoreNonExisting) + return std::error_code(errno, std::generic_category()); + } + + return std::error_code(); +} + +static bool is_local_impl(struct STATVFS &Vfs) { +#if defined(__linux__) || defined(__GNU__) +#ifndef NFS_SUPER_MAGIC +#define NFS_SUPER_MAGIC 0x6969 +#endif +#ifndef SMB_SUPER_MAGIC +#define SMB_SUPER_MAGIC 0x517B +#endif +#ifndef CIFS_MAGIC_NUMBER +#define CIFS_MAGIC_NUMBER 0xFF534D42 +#endif +#ifdef __GNU__ + switch ((uint32_t)Vfs.__f_type) { +#else + switch ((uint32_t)Vfs.f_type) { +#endif + case NFS_SUPER_MAGIC: + case SMB_SUPER_MAGIC: + case CIFS_MAGIC_NUMBER: + return false; + default: + return true; + } +#elif defined(__CYGWIN__) + // Cygwin doesn't expose this information; would need to use Win32 API. 
+ return false; +#elif defined(__Fuchsia__) + // Fuchsia doesn't yet support remote filesystem mounts. + return true; +#elif defined(__EMSCRIPTEN__) + // Emscripten doesn't currently support remote filesystem mounts. + return true; +#elif defined(__HAIKU__) + // Haiku doesn't expose this information. + return false; +#elif defined(__sun) + // statvfs::f_basetype contains a null-terminated FSType name of the mounted target + StringRef fstype(Vfs.f_basetype); + // NFS is the only non-local fstype?? + return !fstype.equals("nfs"); +#elif defined(_AIX) + // Call mntctl; try more than twice in case of timing issues with a concurrent + // mount. + int Ret; + size_t BufSize = 2048u; + std::unique_ptr<char[]> Buf; + int Tries = 3; + while (Tries--) { + Buf = std::make_unique<char[]>(BufSize); + Ret = mntctl(MCTL_QUERY, BufSize, Buf.get()); + if (Ret != 0) + break; + BufSize = *reinterpret_cast<unsigned int *>(Buf.get()); + Buf.reset(); + } + + if (Ret == -1) + // There was an error; "remote" is the conservative answer. + return false; + + // Look for the correct vmount entry. + char *CurObjPtr = Buf.get(); + while (Ret--) { + struct vmount *Vp = reinterpret_cast<struct vmount *>(CurObjPtr); + static_assert(sizeof(Vfs.f_fsid) == sizeof(Vp->vmt_fsid), + "fsid length mismatch"); + if (memcmp(&Vfs.f_fsid, &Vp->vmt_fsid, sizeof Vfs.f_fsid) == 0) + return (Vp->vmt_flags & MNT_REMOTE) == 0; + + CurObjPtr += Vp->vmt_length; + } + + // vmount entry not found; "remote" is the conservative answer. 
+ return false; +#else + return !!(STATVFS_F_FLAG(Vfs) & MNT_LOCAL); +#endif +} + +std::error_code is_local(const Twine &Path, bool &Result) { + struct STATVFS Vfs; + if (::STATVFS(const_cast<char *>(Path.str().c_str()), &Vfs)) + return std::error_code(errno, std::generic_category()); + + Result = is_local_impl(Vfs); + return std::error_code(); +} + +std::error_code is_local(int FD, bool &Result) { + struct STATVFS Vfs; + if (::FSTATVFS(FD, &Vfs)) + return std::error_code(errno, std::generic_category()); + + Result = is_local_impl(Vfs); + return std::error_code(); +} + +std::error_code rename(const Twine &from, const Twine &to) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); + + if (::rename(f.begin(), t.begin()) == -1) + return std::error_code(errno, std::generic_category()); + + return std::error_code(); +} + +std::error_code resize_file(int FD, uint64_t Size) { +#if defined(HAVE_POSIX_FALLOCATE) + // If we have posix_fallocate use it. Unlike ftruncate it always allocates + // space, so we get an error if the disk is full. + if (int Err = ::posix_fallocate(FD, 0, Size)) { +#ifdef _AIX + constexpr int NotSupportedError = ENOTSUP; +#else + constexpr int NotSupportedError = EOPNOTSUPP; +#endif + if (Err != EINVAL && Err != NotSupportedError) + return std::error_code(Err, std::generic_category()); + } +#endif + // Use ftruncate as a fallback. It may or may not allocate space. At least on + // OS X with HFS+ it does. + if (::ftruncate(FD, Size) == -1) + return std::error_code(errno, std::generic_category()); + + return std::error_code(); +} + +static int convertAccessMode(AccessMode Mode) { + switch (Mode) { + case AccessMode::Exist: + return F_OK; + case AccessMode::Write: + return W_OK; + case AccessMode::Execute: + return R_OK | X_OK; // scripts also need R_OK. 
+ } + llvm_unreachable("invalid enum"); +} + +std::error_code access(const Twine &Path, AccessMode Mode) { + SmallString<128> PathStorage; + StringRef P = Path.toNullTerminatedStringRef(PathStorage); + + if (::access(P.begin(), convertAccessMode(Mode)) == -1) + return std::error_code(errno, std::generic_category()); + + if (Mode == AccessMode::Execute) { + // Don't say that directories are executable. + struct stat buf; + if (0 != stat(P.begin(), &buf)) + return errc::permission_denied; + if (!S_ISREG(buf.st_mode)) + return errc::permission_denied; + } + + return std::error_code(); +} + +bool can_execute(const Twine &Path) { + return !access(Path, AccessMode::Execute); +} + +bool equivalent(file_status A, file_status B) { + assert(status_known(A) && status_known(B)); + return A.fs_st_dev == B.fs_st_dev && + A.fs_st_ino == B.fs_st_ino; +} + +std::error_code equivalent(const Twine &A, const Twine &B, bool &result) { + file_status fsA, fsB; + if (std::error_code ec = status(A, fsA)) + return ec; + if (std::error_code ec = status(B, fsB)) + return ec; + result = equivalent(fsA, fsB); + return std::error_code(); +} + +static void expandTildeExpr(SmallVectorImpl<char> &Path) { + StringRef PathStr(Path.begin(), Path.size()); + if (PathStr.empty() || !PathStr.startswith("~")) + return; + + PathStr = PathStr.drop_front(); + StringRef Expr = + PathStr.take_until([](char c) { return path::is_separator(c); }); + StringRef Remainder = PathStr.substr(Expr.size() + 1); + SmallString<128> Storage; + if (Expr.empty()) { + // This is just ~/..., resolve it to the current user's home dir. + if (!path::home_directory(Storage)) { + // For some reason we couldn't get the home directory. Just exit. + return; + } + + // Overwrite the first character and insert the rest. + Path[0] = Storage[0]; + Path.insert(Path.begin() + 1, Storage.begin() + 1, Storage.end()); + return; + } + + // This is a string of the form ~username/, look up this user's entry in the + // password database. 
+ struct passwd *Entry = nullptr; + std::string User = Expr.str(); + Entry = ::getpwnam(User.c_str()); + + if (!Entry) { + // Unable to look up the entry, just return back the original path. + return; + } + + Storage = Remainder; + Path.clear(); + Path.append(Entry->pw_dir, Entry->pw_dir + strlen(Entry->pw_dir)); + llvm::sys::path::append(Path, Storage); +} + + +void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) { + dest.clear(); + if (path.isTriviallyEmpty()) + return; + + path.toVector(dest); + expandTildeExpr(dest); + + return; +} + +static file_type typeForMode(mode_t Mode) { + if (S_ISDIR(Mode)) + return file_type::directory_file; + else if (S_ISREG(Mode)) + return file_type::regular_file; + else if (S_ISBLK(Mode)) + return file_type::block_file; + else if (S_ISCHR(Mode)) + return file_type::character_file; + else if (S_ISFIFO(Mode)) + return file_type::fifo_file; + else if (S_ISSOCK(Mode)) + return file_type::socket_file; + else if (S_ISLNK(Mode)) + return file_type::symlink_file; + return file_type::type_unknown; +} + +static std::error_code fillStatus(int StatRet, const struct stat &Status, + file_status &Result) { + if (StatRet != 0) { + std::error_code EC(errno, std::generic_category()); + if (EC == errc::no_such_file_or_directory) + Result = file_status(file_type::file_not_found); + else + Result = file_status(file_type::status_error); + return EC; + } + + uint32_t atime_nsec, mtime_nsec; +#if defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC) + atime_nsec = Status.st_atimespec.tv_nsec; + mtime_nsec = Status.st_mtimespec.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC) + atime_nsec = Status.st_atim.tv_nsec; + mtime_nsec = Status.st_mtim.tv_nsec; +#else + atime_nsec = mtime_nsec = 0; +#endif + + perms Perms = static_cast<perms>(Status.st_mode) & all_perms; + Result = file_status(typeForMode(Status.st_mode), Perms, Status.st_dev, + Status.st_nlink, Status.st_ino, + Status.st_atime, atime_nsec, Status.st_mtime, mtime_nsec, + 
/// Return the calling process's current file-mode creation mask.
///
/// The mask is read by installing an arbitrary new mask (0) and then
/// immediately resetting the umask to the value that call returned.
unsigned getUmask() {
  const unsigned Previous = ::umask(0);
  // umask(2) never fails, and the value returned by this second call is only
  // the temporary mask set above, so it is ignored.
  static_cast<void>(::umask(Previous));
  return Previous;
}
std::error_code(); +#else +#warning Missing futimes() and futimens() + return make_error_code(errc::function_not_supported); +#endif +} + +std::error_code mapped_file_region::init(int FD, uint64_t Offset, + mapmode Mode) { + assert(Size != 0); + + int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE; + int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE); +#if defined(__APPLE__) + //---------------------------------------------------------------------- + // Newer versions of MacOSX have a flag that will allow us to read from + // binaries whose code signature is invalid without crashing by using + // the MAP_RESILIENT_CODESIGN flag. Also if a file from removable media + // is mapped we can avoid crashing and return zeroes to any pages we try + // to read if the media becomes unavailable by using the + // MAP_RESILIENT_MEDIA flag. These flags are only usable when mapping + // with PROT_READ, so take care not to specify them otherwise. + //---------------------------------------------------------------------- + if (Mode == readonly) { +#if defined(MAP_RESILIENT_CODESIGN) + flags |= MAP_RESILIENT_CODESIGN; +#endif +#if defined(MAP_RESILIENT_MEDIA) + flags |= MAP_RESILIENT_MEDIA; +#endif + } +#endif // #if defined (__APPLE__) + + Mapping = ::mmap(nullptr, Size, prot, flags, FD, Offset); + if (Mapping == MAP_FAILED) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + +mapped_file_region::mapped_file_region(int fd, mapmode mode, size_t length, + uint64_t offset, std::error_code &ec) + : Size(length), Mapping(), Mode(mode) { + (void)Mode; + ec = init(fd, offset, mode); + if (ec) + Mapping = nullptr; +} + +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::munmap(Mapping, Size); +} + +size_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} + +char *mapped_file_region::data() const { + assert(Mapping && "Mapping failed but used anyway!"); + 
return reinterpret_cast<char*>(Mapping); +} + +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} + +int mapped_file_region::alignment() { + return Process::getPageSizeEstimate(); +} + +std::error_code detail::directory_iterator_construct(detail::DirIterState &it, + StringRef path, + bool follow_symlinks) { + SmallString<128> path_null(path); + DIR *directory = ::opendir(path_null.c_str()); + if (!directory) + return std::error_code(errno, std::generic_category()); + + it.IterationHandle = reinterpret_cast<intptr_t>(directory); + // Add something for replace_filename to replace. + path::append(path_null, "."); + it.CurrentEntry = directory_entry(path_null.str(), follow_symlinks); + return directory_iterator_increment(it); +} + +std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) { + if (it.IterationHandle) + ::closedir(reinterpret_cast<DIR *>(it.IterationHandle)); + it.IterationHandle = 0; + it.CurrentEntry = directory_entry(); + return std::error_code(); +} + +static file_type direntType(dirent* Entry) { + // Most platforms provide the file type in the dirent: Linux/BSD/Mac. + // The DTTOIF macro lets us reuse our status -> type conversion. + // Note that while glibc provides a macro to see if this is supported, + // _DIRENT_HAVE_D_TYPE, it's not defined on BSD/Mac, so we test for the + // d_type-to-mode_t conversion macro instead. +#if defined(DTTOIF) + return typeForMode(DTTOIF(Entry->d_type)); +#else + // Other platforms such as Solaris require a stat() to get the type. 
+ return file_type::type_unknown; +#endif +} + +std::error_code detail::directory_iterator_increment(detail::DirIterState &It) { + errno = 0; + dirent *CurDir = ::readdir(reinterpret_cast<DIR *>(It.IterationHandle)); + if (CurDir == nullptr && errno != 0) { + return std::error_code(errno, std::generic_category()); + } else if (CurDir != nullptr) { + StringRef Name(CurDir->d_name); + if ((Name.size() == 1 && Name[0] == '.') || + (Name.size() == 2 && Name[0] == '.' && Name[1] == '.')) + return directory_iterator_increment(It); + It.CurrentEntry.replace_filename(Name, direntType(CurDir)); + } else + return directory_iterator_destruct(It); + + return std::error_code(); +} + +ErrorOr<basic_file_status> directory_entry::status() const { + file_status s; + if (auto EC = fs::status(Path, s, FollowSymlinks)) + return EC; + return s; +} + +#if !defined(F_GETPATH) +static bool hasProcSelfFD() { + // If we have a /proc filesystem mounted, we can quickly establish the + // real name of the file with readlink + static const bool Result = (::access("/proc/self/fd", R_OK) == 0); + return Result; +} +#endif + +static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags, + FileAccess Access) { + int Result = 0; + if (Access == FA_Read) + Result |= O_RDONLY; + else if (Access == FA_Write) + Result |= O_WRONLY; + else if (Access == (FA_Read | FA_Write)) + Result |= O_RDWR; + + // This is for compatibility with old code that assumed OF_Append implied + // would open an existing file. See Windows/Path.inc for a longer comment. + if (Flags & OF_Append) + Disp = CD_OpenAlways; + + if (Disp == CD_CreateNew) { + Result |= O_CREAT; // Create if it doesn't exist. + Result |= O_EXCL; // Fail if it does. + } else if (Disp == CD_CreateAlways) { + Result |= O_CREAT; // Create if it doesn't exist. + Result |= O_TRUNC; // Truncate if it does. + } else if (Disp == CD_OpenAlways) { + Result |= O_CREAT; // Create if it doesn't exist. 
+ } else if (Disp == CD_OpenExisting) { + // Nothing special, just don't add O_CREAT and we get these semantics. + } + + if (Flags & OF_Append) + Result |= O_APPEND; + +#ifdef O_CLOEXEC + if (!(Flags & OF_ChildInherit)) + Result |= O_CLOEXEC; +#endif + + return Result; +} + +std::error_code openFile(const Twine &Name, int &ResultFD, + CreationDisposition Disp, FileAccess Access, + OpenFlags Flags, unsigned Mode) { + int OpenFlags = nativeOpenFlags(Disp, Flags, Access); + + SmallString<128> Storage; + StringRef P = Name.toNullTerminatedStringRef(Storage); + // Call ::open in a lambda to avoid overload resolution in RetryAfterSignal + // when open is overloaded, such as in Bionic. + auto Open = [&]() { return ::open(P.begin(), OpenFlags, Mode); }; + if ((ResultFD = sys::RetryAfterSignal(-1, Open)) < 0) + return std::error_code(errno, std::generic_category()); +#ifndef O_CLOEXEC + if (!(Flags & OF_ChildInherit)) { + int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC); + (void)r; + assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed"); + } +#endif + return std::error_code(); +} + +Expected<int> openNativeFile(const Twine &Name, CreationDisposition Disp, + FileAccess Access, OpenFlags Flags, + unsigned Mode) { + + int FD; + std::error_code EC = openFile(Name, FD, Disp, Access, Flags, Mode); + if (EC) + return errorCodeToError(EC); + return FD; +} + +std::error_code openFileForRead(const Twine &Name, int &ResultFD, + OpenFlags Flags, + SmallVectorImpl<char> *RealPath) { + std::error_code EC = + openFile(Name, ResultFD, CD_OpenExisting, FA_Read, Flags, 0666); + if (EC) + return EC; + + // Attempt to get the real name of the file, if the user asked + if(!RealPath) + return std::error_code(); + RealPath->clear(); +#if defined(F_GETPATH) + // When F_GETPATH is availble, it is the quickest way to get + // the real path name. 
+ char Buffer[MAXPATHLEN]; + if (::fcntl(ResultFD, F_GETPATH, Buffer) != -1) + RealPath->append(Buffer, Buffer + strlen(Buffer)); +#else + char Buffer[PATH_MAX]; + if (hasProcSelfFD()) { + char ProcPath[64]; + snprintf(ProcPath, sizeof(ProcPath), "/proc/self/fd/%d", ResultFD); + ssize_t CharCount = ::readlink(ProcPath, Buffer, sizeof(Buffer)); + if (CharCount > 0) + RealPath->append(Buffer, Buffer + CharCount); + } else { + SmallString<128> Storage; + StringRef P = Name.toNullTerminatedStringRef(Storage); + + // Use ::realpath to get the real path name + if (::realpath(P.begin(), Buffer) != nullptr) + RealPath->append(Buffer, Buffer + strlen(Buffer)); + } +#endif + return std::error_code(); +} + +Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags, + SmallVectorImpl<char> *RealPath) { + file_t ResultFD; + std::error_code EC = openFileForRead(Name, ResultFD, Flags, RealPath); + if (EC) + return errorCodeToError(EC); + return ResultFD; +} + +file_t getStdinHandle() { return 0; } +file_t getStdoutHandle() { return 1; } +file_t getStderrHandle() { return 2; } + +Expected<size_t> readNativeFile(file_t FD, MutableArrayRef<char> Buf) { + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); + if (ssize_t(NumRead) == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + return NumRead; +} + +Expected<size_t> readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf, + uint64_t Offset) { +#ifdef HAVE_PREAD + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::pread, FD, Buf.data(), Buf.size(), Offset); +#else + if (lseek(FD, Offset, SEEK_SET) == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); +#endif + if (NumRead == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + return NumRead; +} + +std::error_code closeFile(file_t &F) { + file_t TmpF = F; + F = 
kInvalidFile; + return Process::SafelyCloseFileDescriptor(TmpF); +} + +template <typename T> +static std::error_code remove_directories_impl(const T &Entry, + bool IgnoreErrors) { + std::error_code EC; + directory_iterator Begin(Entry, EC, false); + directory_iterator End; + while (Begin != End) { + auto &Item = *Begin; + ErrorOr<basic_file_status> st = Item.status(); + if (!st && !IgnoreErrors) + return st.getError(); + + if (is_directory(*st)) { + EC = remove_directories_impl(Item, IgnoreErrors); + if (EC && !IgnoreErrors) + return EC; + } + + EC = fs::remove(Item.path(), true); + if (EC && !IgnoreErrors) + return EC; + + Begin.increment(EC); + if (EC && !IgnoreErrors) + return EC; + } + return std::error_code(); +} + +std::error_code remove_directories(const Twine &path, bool IgnoreErrors) { + auto EC = remove_directories_impl(path, IgnoreErrors); + if (EC && !IgnoreErrors) + return EC; + EC = fs::remove(path, true); + if (EC && !IgnoreErrors) + return EC; + return std::error_code(); +} + +std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest, + bool expand_tilde) { + dest.clear(); + if (path.isTriviallyEmpty()) + return std::error_code(); + + if (expand_tilde) { + SmallString<128> Storage; + path.toVector(Storage); + expandTildeExpr(Storage); + return real_path(Storage, dest, false); + } + + SmallString<128> Storage; + StringRef P = path.toNullTerminatedStringRef(Storage); + char Buffer[PATH_MAX]; + if (::realpath(P.begin(), Buffer) == nullptr) + return std::error_code(errno, std::generic_category()); + dest.append(Buffer, Buffer + strlen(Buffer)); + return std::error_code(); +} + +} // end namespace fs + +namespace path { + +bool home_directory(SmallVectorImpl<char> &result) { + char *RequestedDir = getenv("HOME"); + if (!RequestedDir) { + struct passwd *pw = getpwuid(getuid()); + if (pw && pw->pw_dir) + RequestedDir = pw->pw_dir; + } + if (!RequestedDir) + return false; + + result.clear(); + result.append(RequestedDir, RequestedDir + 
/// Look up the temporary directory requested through the environment.
///
/// TMPDIR, TMP, TEMP and TEMPDIR are consulted in that order; the value of
/// the first variable that is set is returned as-is.
///
/// \returns the pointer returned by std::getenv for the first variable that
/// is set, or nullptr when none of them is.
static const char *getEnvTempDir() {
  static const char *const Candidates[] = {"TMPDIR", "TMP", "TEMP", "TEMPDIR"};
  const unsigned NumCandidates = sizeof(Candidates) / sizeof(Candidates[0]);

  const char *Dir = nullptr;
  for (unsigned I = 0; Dir == nullptr && I != NumCandidates; ++I)
    Dir = std::getenv(Candidates[I]);
  return Dir;
}
+/// Unfortunately fcopyfile(3) does not support COPYFILE_CLONE, so the +/// file descriptor variant of this function still uses the default +/// implementation. +std::error_code copy_file(const Twine &From, const Twine &To) { + uint32_t Flag = COPYFILE_DATA; +#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE) + if (__builtin_available(macos 10.12, *)) { + bool IsSymlink; + if (std::error_code Error = is_symlink_file(From, IsSymlink)) + return Error; + // COPYFILE_CLONE clones the symlink instead of following it + // and returns EEXISTS if the target file already exists. + if (!IsSymlink && !exists(To)) + Flag = COPYFILE_CLONE; + } +#endif + int Status = + copyfile(From.str().c_str(), To.str().c_str(), /* State */ NULL, Flag); + + if (Status == 0) + return std::error_code(); + return std::error_code(errno, std::generic_category()); +} +#endif // __APPLE__ + +} // end namespace fs + +} // end namespace sys +} // end namespace llvm diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc new file mode 100644 index 0000000000000..dfe81d7e28337 --- /dev/null +++ b/llvm/lib/Support/Unix/Process.inc @@ -0,0 +1,459 @@ +//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the generic Unix implementation of the Process class. 
/// Return the {user, system} CPU times of the current process.
///
/// With HAVE_GETRUSAGE defined the values are the ru_utime and ru_stime
/// fields filled in by getrusage(RUSAGE_SELF), converted with toDuration.
/// Without it a compile-time warning is emitted and both values are zero.
static std::pair<std::chrono::microseconds, std::chrono::microseconds> getRUsageTimes() {
  using std::chrono::microseconds;
#if defined(HAVE_GETRUSAGE)
  struct rusage Usage;
  ::getrusage(RUSAGE_SELF, &Usage);
  const microseconds UserTime = toDuration(Usage.ru_utime);
  const microseconds SystemTime = toDuration(Usage.ru_stime);
  return std::make_pair(UserTime, SystemTime);
#else
#warning Cannot get usage times on this platform
  return std::make_pair(microseconds::zero(), microseconds::zero());
#endif
}
+Expected<unsigned> Process::getPageSize() { +#if defined(HAVE_GETPAGESIZE) + static const int page_size = ::getpagesize(); +#elif defined(HAVE_SYSCONF) + static long page_size = ::sysconf(_SC_PAGE_SIZE); +#else +#error Cannot get the page size on this machine +#endif + if (page_size == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + return static_cast<unsigned>(page_size); +} + +size_t Process::GetMallocUsage() { +#if defined(HAVE_MALLINFO) + struct mallinfo mi; + mi = ::mallinfo(); + return mi.uordblks; +#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H) + malloc_statistics_t Stats; + malloc_zone_statistics(malloc_default_zone(), &Stats); + return Stats.size_in_use; // darwin +#elif defined(HAVE_MALLCTL) + size_t alloc, sz; + sz = sizeof(size_t); + if (mallctl("stats.allocated", &alloc, &sz, NULL, 0) == 0) + return alloc; + return 0; +#elif defined(HAVE_SBRK) + // Note this is only an approximation and more closely resembles + // the value returned by mallinfo in the arena field. + static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0)); + char *EndOfMemory = (char*)sbrk(0); + if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1)) + return EndOfMemory - StartOfMemory; + return 0; +#else +#warning Cannot get malloc info on this platform + return 0; +#endif +} + +void Process::GetTimeUsage(TimePoint<> &elapsed, std::chrono::nanoseconds &user_time, + std::chrono::nanoseconds &sys_time) { + elapsed = std::chrono::system_clock::now(); + std::tie(user_time, sys_time) = getRUsageTimes(); +} + +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) +#include <mach/mach.h> +#endif + +// Some LLVM programs such as bugpoint produce core files as a normal part of +// their operation. To prevent the disk from filling up, this function +// does what's necessary to prevent their generation. 
namespace {
/// Scope guard that closes a file descriptor when it goes out of scope.
///
/// The guard holds a reference to the caller's descriptor variable, so the
/// value that gets closed is whatever that variable contains at destruction
/// time. Nothing is closed if the value is negative or keepOpen() was called.
class FDCloser {
  int &Descriptor;
  bool LeaveOpen = false;

public:
  FDCloser(int &FD) : Descriptor(FD) {}

  // Non-copyable: two guards must never close the same descriptor.
  FDCloser(const FDCloser &) = delete;
  void operator=(const FDCloser &) = delete;

  ~FDCloser() {
    if (LeaveOpen || Descriptor < 0)
      return;
    ::close(Descriptor);
  }

  /// Disarm the guard; the descriptor is left open on destruction.
  void keepOpen() { LeaveOpen = true; }
};
} // end anonymous namespace
(RetryAfterSignal(-1, ::fstat, StandardFD, &st) < 0) { + assert(errno && "expected errno to be set if fstat failed!"); + // fstat should return EBADF if the file descriptor is closed. + if (errno != EBADF) + return std::error_code(errno, std::generic_category()); + } + // if fstat succeeds, move on to the next FD. + if (!errno) + continue; + assert(errno == EBADF && "expected errno to have EBADF at this point!"); + + if (NullFD < 0) { + // Call ::open in a lambda to avoid overload resolution in + // RetryAfterSignal when open is overloaded, such as in Bionic. + auto Open = [&]() { return ::open("/dev/null", O_RDWR); }; + if ((NullFD = RetryAfterSignal(-1, Open)) < 0) + return std::error_code(errno, std::generic_category()); + } + + if (NullFD == StandardFD) + FDC.keepOpen(); + else if (dup2(NullFD, StandardFD) < 0) + return std::error_code(errno, std::generic_category()); + } + return std::error_code(); +} + +std::error_code Process::SafelyCloseFileDescriptor(int FD) { + // Create a signal set filled with *all* signals. + sigset_t FullSet; + if (sigfillset(&FullSet) < 0) + return std::error_code(errno, std::generic_category()); + // Atomically swap our current signal mask with a full mask. + sigset_t SavedSet; +#if LLVM_ENABLE_THREADS + if (int EC = pthread_sigmask(SIG_SETMASK, &FullSet, &SavedSet)) + return std::error_code(EC, std::generic_category()); +#else + if (sigprocmask(SIG_SETMASK, &FullSet, &SavedSet) < 0) + return std::error_code(errno, std::generic_category()); +#endif + // Attempt to close the file descriptor. + // We need to save the error, if one occurs, because our subsequent call to + // pthread_sigmask might tamper with errno. + int ErrnoFromClose = 0; + if (::close(FD) < 0) + ErrnoFromClose = errno; + // Restore the signal mask back to what we saved earlier. 
/// Determine how many columns output to \p FileID should be wrapped at.
///
/// A COLUMNS environment variable that std::atoi parses to a positive number
/// takes precedence. Otherwise, when the ioctl/termios headers are configured
/// and neither _XOPEN_SOURCE nor _POSIX_C_SOURCE is defined, the ws_col field
/// from a TIOCGWINSZ ioctl on \p FileID is used.
///
/// \returns the column count, or 0 when neither source provides one.
static unsigned getColumns(int FileID) {
  // If COLUMNS is defined in the environment, wrap to that many columns.
  const char *EnvWidth = std::getenv("COLUMNS");
  const int Requested = EnvWidth ? std::atoi(EnvWidth) : 0;
  if (Requested > 0)
    return Requested;

#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) \
  && !(defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE))
  // Try to determine the width of the terminal.
  struct winsize TermSize;
  if (ioctl(FileID, TIOCGWINSZ, &TermSize) == 0)
    return TermSize.ws_col;
#endif

  return 0;
}
/// Decide whether the terminal attached to \p fd should be sent color escape
/// codes.
///
/// With HAVE_TERMINFO the answer comes from the terminfo "colors" capability,
/// queried through setupterm/tigetnum while holding TermColorMutex. Without
/// it the TERM environment variable is matched against a fixed list of names,
/// prefixes and suffixes.
///
/// \returns true only when one of those checks reports color support; every
/// failure path returns false.
static bool terminalHasColors(int fd) {
#ifdef HAVE_TERMINFO
  // First, acquire a global lock because these C routines are thread hostile.
  std::lock_guard<std::mutex> G(*TermColorMutex);

  int errret = 0;
  if (setupterm(nullptr, fd, &errret) != 0)
    // Regardless of why, if we can't get terminfo, we shouldn't try to print
    // colors.
    return false;

  // Test whether the terminal as set up supports color output. How to do this
  // isn't entirely obvious. We can use the curses routine 'has_colors' but it
  // would be nice to avoid a dependency on curses proper when we can make do
  // with a minimal terminfo parsing library. Also, we don't really care whether
  // the terminal supports the curses-specific color changing routines, merely
  // if it will interpret ANSI color escape codes in a reasonable way. Thus, the
  // strategy here is just to query the baseline colors capability and if it
  // supports colors at all to assume it will translate the escape codes into
  // whatever range of colors it does support. We can add more detailed tests
  // here if users report them as necessary.
  //
  // The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if
  // the terminfo says that no colors are supported.
  bool HasColors = tigetnum(const_cast<char *>("colors")) > 0;

  // Now extract the structure allocated by setupterm and free its memory
  // through a really silly dance.
  struct term *termp = set_curterm(nullptr);
  (void)del_curterm(termp); // Drop any errors here.

  // Return true if we found a color capabilities for the current terminal.
  if (HasColors)
    return true;
#else
  // When the terminfo database is not available, check if the current terminal
  // is one of terminals that are known to support ANSI color escape codes.
  if (const char *TermStr = std::getenv("TERM")) {
    return StringSwitch<bool>(TermStr)
      .Case("ansi", true)
      .Case("cygwin", true)
      .Case("linux", true)
      .StartsWith("screen", true)
      .StartsWith("xterm", true)
      .StartsWith("vt100", true)
      .StartsWith("rxvt", true)
      .EndsWith("color", true)
      .Default(false);
  }
#endif

  // Otherwise, be conservative.
  return false;
}
+ if (count == sizeof(seed)) + return seed; + } + + // Otherwise, swizzle the current time and the process ID to form a reasonable + // seed. + const auto Now = std::chrono::high_resolution_clock::now(); + return hash_combine(Now.time_since_epoch().count(), ::getpid()); +} +#endif + +unsigned llvm::sys::Process::GetRandomNumber() { +#if HAVE_DECL_ARC4RANDOM + return arc4random(); +#else + static int x = (static_cast<void>(::srand(GetRandomNumberSeed())), 0); + (void)x; + return ::rand(); +#endif +} diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc new file mode 100644 index 0000000000000..520685a0e9878 --- /dev/null +++ b/llvm/lib/Support/Unix/Program.inc @@ -0,0 +1,500 @@ +//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix specific portion of the Program class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. 
+//===----------------------------------------------------------------------===// + +#include "Unix.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#if HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#if HAVE_SYS_RESOURCE_H +#include <sys/resource.h> +#endif +#if HAVE_SIGNAL_H +#include <signal.h> +#endif +#if HAVE_FCNTL_H +#include <fcntl.h> +#endif +#if HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_POSIX_SPAWN +#include <spawn.h> + +#if defined(__APPLE__) +#include <TargetConditionals.h> +#endif + +#if defined(__APPLE__) && !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) +#define USE_NSGETENVIRON 1 +#else +#define USE_NSGETENVIRON 0 +#endif + +#if !USE_NSGETENVIRON + extern char **environ; +#else +#include <crt_externs.h> // _NSGetEnviron +#endif +#endif + +namespace llvm { + +using namespace sys; + +ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {} + +ErrorOr<std::string> sys::findProgramByName(StringRef Name, + ArrayRef<StringRef> Paths) { + assert(!Name.empty() && "Must have a name!"); + // Use the given path verbatim if it contains any slashes; this matches + // the behavior of sh(1) and friends. + if (Name.find('/') != StringRef::npos) + return std::string(Name); + + SmallVector<StringRef, 16> EnvironmentPaths; + if (Paths.empty()) + if (const char *PathEnv = std::getenv("PATH")) { + SplitString(PathEnv, EnvironmentPaths, ":"); + Paths = EnvironmentPaths; + } + + for (auto Path : Paths) { + if (Path.empty()) + continue; + + // Check to see if this first directory contains the executable... + SmallString<128> FilePath(Path); + sys::path::append(FilePath, Name); + if (sys::fs::can_execute(FilePath.c_str())) + return std::string(FilePath.str()); // Found the executable! 
+ } + return errc::no_such_file_or_directory; +} + +static bool RedirectIO(Optional<StringRef> Path, int FD, std::string* ErrMsg) { + if (!Path) // Noop + return false; + std::string File; + if (Path->empty()) + // Redirect empty paths to /dev/null + File = "/dev/null"; + else + File = *Path; + + // Open the file + int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); + if (InFD == -1) { + MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for " + + (FD == 0 ? "input" : "output")); + return true; + } + + // Install it as the requested FD + if (dup2(InFD, FD) == -1) { + MakeErrMsg(ErrMsg, "Cannot dup2"); + close(InFD); + return true; + } + close(InFD); // Close the original FD + return false; +} + +#ifdef HAVE_POSIX_SPAWN +static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg, + posix_spawn_file_actions_t *FileActions) { + if (!Path) // Noop + return false; + const char *File; + if (Path->empty()) + // Redirect empty paths to /dev/null + File = "/dev/null"; + else + File = Path->c_str(); + + if (int Err = posix_spawn_file_actions_addopen( + FileActions, FD, File, + FD == 0 ? O_RDONLY : O_WRONLY | O_CREAT, 0666)) + return MakeErrMsg(ErrMsg, "Cannot posix_spawn_file_actions_addopen", Err); + return false; +} +#endif + +static void TimeOutHandler(int Sig) { +} + +static void SetMemoryLimits(unsigned size) { +#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT + struct rlimit r; + __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576; + + // Heap size + getrlimit (RLIMIT_DATA, &r); + r.rlim_cur = limit; + setrlimit (RLIMIT_DATA, &r); +#ifdef RLIMIT_RSS + // Resident set size. 
+ getrlimit (RLIMIT_RSS, &r); + r.rlim_cur = limit; + setrlimit (RLIMIT_RSS, &r); +#endif +#endif +} + +} + +static std::vector<const char *> +toNullTerminatedCStringArray(ArrayRef<StringRef> Strings, StringSaver &Saver) { + std::vector<const char *> Result; + for (StringRef S : Strings) + Result.push_back(Saver.save(S).data()); + Result.push_back(nullptr); + return Result; +} + +static bool Execute(ProcessInfo &PI, StringRef Program, + ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env, + ArrayRef<Optional<StringRef>> Redirects, + unsigned MemoryLimit, std::string *ErrMsg) { + if (!llvm::sys::fs::exists(Program)) { + if (ErrMsg) + *ErrMsg = std::string("Executable \"") + Program.str() + + std::string("\" doesn't exist!"); + return false; + } + + BumpPtrAllocator Allocator; + StringSaver Saver(Allocator); + std::vector<const char *> ArgVector, EnvVector; + const char **Argv = nullptr; + const char **Envp = nullptr; + ArgVector = toNullTerminatedCStringArray(Args, Saver); + Argv = ArgVector.data(); + if (Env) { + EnvVector = toNullTerminatedCStringArray(*Env, Saver); + Envp = EnvVector.data(); + } + + // If this OS has posix_spawn and there is no memory limit being implied, use + // posix_spawn. It is more efficient than fork/exec. +#ifdef HAVE_POSIX_SPAWN + if (MemoryLimit == 0) { + posix_spawn_file_actions_t FileActionsStore; + posix_spawn_file_actions_t *FileActions = nullptr; + + // If we call posix_spawn_file_actions_addopen we have to make sure the + // c strings we pass to it stay alive until the call to posix_spawn, + // so we copy any StringRefs into this variable. 
+ std::string RedirectsStorage[3]; + + if (!Redirects.empty()) { + assert(Redirects.size() == 3); + std::string *RedirectsStr[3] = {nullptr, nullptr, nullptr}; + for (int I = 0; I < 3; ++I) { + if (Redirects[I]) { + RedirectsStorage[I] = *Redirects[I]; + RedirectsStr[I] = &RedirectsStorage[I]; + } + } + + FileActions = &FileActionsStore; + posix_spawn_file_actions_init(FileActions); + + // Redirect stdin/stdout. + if (RedirectIO_PS(RedirectsStr[0], 0, ErrMsg, FileActions) || + RedirectIO_PS(RedirectsStr[1], 1, ErrMsg, FileActions)) + return false; + if (!Redirects[1] || !Redirects[2] || *Redirects[1] != *Redirects[2]) { + // Just redirect stderr + if (RedirectIO_PS(RedirectsStr[2], 2, ErrMsg, FileActions)) + return false; + } else { + // If stdout and stderr should go to the same place, redirect stderr + // to the FD already open for stdout. + if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2)) + return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err); + } + } + + if (!Envp) +#if !USE_NSGETENVIRON + Envp = const_cast<const char **>(environ); +#else + // environ is missing in dylibs. + Envp = const_cast<const char **>(*_NSGetEnviron()); +#endif + + constexpr int maxRetries = 8; + int retries = 0; + pid_t PID; + int Err; + do { + PID = 0; // Make Valgrind happy. + Err = posix_spawn(&PID, Program.str().c_str(), FileActions, + /*attrp*/ nullptr, const_cast<char **>(Argv), + const_cast<char **>(Envp)); + } while (Err == EINTR && ++retries < maxRetries); + + if (FileActions) + posix_spawn_file_actions_destroy(FileActions); + + if (Err) + return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); + + PI.Pid = PID; + PI.Process = PID; + + return true; + } +#endif + + // Create a child process. + int child = fork(); + switch (child) { + // An error occurred: Return to the caller. + case -1: + MakeErrMsg(ErrMsg, "Couldn't fork"); + return false; + + // Child process: Execute the program. + case 0: { + // Redirect file descriptors... 
+ if (!Redirects.empty()) { + // Redirect stdin + if (RedirectIO(Redirects[0], 0, ErrMsg)) { return false; } + // Redirect stdout + if (RedirectIO(Redirects[1], 1, ErrMsg)) { return false; } + if (Redirects[1] && Redirects[2] && *Redirects[1] == *Redirects[2]) { + // If stdout and stderr should go to the same place, redirect stderr + // to the FD already open for stdout. + if (-1 == dup2(1,2)) { + MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout"); + return false; + } + } else { + // Just redirect stderr + if (RedirectIO(Redirects[2], 2, ErrMsg)) { return false; } + } + } + + // Set memory limits + if (MemoryLimit!=0) { + SetMemoryLimits(MemoryLimit); + } + + // Execute! + std::string PathStr = Program; + if (Envp != nullptr) + execve(PathStr.c_str(), const_cast<char **>(Argv), + const_cast<char **>(Envp)); + else + execv(PathStr.c_str(), const_cast<char **>(Argv)); + // If the execve() failed, we should exit. Follow Unix protocol and + // return 127 if the executable was not found, and 126 otherwise. + // Use _exit rather than exit so that atexit functions and static + // object destructors cloned from the parent process aren't + // redundantly run, and so that any data buffered in stdio buffers + // cloned from the parent aren't redundantly written out. + _exit(errno == ENOENT ? 127 : 126); + } + + // Parent process: Break out of the switch to do our processing. + default: + break; + } + + PI.Pid = child; + PI.Process = child; + + return true; +} + +namespace llvm { + +ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, + bool WaitUntilTerminates, std::string *ErrMsg) { + struct sigaction Act, Old; + assert(PI.Pid && "invalid pid to wait on, process not started?"); + + int WaitPidOptions = 0; + pid_t ChildPid = PI.Pid; + if (WaitUntilTerminates) { + SecondsToWait = 0; + } else if (SecondsToWait) { + // Install a timeout handler. 
The handler itself does nothing, but the + // simple fact of having a handler at all causes the wait below to return + // with EINTR, unlike if we used SIG_IGN. + memset(&Act, 0, sizeof(Act)); + Act.sa_handler = TimeOutHandler; + sigemptyset(&Act.sa_mask); + sigaction(SIGALRM, &Act, &Old); + alarm(SecondsToWait); + } else if (SecondsToWait == 0) + WaitPidOptions = WNOHANG; + + // Parent process: Wait for the child process to terminate. + int status; + ProcessInfo WaitResult; + + do { + WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions); + } while (WaitUntilTerminates && WaitResult.Pid == -1 && errno == EINTR); + + if (WaitResult.Pid != PI.Pid) { + if (WaitResult.Pid == 0) { + // Non-blocking wait. + return WaitResult; + } else { + if (SecondsToWait && errno == EINTR) { + // Kill the child. + kill(PI.Pid, SIGKILL); + + // Turn off the alarm and restore the signal handler + alarm(0); + sigaction(SIGALRM, &Old, nullptr); + + // Wait for child to die + if (wait(&status) != ChildPid) + MakeErrMsg(ErrMsg, "Child timed out but wouldn't die"); + else + MakeErrMsg(ErrMsg, "Child timed out", 0); + + WaitResult.ReturnCode = -2; // Timeout detected + return WaitResult; + } else if (errno != EINTR) { + MakeErrMsg(ErrMsg, "Error waiting for child process"); + WaitResult.ReturnCode = -1; + return WaitResult; + } + } + } + + // We exited normally without timeout, so turn off the timer. + if (SecondsToWait && !WaitUntilTerminates) { + alarm(0); + sigaction(SIGALRM, &Old, nullptr); + } + + // Return the proper exit status. Detect error conditions + // so we can return -1 for them and set ErrMsg informatively. 
+ int result = 0; + if (WIFEXITED(status)) { + result = WEXITSTATUS(status); + WaitResult.ReturnCode = result; + + if (result == 127) { + if (ErrMsg) + *ErrMsg = llvm::sys::StrError(ENOENT); + WaitResult.ReturnCode = -1; + return WaitResult; + } + if (result == 126) { + if (ErrMsg) + *ErrMsg = "Program could not be executed"; + WaitResult.ReturnCode = -1; + return WaitResult; + } + } else if (WIFSIGNALED(status)) { + if (ErrMsg) { + *ErrMsg = strsignal(WTERMSIG(status)); +#ifdef WCOREDUMP + if (WCOREDUMP(status)) + *ErrMsg += " (core dumped)"; +#endif + } + // Return a special value to indicate that the process received an unhandled + // signal during execution as opposed to failing to execute. + WaitResult.ReturnCode = -2; + } + return WaitResult; +} + +std::error_code sys::ChangeStdinToBinary() { + // Do nothing, as Unix doesn't differentiate between text and binary. + return std::error_code(); +} + +std::error_code sys::ChangeStdoutToBinary() { + // Do nothing, as Unix doesn't differentiate between text and binary. + return std::error_code(); +} + +std::error_code +llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, + WindowsEncodingMethod Encoding /*unused*/) { + std::error_code EC; + llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::OF_Text); + + if (EC) + return EC; + + OS << Contents; + + if (OS.has_error()) + return make_error_code(errc::io_error); + + return EC; +} + +bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef<StringRef> Args) { + static long ArgMax = sysconf(_SC_ARG_MAX); + // POSIX requires that _POSIX_ARG_MAX is 4096, which is the lowest possible + // value for ARG_MAX on a POSIX compliant system. + static long ArgMin = _POSIX_ARG_MAX; + + // This the same baseline used by xargs. + long EffectiveArgMax = 128 * 1024; + + if (EffectiveArgMax > ArgMax) + EffectiveArgMax = ArgMax; + else if (EffectiveArgMax < ArgMin) + EffectiveArgMax = ArgMin; + + // System says no practical limit. 
+ if (ArgMax == -1) + return true; + + // Conservatively account for space required by environment variables. + long HalfArgMax = EffectiveArgMax / 2; + + size_t ArgLength = Program.size() + 1; + for (StringRef Arg : Args) { + // Ensure that we do not exceed the MAX_ARG_STRLEN constant on Linux, which + // does not have a constant unlike what the man pages would have you + // believe. Since this limit is pretty high, perform the check + // unconditionally rather than trying to be aggressive and limiting it to + // Linux only. + if (Arg.size() >= (32 * 4096)) + return false; + + ArgLength += Arg.size() + 1; + if (ArgLength > size_t(HalfArgMax)) { + return false; + } + } + + return true; +} +} diff --git a/llvm/lib/Support/Unix/README.txt b/llvm/lib/Support/Unix/README.txt new file mode 100644 index 0000000000000..3d547c2990d59 --- /dev/null +++ b/llvm/lib/Support/Unix/README.txt @@ -0,0 +1,16 @@ +llvm/lib/Support/Unix README +=========================== + +This directory provides implementations of the lib/System classes that +are common to two or more variants of UNIX. For example, the directory +structure underneath this directory could look like this: + +Unix - only code that is truly generic to all UNIX platforms + Posix - code that is specific to Posix variants of UNIX + SUS - code that is specific to the Single Unix Specification + SysV - code that is specific to System V variants of UNIX + +As a rule, only those directories actually needing to be created should be +created. Also, further subdirectories could be created to reflect versions of +the various standards. For example, under SUS there could be v1, v2, and v3 +subdirectories to reflect the three major versions of SUS. 
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc new file mode 100644 index 0000000000000..5e0cde4a81ed3 --- /dev/null +++ b/llvm/lib/Support/Unix/Signals.inc @@ -0,0 +1,632 @@ +//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for dealing with the possibility of +// Unix signals occurring while your program is running. +// +//===----------------------------------------------------------------------===// +// +// This file is extremely careful to only do signal-safe things while in a +// signal handler. In particular, memory allocation and acquiring a mutex +// while in a signal handler should never occur. ManagedStatic isn't usable from +// a signal handler for 2 reasons: +// +// 1. Creating a new one allocates. +// 2. The signal handler could fire while llvm_shutdown is being processed, in +// which case the ManagedStatic is in an unknown state because it could +// already have been destroyed, or be in the process of being destroyed. +// +// Modifying the behavior of the signal handlers (such as registering new ones) +// can acquire a mutex, but all this guarantees is that the signal handler +// behavior is only modified by one thread at a time. A signal handler can still +// fire while this occurs! +// +// Adding work to a signal handler requires lock-freedom (and assume atomics are +// always lock-free) because the signal handler could fire while new work is +// being added. 
+// +//===----------------------------------------------------------------------===// + +#include "Unix.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <string> +#include <sysexits.h> +#ifdef HAVE_BACKTRACE +# include BACKTRACE_HEADER // For backtrace(). +#endif +#if HAVE_SIGNAL_H +#include <signal.h> +#endif +#if HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif +#if HAVE_MACH_MACH_H +#include <mach/mach.h> +#endif +#if HAVE_LINK_H +#include <link.h> +#endif +#ifdef HAVE__UNWIND_BACKTRACE +// FIXME: We should be able to use <unwind.h> for any target that has an +// _Unwind_Backtrace function, but on FreeBSD the configure test passes +// despite the function not existing, and on Android, <unwind.h> conflicts +// with <link.h>. +#ifdef __GLIBC__ +#include <unwind.h> +#else +#undef HAVE__UNWIND_BACKTRACE +#endif +#endif + +using namespace llvm; + +static RETSIGTYPE SignalHandler(int Sig); // defined below. +static RETSIGTYPE InfoSignalHandler(int Sig); // defined below. + +static void DefaultPipeSignalFunction() { + exit(EX_IOERR); +} + +using SignalHandlerFunctionType = void (*)(); +/// The function to call if ctrl-c is pressed. +static std::atomic<SignalHandlerFunctionType> InterruptFunction = + ATOMIC_VAR_INIT(nullptr); +static std::atomic<SignalHandlerFunctionType> InfoSignalFunction = + ATOMIC_VAR_INIT(nullptr); +static std::atomic<SignalHandlerFunctionType> PipeSignalFunction = + ATOMIC_VAR_INIT(DefaultPipeSignalFunction); + +namespace { +/// Signal-safe removal of files. 
+/// Inserting and erasing from the list isn't signal-safe, but removal of files +/// themselves is signal-safe. Memory is freed when the head is freed, deletion +/// is therefore not signal-safe either. +class FileToRemoveList { + std::atomic<char *> Filename = ATOMIC_VAR_INIT(nullptr); + std::atomic<FileToRemoveList *> Next = ATOMIC_VAR_INIT(nullptr); + + FileToRemoveList() = default; + // Not signal-safe. + FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {} + +public: + // Not signal-safe. + ~FileToRemoveList() { + if (FileToRemoveList *N = Next.exchange(nullptr)) + delete N; + if (char *F = Filename.exchange(nullptr)) + free(F); + } + + // Not signal-safe. + static void insert(std::atomic<FileToRemoveList *> &Head, + const std::string &Filename) { + // Insert the new file at the end of the list. + FileToRemoveList *NewHead = new FileToRemoveList(Filename); + std::atomic<FileToRemoveList *> *InsertionPoint = &Head; + FileToRemoveList *OldHead = nullptr; + while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) { + InsertionPoint = &OldHead->Next; + OldHead = nullptr; + } + } + + // Not signal-safe. + static void erase(std::atomic<FileToRemoveList *> &Head, + const std::string &Filename) { + // Use a lock to avoid concurrent erase: the comparison would access + // free'd memory. + static ManagedStatic<sys::SmartMutex<true>> Lock; + sys::SmartScopedLock<true> Writer(*Lock); + + for (FileToRemoveList *Current = Head.load(); Current; + Current = Current->Next.load()) { + if (char *OldFilename = Current->Filename.load()) { + if (OldFilename != Filename) + continue; + // Leave an empty filename. + OldFilename = Current->Filename.exchange(nullptr); + // The filename might have become null between the time we + // compared it and we exchanged it. + if (OldFilename) + free(OldFilename); + } + } + } + + // Signal-safe. 
+ static void removeAllFiles(std::atomic<FileToRemoveList *> &Head) { + // If cleanup were to occur while we're removing files we'd have a bad time. + // Make sure we're OK by preventing cleanup from doing anything while we're + // removing files. If cleanup races with us and we win we'll have a leak, + // but we won't crash. + FileToRemoveList *OldHead = Head.exchange(nullptr); + + for (FileToRemoveList *currentFile = OldHead; currentFile; + currentFile = currentFile->Next.load()) { + // If erasing was occuring while we're trying to remove files we'd look + // at free'd data. Take away the path and put it back when done. + if (char *path = currentFile->Filename.exchange(nullptr)) { + // Get the status so we can determine if it's a file or directory. If we + // can't stat the file, ignore it. + struct stat buf; + if (stat(path, &buf) != 0) + continue; + + // If this is not a regular file, ignore it. We want to prevent removal + // of special files like /dev/null, even if the compiler is being run + // with the super-user permissions. + if (!S_ISREG(buf.st_mode)) + continue; + + // Otherwise, remove the file. We ignore any errors here as there is + // nothing else we can do. + unlink(path); + + // We're done removing the file, erasing can safely proceed. + currentFile->Filename.exchange(path); + } + } + + // We're done removing files, cleanup can safely proceed. + Head.exchange(OldHead); + } +}; +static std::atomic<FileToRemoveList *> FilesToRemove = ATOMIC_VAR_INIT(nullptr); + +/// Clean up the list in a signal-friendly manner. +/// Recall that signals can fire during llvm_shutdown. If this occurs we should +/// either clean something up or nothing at all, but we shouldn't crash! +struct FilesToRemoveCleanup { + // Not signal-safe. + ~FilesToRemoveCleanup() { + FileToRemoveList *Head = FilesToRemove.exchange(nullptr); + if (Head) + delete Head; + } +}; +} // namespace + +static StringRef Argv0; + +/// Signals that represent requested termination. 
There's no bug or failure, or +/// if there is, it's not our direct responsibility. For whatever reason, our +/// continued execution is no longer desirable. +static const int IntSigs[] = { + SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR2 +}; + +/// Signals that represent that we have a bug, and our prompt termination has +/// been ordered. +static const int KillSigs[] = { + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT +#ifdef SIGSYS + , SIGSYS +#endif +#ifdef SIGXCPU + , SIGXCPU +#endif +#ifdef SIGXFSZ + , SIGXFSZ +#endif +#ifdef SIGEMT + , SIGEMT +#endif +}; + +/// Signals that represent requests for status. +static const int InfoSigs[] = { + SIGUSR1 +#ifdef SIGINFO + , SIGINFO +#endif +}; + +static const size_t NumSigs = + array_lengthof(IntSigs) + array_lengthof(KillSigs) + + array_lengthof(InfoSigs); + + +static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0); +static struct { + struct sigaction SA; + int SigNo; +} RegisteredSignalInfo[NumSigs]; + +#if defined(HAVE_SIGALTSTACK) +// Hold onto both the old and new alternate signal stack so that it's not +// reported as a leak. We don't make any attempt to remove our alt signal +// stack if we remove our signal handlers; that can't be done reliably if +// someone else is also trying to do the same thing. +static stack_t OldAltStack; +static void* NewAltStackPointer; + +static void CreateSigAltStack() { + const size_t AltStackSize = MINSIGSTKSZ + 64 * 1024; + + // If we're executing on the alternate stack, or we already have an alternate + // signal stack that we're happy with, there's nothing for us to do. Don't + // reduce the size, some other part of the process might need a larger stack + // than we do. 
+ if (sigaltstack(nullptr, &OldAltStack) != 0 || + OldAltStack.ss_flags & SS_ONSTACK || + (OldAltStack.ss_sp && OldAltStack.ss_size >= AltStackSize)) + return; + + stack_t AltStack = {}; + AltStack.ss_sp = static_cast<char *>(safe_malloc(AltStackSize)); + NewAltStackPointer = AltStack.ss_sp; // Save to avoid reporting a leak. + AltStack.ss_size = AltStackSize; + if (sigaltstack(&AltStack, &OldAltStack) != 0) + free(AltStack.ss_sp); +} +#else +static void CreateSigAltStack() {} +#endif + +static void RegisterHandlers() { // Not signal-safe. + // The mutex prevents other threads from registering handlers while we're + // doing it. We also have to protect the handlers and their count because + // a signal handler could fire while we're registeting handlers. + static ManagedStatic<sys::SmartMutex<true>> SignalHandlerRegistrationMutex; + sys::SmartScopedLock<true> Guard(*SignalHandlerRegistrationMutex); + + // If the handlers are already registered, we're done. + if (NumRegisteredSignals.load() != 0) + return; + + // Create an alternate stack for signal handling. This is necessary for us to + // be able to reliably handle signals due to stack overflow. + CreateSigAltStack(); + + enum class SignalKind { IsKill, IsInfo }; + auto registerHandler = [&](int Signal, SignalKind Kind) { + unsigned Index = NumRegisteredSignals.load(); + assert(Index < array_lengthof(RegisteredSignalInfo) && + "Out of space for signal handlers!"); + + struct sigaction NewHandler; + + switch (Kind) { + case SignalKind::IsKill: + NewHandler.sa_handler = SignalHandler; + NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK; + break; + case SignalKind::IsInfo: + NewHandler.sa_handler = InfoSignalHandler; + NewHandler.sa_flags = SA_ONSTACK; + break; + } + sigemptyset(&NewHandler.sa_mask); + + // Install the new handler, save the old one in RegisteredSignalInfo. 
+ sigaction(Signal, &NewHandler, &RegisteredSignalInfo[Index].SA); + RegisteredSignalInfo[Index].SigNo = Signal; + ++NumRegisteredSignals; + }; + + for (auto S : IntSigs) + registerHandler(S, SignalKind::IsKill); + for (auto S : KillSigs) + registerHandler(S, SignalKind::IsKill); + for (auto S : InfoSigs) + registerHandler(S, SignalKind::IsInfo); +} + +static void UnregisterHandlers() { + // Restore all of the signal handlers to how they were before we showed up. + for (unsigned i = 0, e = NumRegisteredSignals.load(); i != e; ++i) { + sigaction(RegisteredSignalInfo[i].SigNo, + &RegisteredSignalInfo[i].SA, nullptr); + --NumRegisteredSignals; + } +} + +/// Process the FilesToRemove list. +static void RemoveFilesToRemove() { + FileToRemoveList::removeAllFiles(FilesToRemove); +} + +// The signal handler that runs. +static RETSIGTYPE SignalHandler(int Sig) { + // Restore the signal behavior to default, so that the program actually + // crashes when we return and the signal reissues. This also ensures that if + // we crash in our signal handler that the program will terminate immediately + // instead of recursing in the signal handler. + UnregisterHandlers(); + + // Unmask all potentially blocked kill signals. + sigset_t SigMask; + sigfillset(&SigMask); + sigprocmask(SIG_UNBLOCK, &SigMask, nullptr); + + { + RemoveFilesToRemove(); + + if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig) + != std::end(IntSigs)) { + if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr)) + return OldInterruptFunction(); + + // Send a special return code that drivers can check for, from sysexits.h. + if (Sig == SIGPIPE) + if (SignalHandlerFunctionType CurrentPipeFunction = PipeSignalFunction) + CurrentPipeFunction(); + + raise(Sig); // Execute the default handler. + return; + } + } + + // Otherwise if it is a fault (like SEGV) run any handler. 
+ llvm::sys::RunSignalHandlers(); + +#ifdef __s390__ + // On S/390, certain signals are delivered with PSW Address pointing to + // *after* the faulting instruction. Simply returning from the signal + // handler would continue execution after that point, instead of + // re-raising the signal. Raise the signal manually in those cases. + if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP) + raise(Sig); +#endif +} + +static RETSIGTYPE InfoSignalHandler(int Sig) { + SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno); + if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction) + CurrentInfoFunction(); +} + +void llvm::sys::RunInterruptHandlers() { + RemoveFilesToRemove(); +} + +void llvm::sys::SetInterruptFunction(void (*IF)()) { + InterruptFunction.exchange(IF); + RegisterHandlers(); +} + +void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { + InfoSignalFunction.exchange(Handler); + RegisterHandlers(); +} + +void llvm::sys::SetPipeSignalFunction(void (*Handler)()) { + PipeSignalFunction.exchange(Handler); + RegisterHandlers(); +} + +// The public API +bool llvm::sys::RemoveFileOnSignal(StringRef Filename, + std::string* ErrMsg) { + // Ensure that cleanup will occur as soon as one file is added. + static ManagedStatic<FilesToRemoveCleanup> FilesToRemoveCleanup; + *FilesToRemoveCleanup; + FileToRemoveList::insert(FilesToRemove, Filename.str()); + RegisterHandlers(); + return false; +} + +// The public API +void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) { + FileToRemoveList::erase(FilesToRemove, Filename.str()); +} + +/// Add a function to be called when a signal is delivered to the process. The +/// handler can have a cookie passed to it to identify what instance of the +/// handler it is. +void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr, + void *Cookie) { // Signal-safe. 
+ insertSignalHandler(FnPtr, Cookie); + RegisterHandlers(); +} + +#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \ + (defined(__linux__) || defined(__FreeBSD__) || \ + defined(__FreeBSD_kernel__) || defined(__NetBSD__)) +struct DlIteratePhdrData { + void **StackTrace; + int depth; + bool first; + const char **modules; + intptr_t *offsets; + const char *main_exec_name; +}; + +static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { + DlIteratePhdrData *data = (DlIteratePhdrData*)arg; + const char *name = data->first ? data->main_exec_name : info->dlpi_name; + data->first = false; + for (int i = 0; i < info->dlpi_phnum; i++) { + const auto *phdr = &info->dlpi_phdr[i]; + if (phdr->p_type != PT_LOAD) + continue; + intptr_t beg = info->dlpi_addr + phdr->p_vaddr; + intptr_t end = beg + phdr->p_memsz; + for (int j = 0; j < data->depth; j++) { + if (data->modules[j]) + continue; + intptr_t addr = (intptr_t)data->StackTrace[j]; + if (beg <= addr && addr < end) { + data->modules[j] = name; + data->offsets[j] = addr - info->dlpi_addr; + } + } + } + return 0; +} + +/// If this is an ELF platform, we can find all loaded modules and their virtual +/// addresses with dl_iterate_phdr. +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + DlIteratePhdrData data = {StackTrace, Depth, true, + Modules, Offsets, MainExecutableName}; + dl_iterate_phdr(dl_iterate_phdr_cb, &data); + return true; +} +#else +/// This platform does not have dl_iterate_phdr, so we do not yet know how to +/// find all loaded DSOs. +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + return false; +} +#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ... 
+ +#if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE) +static int unwindBacktrace(void **StackTrace, int MaxEntries) { + if (MaxEntries < 0) + return 0; + + // Skip the first frame ('unwindBacktrace' itself). + int Entries = -1; + + auto HandleFrame = [&](_Unwind_Context *Context) -> _Unwind_Reason_Code { + // Apparently we need to detect reaching the end of the stack ourselves. + void *IP = (void *)_Unwind_GetIP(Context); + if (!IP) + return _URC_END_OF_STACK; + + assert(Entries < MaxEntries && "recursively called after END_OF_STACK?"); + if (Entries >= 0) + StackTrace[Entries] = IP; + + if (++Entries == MaxEntries) + return _URC_END_OF_STACK; + return _URC_NO_REASON; + }; + + _Unwind_Backtrace( + [](_Unwind_Context *Context, void *Handler) { + return (*static_cast<decltype(HandleFrame) *>(Handler))(Context); + }, + static_cast<void *>(&HandleFrame)); + return std::max(Entries, 0); +} +#endif + +// In the case of a program crash or fault, print out a stack trace so that the +// user has an indication of why and where we died. +// +// On glibc systems we have the 'backtrace' function, which works nicely, but +// doesn't demangle symbols. +void llvm::sys::PrintStackTrace(raw_ostream &OS) { +#if ENABLE_BACKTRACES + static void *StackTrace[256]; + int depth = 0; +#if defined(HAVE_BACKTRACE) + // Use backtrace() to output a backtrace on Linux systems with glibc. + if (!depth) + depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace))); +#endif +#if defined(HAVE__UNWIND_BACKTRACE) + // Try _Unwind_Backtrace() if backtrace() failed. 
+ if (!depth) + depth = unwindBacktrace(StackTrace, + static_cast<int>(array_lengthof(StackTrace))); +#endif + if (!depth) + return; + + if (printSymbolizedStackTrace(Argv0, StackTrace, depth, OS)) + return; +#if HAVE_DLFCN_H && HAVE_DLADDR + int width = 0; + for (int i = 0; i < depth; ++i) { + Dl_info dlinfo; + dladdr(StackTrace[i], &dlinfo); + const char* name = strrchr(dlinfo.dli_fname, '/'); + + int nwidth; + if (!name) nwidth = strlen(dlinfo.dli_fname); + else nwidth = strlen(name) - 1; + + if (nwidth > width) width = nwidth; + } + + for (int i = 0; i < depth; ++i) { + Dl_info dlinfo; + dladdr(StackTrace[i], &dlinfo); + + OS << format("%-2d", i); + + const char* name = strrchr(dlinfo.dli_fname, '/'); + if (!name) OS << format(" %-*s", width, dlinfo.dli_fname); + else OS << format(" %-*s", width, name+1); + + OS << format(" %#0*lx", (int)(sizeof(void*) * 2) + 2, + (unsigned long)StackTrace[i]); + + if (dlinfo.dli_sname != nullptr) { + OS << ' '; + int res; + char* d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res); + if (!d) OS << dlinfo.dli_sname; + else OS << d; + free(d); + + OS << format(" + %tu", (static_cast<const char*>(StackTrace[i])- + static_cast<const char*>(dlinfo.dli_saddr))); + } + OS << '\n'; + } +#elif defined(HAVE_BACKTRACE) + backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO); +#endif +#endif +} + +static void PrintStackTraceSignalHandler(void *) { + sys::PrintStackTrace(llvm::errs()); +} + +void llvm::sys::DisableSystemDialogsOnCrash() {} + +/// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the +/// process, print a stack trace and then exit. +void llvm::sys::PrintStackTraceOnErrorSignal(StringRef Argv0, + bool DisableCrashReporting) { + ::Argv0 = Argv0; + + AddSignalHandler(PrintStackTraceSignalHandler, nullptr); + +#if defined(__APPLE__) && ENABLE_CRASH_OVERRIDES + // Environment variable to disable any kind of crash dialog. 
+ if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) { + mach_port_t self = mach_task_self(); + + exception_mask_t mask = EXC_MASK_CRASH; + + kern_return_t ret = task_set_exception_ports(self, + mask, + MACH_PORT_NULL, + EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, + THREAD_STATE_NONE); + (void)ret; + } +#endif +} diff --git a/llvm/lib/Support/Unix/ThreadLocal.inc b/llvm/lib/Support/Unix/ThreadLocal.inc new file mode 100644 index 0000000000000..a402ae980424e --- /dev/null +++ b/llvm/lib/Support/Unix/ThreadLocal.inc @@ -0,0 +1,70 @@ +//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix specific (non-pthread) ThreadLocal class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants. 
+//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" + +#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC) + +#include <cassert> +#include <pthread.h> +#include <stdlib.h> + +namespace llvm { +using namespace sys; + +ThreadLocalImpl::ThreadLocalImpl() : data() { + static_assert(sizeof(pthread_key_t) <= sizeof(data), "size too big"); + pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data); + int errorcode = pthread_key_create(key, nullptr); + assert(errorcode == 0); + (void) errorcode; +} + +ThreadLocalImpl::~ThreadLocalImpl() { + pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data); + int errorcode = pthread_key_delete(*key); + assert(errorcode == 0); + (void) errorcode; +} + +void ThreadLocalImpl::setInstance(const void* d) { + pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data); + int errorcode = pthread_setspecific(*key, d); + assert(errorcode == 0); + (void) errorcode; +} + +void *ThreadLocalImpl::getInstance() { + pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data); + return pthread_getspecific(*key); +} + +void ThreadLocalImpl::removeInstance() { + setInstance(nullptr); +} + +} +#else +namespace llvm { +using namespace sys; +ThreadLocalImpl::ThreadLocalImpl() : data() { } +ThreadLocalImpl::~ThreadLocalImpl() { } +void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);} +void *ThreadLocalImpl::getInstance() { return data; } +void ThreadLocalImpl::removeInstance() { setInstance(0); } +} +#endif diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc new file mode 100644 index 0000000000000..ed9a96563055d --- /dev/null +++ b/llvm/lib/Support/Unix/Threading.inc @@ -0,0 +1,258 @@ +//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Unix specific implementation of Threading functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" + +#if defined(__APPLE__) +#include <mach/mach_init.h> +#include <mach/mach_port.h> +#endif + +#include <pthread.h> + +#if defined(__FreeBSD__) || defined(__OpenBSD__) +#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np() +#endif + +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include <errno.h> +#include <sys/sysctl.h> +#include <sys/user.h> +#include <unistd.h> +#endif + +#if defined(__NetBSD__) +#include <lwp.h> // For _lwp_self() +#endif + +#if defined(__linux__) +#include <sys/syscall.h> // For syscall codes +#include <unistd.h> // For syscall() +#endif + +namespace { + struct ThreadInfo { + void(*UserFn)(void *); + void *UserData; + }; +} + +static void *ExecuteOnThread_Dispatch(void *Arg) { + ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg); + TI->UserFn(TI->UserData); + return nullptr; +} + +void llvm::llvm_execute_on_thread(void(*Fn)(void*), void *UserData, + unsigned RequestedStackSize) { + ThreadInfo Info = { Fn, UserData }; + pthread_attr_t Attr; + pthread_t Thread; + + // Construct the attributes object. + if (::pthread_attr_init(&Attr) != 0) + return; + + // Set the requested stack size, if given. + if (RequestedStackSize != 0) { + if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0) + goto error; + } + + // Construct and execute the thread. + if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0) + goto error; + + // Wait for the thread and clean up. 
+ ::pthread_join(Thread, nullptr); + +error: + ::pthread_attr_destroy(&Attr); +} + + +uint64_t llvm::get_threadid() { +#if defined(__APPLE__) + // Calling "mach_thread_self()" bumps the reference count on the thread + // port, so we need to deallocate it. mach_task_self() doesn't bump the ref + // count. + thread_port_t Self = mach_thread_self(); + mach_port_deallocate(mach_task_self(), Self); + return Self; +#elif defined(__FreeBSD__) + return uint64_t(pthread_getthreadid_np()); +#elif defined(__NetBSD__) + return uint64_t(_lwp_self()); +#elif defined(__ANDROID__) + return uint64_t(gettid()); +#elif defined(__linux__) + return uint64_t(syscall(SYS_gettid)); +#else + return uint64_t(pthread_self()); +#endif +} + + +static constexpr uint32_t get_max_thread_name_length_impl() { +#if defined(__NetBSD__) + return PTHREAD_MAX_NAMELEN_NP; +#elif defined(__APPLE__) + return 64; +#elif defined(__linux__) +#if HAVE_PTHREAD_SETNAME_NP + return 16; +#else + return 0; +#endif +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + return 16; +#elif defined(__OpenBSD__) + return 32; +#else + return 0; +#endif +} + +uint32_t llvm::get_max_thread_name_length() { + return get_max_thread_name_length_impl(); +} + +void llvm::set_thread_name(const Twine &Name) { + // Make sure the input is null terminated. + SmallString<64> Storage; + StringRef NameStr = Name.toNullTerminatedStringRef(Storage); + + // Truncate from the beginning, not the end, if the specified name is too + // long. For one, this ensures that the resulting string is still null + // terminated, but additionally the end of a long thread name will usually + // be more unique than the beginning, since a common pattern is for similar + // threads to share a common prefix. + // Note that the name length includes the null terminator. 
+ if (get_max_thread_name_length() > 0) + NameStr = NameStr.take_back(get_max_thread_name_length() - 1); + (void)NameStr; +#if defined(__linux__) +#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__) +#if HAVE_PTHREAD_SETNAME_NP + ::pthread_setname_np(::pthread_self(), NameStr.data()); +#endif +#endif +#elif defined(__FreeBSD__) || defined(__OpenBSD__) + ::pthread_set_name_np(::pthread_self(), NameStr.data()); +#elif defined(__NetBSD__) + ::pthread_setname_np(::pthread_self(), "%s", + const_cast<char *>(NameStr.data())); +#elif defined(__APPLE__) + ::pthread_setname_np(NameStr.data()); +#endif +} + +void llvm::get_thread_name(SmallVectorImpl<char> &Name) { + Name.clear(); + +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + int pid = ::getpid(); + uint64_t tid = get_threadid(); + + struct kinfo_proc *kp = nullptr, *nkp; + size_t len = 0; + int error; + int ctl[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD, + (int)pid }; + + while (1) { + error = sysctl(ctl, 4, kp, &len, nullptr, 0); + if (kp == nullptr || (error != 0 && errno == ENOMEM)) { + // Add extra space in case threads are added before next call. 
+ len += sizeof(*kp) + len / 10; + nkp = (struct kinfo_proc *)::realloc(kp, len); + if (nkp == nullptr) { + free(kp); + return; + } + kp = nkp; + continue; + } + if (error != 0) + len = 0; + break; + } + + for (size_t i = 0; i < len / sizeof(*kp); i++) { + if (kp[i].ki_tid == (lwpid_t)tid) { + Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname)); + break; + } + } + free(kp); + return; +#elif defined(__NetBSD__) + constexpr uint32_t len = get_max_thread_name_length_impl(); + char buf[len]; + ::pthread_getname_np(::pthread_self(), buf, len); + + Name.append(buf, buf + strlen(buf)); +#elif defined(__OpenBSD__) + constexpr uint32_t len = get_max_thread_name_length_impl(); + char buf[len]; + ::pthread_get_name_np(::pthread_self(), buf, len); + + Name.append(buf, buf + strlen(buf)); +#elif defined(__linux__) +#if HAVE_PTHREAD_GETNAME_NP + constexpr uint32_t len = get_max_thread_name_length_impl(); + char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive. + if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len)) + Name.append(Buffer, Buffer + strlen(Buffer)); +#endif +#endif +} + +SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { +#if defined(__linux__) && defined(SCHED_IDLE) + // Some *really* old glibcs are missing SCHED_IDLE. + // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html + // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html + sched_param priority; + // For each of the above policies, param->sched_priority must be 0. + priority.sched_priority = 0; + // SCHED_IDLE for running very low priority background jobs. + // SCHED_OTHER the standard round-robin time-sharing policy; + return !pthread_setschedparam( + pthread_self(), + Priority == ThreadPriority::Background ? SCHED_IDLE : SCHED_OTHER, + &priority) + ? 
SetThreadPriorityResult::SUCCESS + : SetThreadPriorityResult::FAILURE; +#elif defined(__APPLE__) + // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/getpriority.2.html + // When setting a thread into background state the scheduling priority is set + // to lowest value, disk and network IO are throttled. Network IO will be + // throttled for any sockets the thread opens after going into background + // state. Any previously opened sockets are not affected. + + // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/getiopolicy_np.3.html + // I/Os with THROTTLE policy are called THROTTLE I/Os. If a THROTTLE I/O + // request occurs within a small time window (usually a fraction of a second) + // of another NORMAL I/O request, the thread that issues the THROTTLE I/O is + // forced to sleep for a certain interval. This slows down the thread that + // issues the THROTTLE I/O so that NORMAL I/Os can utilize most of the disk + // I/O bandwidth. + return !setpriority(PRIO_DARWIN_THREAD, 0, + Priority == ThreadPriority::Background ? PRIO_DARWIN_BG + : 0) + ? SetThreadPriorityResult::SUCCESS + : SetThreadPriorityResult::FAILURE; +#endif + return SetThreadPriorityResult::FAILURE; +} diff --git a/llvm/lib/Support/Unix/Unix.h b/llvm/lib/Support/Unix/Unix.h new file mode 100644 index 0000000000000..86309b0567f52 --- /dev/null +++ b/llvm/lib/Support/Unix/Unix.h @@ -0,0 +1,105 @@ +//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines things specific to Unix implementations. 
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_SUPPORT_UNIX_UNIX_H
#define LLVM_LIB_SUPPORT_UNIX_UNIX_H

//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic UNIX code that
//===          is guaranteed to work on all UNIX variants.
//===----------------------------------------------------------------------===//

#include "llvm/Config/config.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Errno.h"
#include <algorithm>
#include <assert.h>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <sys/types.h>
#include <sys/wait.h>

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif

#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
#endif
#include <time.h>

#ifdef HAVE_DLFCN_H
# include <dlfcn.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

/// Make an error message.
///
/// Builds "<prefix>: <error string>" into \p ErrMsg from the \p prefix string
/// and the Unix error number \p errnum. If \p errnum is -1 (the default), the
/// current value of errno is used instead. If \p ErrMsg is null, nothing is
/// written.
///
/// \returns true in every case, so callers can write
/// `return MakeErrMsg(...)` on an error path.
static inline bool MakeErrMsg(
  std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
  if (!ErrMsg)
    return true;
  if (errnum == -1)
    errnum = errno;
  *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
  return true;
}

namespace llvm {
namespace sys {

/// Convert a struct timeval to a duration. Note that timeval can be used both
/// as a time point and a duration. Be sure to check what the input represents.
inline std::chrono::microseconds toDuration(const struct timeval &TV) {
  return std::chrono::seconds(TV.tv_sec) +
         std::chrono::microseconds(TV.tv_usec);
}

/// Convert a time point to struct timespec.
///
/// tv_nsec receives the sub-second remainder of the time since epoch as a raw
/// tick count of \p TP's duration type (presumably nanoseconds -- confirm
/// TimePoint<>'s default duration in Chrono.h).
inline struct timespec toTimeSpec(TimePoint<> TP) {
  using namespace std::chrono;

  struct timespec RetVal;
  RetVal.tv_sec = toTimeT(TP);
  RetVal.tv_nsec = (TP.time_since_epoch() % seconds(1)).count();
  return RetVal;
}

/// Convert a time point to struct timeval.
///
/// tv_usec receives the sub-second remainder of the time since epoch, in
/// microseconds.
inline struct timeval toTimeVal(TimePoint<std::chrono::microseconds> TP) {
  using namespace std::chrono;

  struct timeval RetVal;
  RetVal.tv_sec = toTimeT(TP);
  RetVal.tv_usec = (TP.time_since_epoch() % seconds(1)).count();
  return RetVal;
}

} // namespace sys
} // namespace llvm

#endif
diff --git a/llvm/lib/Support/Unix/Watchdog.inc b/llvm/lib/Support/Unix/Watchdog.inc new file mode 100644 index 0000000000000..b363ef7795603 --- /dev/null +++ b/llvm/lib/Support/Unix/Watchdog.inc @@ -0,0 +1,33 @@
//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the generic Unix implementation of the Watchdog class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +namespace llvm { + namespace sys { + Watchdog::Watchdog(unsigned int seconds) { +#ifdef HAVE_UNISTD_H + alarm(seconds); +#endif + } + + Watchdog::~Watchdog() { +#ifdef HAVE_UNISTD_H + alarm(0); +#endif + } + } +} diff --git a/llvm/lib/Support/Valgrind.cpp b/llvm/lib/Support/Valgrind.cpp new file mode 100644 index 0000000000000..886cb6ba33111 --- /dev/null +++ b/llvm/lib/Support/Valgrind.cpp @@ -0,0 +1,54 @@ +//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is +// defined. If we have valgrind.h but valgrind isn't running, its macros are +// no-ops. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Valgrind.h" +#include "llvm/Config/config.h" +#include <cstddef> + +#if HAVE_VALGRIND_VALGRIND_H +#include <valgrind/valgrind.h> + +static bool InitNotUnderValgrind() { + return !RUNNING_ON_VALGRIND; +} + +// This bool is negated from what we'd expect because code may run before it +// gets initialized. If that happens, it will appear to be 0 (false), and we +// want that to cause the rest of the code in this file to run the +// Valgrind-provided macros. 
+static const bool NotUnderValgrind = InitNotUnderValgrind(); + +bool llvm::sys::RunningOnValgrind() { + if (NotUnderValgrind) + return false; + return RUNNING_ON_VALGRIND; +} + +void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { + if (NotUnderValgrind) + return; + + VALGRIND_DISCARD_TRANSLATIONS(Addr, Len); +} + +#else // !HAVE_VALGRIND_VALGRIND_H + +bool llvm::sys::RunningOnValgrind() { + return false; +} + +void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { +} + +#endif // !HAVE_VALGRIND_VALGRIND_H diff --git a/llvm/lib/Support/VersionTuple.cpp b/llvm/lib/Support/VersionTuple.cpp new file mode 100644 index 0000000000000..60b59424fbb49 --- /dev/null +++ b/llvm/lib/Support/VersionTuple.cpp @@ -0,0 +1,109 @@ +//===- VersionTuple.cpp - Version Number Handling ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the VersionTuple class, which represents a version in +// the form major[.minor[.subminor]]. +// +//===----------------------------------------------------------------------===// +#include "llvm/Support/VersionTuple.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +std::string VersionTuple::getAsString() const { + std::string Result; + { + llvm::raw_string_ostream Out(Result); + Out << *this; + } + return Result; +} + +raw_ostream &llvm::operator<<(raw_ostream &Out, const VersionTuple &V) { + Out << V.getMajor(); + if (Optional<unsigned> Minor = V.getMinor()) + Out << '.' << *Minor; + if (Optional<unsigned> Subminor = V.getSubminor()) + Out << '.' << *Subminor; + if (Optional<unsigned> Build = V.getBuild()) + Out << '.' 
<< *Build; + return Out; +} + +static bool parseInt(StringRef &input, unsigned &value) { + assert(value == 0); + if (input.empty()) + return true; + + char next = input[0]; + input = input.substr(1); + if (next < '0' || next > '9') + return true; + value = (unsigned)(next - '0'); + + while (!input.empty()) { + next = input[0]; + if (next < '0' || next > '9') + return false; + input = input.substr(1); + value = value * 10 + (unsigned)(next - '0'); + } + + return false; +} + +bool VersionTuple::tryParse(StringRef input) { + unsigned major = 0, minor = 0, micro = 0, build = 0; + + // Parse the major version, [0-9]+ + if (parseInt(input, major)) + return true; + + if (input.empty()) { + *this = VersionTuple(major); + return false; + } + + // If we're not done, parse the minor version, \.[0-9]+ + if (input[0] != '.') + return true; + input = input.substr(1); + if (parseInt(input, minor)) + return true; + + if (input.empty()) { + *this = VersionTuple(major, minor); + return false; + } + + // If we're not done, parse the micro version, \.[0-9]+ + if (input[0] != '.') + return true; + input = input.substr(1); + if (parseInt(input, micro)) + return true; + + if (input.empty()) { + *this = VersionTuple(major, minor, micro); + return false; + } + + // If we're not done, parse the micro version, \.[0-9]+ + if (input[0] != '.') + return true; + input = input.substr(1); + if (parseInt(input, build)) + return true; + + // If we have characters left over, it's an error. + if (!input.empty()) + return true; + + *this = VersionTuple(major, minor, micro, build); + return false; +} diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp new file mode 100644 index 0000000000000..c390cb1b22275 --- /dev/null +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -0,0 +1,2139 @@ +//===- VirtualFileSystem.cpp - Virtual File System Layer ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the VirtualFileSystem interface.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::vfs;

using llvm::sys::fs::file_t;
using llvm::sys::fs::file_status;
using llvm::sys::fs::file_type;
using llvm::sys::fs::kInvalidFile;
using llvm::sys::fs::perms;
using llvm::sys::fs::UniqueID;

/// Build a vfs::Status from a native file_status. No name is supplied, so
/// Name is left default-initialized.
Status::Status(const file_status &Status)
    : UID(Status.getUniqueID()), MTime(Status.getLastModificationTime()),
      User(Status.getUser()), Group(Status.getGroup()), Size(Status.getSize()),
      Type(Status.type()), Perms(Status.permissions()) {}

/// Build a vfs::Status from explicit field values.
Status::Status(const Twine &Name, UniqueID UID, sys::TimePoint<> MTime,
               uint32_t User, uint32_t Group, uint64_t Size, file_type Type,
               perms Perms)
    : Name(Name.str()), UID(UID), MTime(MTime), User(User), Group(Group),
      Size(Size), Type(Type), Perms(Perms) {}

/// Return a copy of \p In whose name is replaced by \p NewName; all other
/// fields are taken from \p In.
Status Status::copyWithNewName(const Status &In, const Twine &NewName) {
  return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
                In.getUser(), In.getGroup(), In.getSize(), In.getType(),
                In.getPermissions());
}

/// Same as above, but converting from a native file_status.
Status Status::copyWithNewName(const file_status &In, const Twine &NewName) {
  return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
                In.getUser(), In.getGroup(), In.getSize(), In.type(),
                In.permissions());
}

/// Two statuses are equivalent when their unique IDs match. Both statuses
/// must be known (asserted).
bool Status::equivalent(const Status &Other) const {
  assert(isStatusKnown() && Other.isStatusKnown());
  return getUniqueID() == Other.getUniqueID();
}

bool Status::isDirectory() const { return Type == file_type::directory_file; }

bool Status::isRegularFile() const { return Type == file_type::regular_file; }

/// True for an existing entry that is neither a regular file, a directory,
/// nor a symlink.
bool Status::isOther() const {
  return exists() && !isRegularFile() && !isDirectory() && !isSymlink();
}

bool Status::isSymlink() const { return Type == file_type::symlink_file; }

/// The status is "known" unless the type is status_error.
bool Status::isStatusKnown() const { return Type != file_type::status_error; }

/// True when the status is known and the type is not file_not_found.
bool Status::exists() const {
  return isStatusKnown() && Type != file_type::file_not_found;
}

File::~File() = default;

FileSystem::~FileSystem() = default;

/// Convenience wrapper: open \p Name for reading and return its contents as a
/// MemoryBuffer. An error from openFileForRead() is returned unchanged; the
/// remaining arguments are forwarded to File::getBuffer().
ErrorOr<std::unique_ptr<MemoryBuffer>>
FileSystem::getBufferForFile(const llvm::Twine &Name, int64_t FileSize,
                             bool RequiresNullTerminator, bool IsVolatile) {
  auto F = openFileForRead(Name);
  if (!F)
    return F.getError();

  return (*F)->getBuffer(Name, FileSize, RequiresNullTerminator, IsVolatile);
}

+std::error_code FileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const { + if (llvm::sys::path::is_absolute(Path)) + return {}; + + auto WorkingDir = getCurrentWorkingDirectory(); + if (!WorkingDir) + return WorkingDir.getError(); + + llvm::sys::fs::make_absolute(WorkingDir.get(), Path); + return {}; +} + +std::error_code FileSystem::getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const { + return errc::operation_not_permitted; +} + +std::error_code FileSystem::isLocal(const Twine &Path, bool &Result) { + return errc::operation_not_permitted; +} + +bool FileSystem::exists(const Twine &Path) { + auto Status = status(Path); + return Status && Status->exists(); +} + +#ifndef NDEBUG +static bool isTraversalComponent(StringRef Component) { + return Component.equals("..") || Component.equals("."); +} + +static bool pathHasTraversal(StringRef Path) { + using namespace llvm::sys; + + for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path))) + if (isTraversalComponent(Comp)) + return true; + return false; +} +#endif + +//===-----------------------------------------------------------------------===/ +// RealFileSystem implementation +//===-----------------------------------------------------------------------===/ + +namespace { + +/// Wrapper around a raw file descriptor. 
+class RealFile : public File { + friend class RealFileSystem; + + file_t FD; + Status S; + std::string RealName; + + RealFile(file_t RawFD, StringRef NewName, StringRef NewRealPathName) + : FD(RawFD), S(NewName, {}, {}, {}, {}, {}, + llvm::sys::fs::file_type::status_error, {}), + RealName(NewRealPathName.str()) { + assert(FD != kInvalidFile && "Invalid or inactive file descriptor"); + } + +public: + ~RealFile() override; + + ErrorOr<Status> status() override; + ErrorOr<std::string> getName() override; + ErrorOr<std::unique_ptr<MemoryBuffer>> getBuffer(const Twine &Name, + int64_t FileSize, + bool RequiresNullTerminator, + bool IsVolatile) override; + std::error_code close() override; +}; + +} // namespace + +RealFile::~RealFile() { close(); } + +ErrorOr<Status> RealFile::status() { + assert(FD != kInvalidFile && "cannot stat closed file"); + if (!S.isStatusKnown()) { + file_status RealStatus; + if (std::error_code EC = sys::fs::status(FD, RealStatus)) + return EC; + S = Status::copyWithNewName(RealStatus, S.getName()); + } + return S; +} + +ErrorOr<std::string> RealFile::getName() { + return RealName.empty() ? S.getName().str() : RealName; +} + +ErrorOr<std::unique_ptr<MemoryBuffer>> +RealFile::getBuffer(const Twine &Name, int64_t FileSize, + bool RequiresNullTerminator, bool IsVolatile) { + assert(FD != kInvalidFile && "cannot get buffer for closed file"); + return MemoryBuffer::getOpenFile(FD, Name, FileSize, RequiresNullTerminator, + IsVolatile); +} + +std::error_code RealFile::close() { + std::error_code EC = sys::fs::closeFile(FD); + FD = kInvalidFile; + return EC; +} + +namespace { + +/// A file system according to your operating system. +/// This may be linked to the process's working directory, or maintain its own. +/// +/// Currently, its own working directory is emulated by storing the path and +/// sending absolute paths to llvm::sys::fs:: functions. 
+/// A more principled approach would be to push this down a level, modelling +/// the working dir as an llvm::sys::fs::WorkingDir or similar. +/// This would enable the use of openat()-style functions on some platforms. +class RealFileSystem : public FileSystem { +public: + explicit RealFileSystem(bool LinkCWDToProcess) { + if (!LinkCWDToProcess) { + SmallString<128> PWD, RealPWD; + if (llvm::sys::fs::current_path(PWD)) + return; // Awful, but nothing to do here. + if (llvm::sys::fs::real_path(PWD, RealPWD)) + WD = {PWD, PWD}; + else + WD = {PWD, RealPWD}; + } + } + + ErrorOr<Status> status(const Twine &Path) override; + ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override; + directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; + + llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; + std::error_code setCurrentWorkingDirectory(const Twine &Path) override; + std::error_code isLocal(const Twine &Path, bool &Result) override; + std::error_code getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const override; + +private: + // If this FS has its own working dir, use it to make Path absolute. + // The returned twine is safe to use as long as both Storage and Path live. + Twine adjustPath(const Twine &Path, SmallVectorImpl<char> &Storage) const { + if (!WD) + return Path; + Path.toVector(Storage); + sys::fs::make_absolute(WD->Resolved, Storage); + return Storage; + } + + struct WorkingDirectory { + // The current working directory, without symlinks resolved. (echo $PWD). + SmallString<128> Specified; + // The current working directory, with links resolved. (readlink .). 
+ SmallString<128> Resolved; + }; + Optional<WorkingDirectory> WD; +}; + +} // namespace + +ErrorOr<Status> RealFileSystem::status(const Twine &Path) { + SmallString<256> Storage; + sys::fs::file_status RealStatus; + if (std::error_code EC = + sys::fs::status(adjustPath(Path, Storage), RealStatus)) + return EC; + return Status::copyWithNewName(RealStatus, Path); +} + +ErrorOr<std::unique_ptr<File>> +RealFileSystem::openFileForRead(const Twine &Name) { + SmallString<256> RealName, Storage; + Expected<file_t> FDOrErr = sys::fs::openNativeFileForRead( + adjustPath(Name, Storage), sys::fs::OF_None, &RealName); + if (!FDOrErr) + return errorToErrorCode(FDOrErr.takeError()); + return std::unique_ptr<File>( + new RealFile(*FDOrErr, Name.str(), RealName.str())); +} + +llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const { + if (WD) + return WD->Specified.str(); + + SmallString<128> Dir; + if (std::error_code EC = llvm::sys::fs::current_path(Dir)) + return EC; + return Dir.str(); +} + +std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) { + if (!WD) + return llvm::sys::fs::set_current_path(Path); + + SmallString<128> Absolute, Resolved, Storage; + adjustPath(Path, Storage).toVector(Absolute); + bool IsDir; + if (auto Err = llvm::sys::fs::is_directory(Absolute, IsDir)) + return Err; + if (!IsDir) + return std::make_error_code(std::errc::not_a_directory); + if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved)) + return Err; + WD = {Absolute, Resolved}; + return std::error_code(); +} + +std::error_code RealFileSystem::isLocal(const Twine &Path, bool &Result) { + SmallString<256> Storage; + return llvm::sys::fs::is_local(adjustPath(Path, Storage), Result); +} + +std::error_code +RealFileSystem::getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const { + SmallString<256> Storage; + return llvm::sys::fs::real_path(adjustPath(Path, Storage), Output); +} + +IntrusiveRefCntPtr<FileSystem> 
vfs::getRealFileSystem() { + static IntrusiveRefCntPtr<FileSystem> FS(new RealFileSystem(true)); + return FS; +} + +std::unique_ptr<FileSystem> vfs::createPhysicalFileSystem() { + return std::make_unique<RealFileSystem>(false); +} + +namespace { + +class RealFSDirIter : public llvm::vfs::detail::DirIterImpl { + llvm::sys::fs::directory_iterator Iter; + +public: + RealFSDirIter(const Twine &Path, std::error_code &EC) : Iter(Path, EC) { + if (Iter != llvm::sys::fs::directory_iterator()) + CurrentEntry = directory_entry(Iter->path(), Iter->type()); + } + + std::error_code increment() override { + std::error_code EC; + Iter.increment(EC); + CurrentEntry = (Iter == llvm::sys::fs::directory_iterator()) + ? directory_entry() + : directory_entry(Iter->path(), Iter->type()); + return EC; + } +}; + +} // namespace + +directory_iterator RealFileSystem::dir_begin(const Twine &Dir, + std::error_code &EC) { + SmallString<128> Storage; + return directory_iterator( + std::make_shared<RealFSDirIter>(adjustPath(Dir, Storage), EC)); +} + +//===-----------------------------------------------------------------------===/ +// OverlayFileSystem implementation +//===-----------------------------------------------------------------------===/ + +OverlayFileSystem::OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> BaseFS) { + FSList.push_back(std::move(BaseFS)); +} + +void OverlayFileSystem::pushOverlay(IntrusiveRefCntPtr<FileSystem> FS) { + FSList.push_back(FS); + // Synchronize added file systems by duplicating the working directory from + // the first one in the list. 
+ FS->setCurrentWorkingDirectory(getCurrentWorkingDirectory().get()); +} + +ErrorOr<Status> OverlayFileSystem::status(const Twine &Path) { + // FIXME: handle symlinks that cross file systems + for (iterator I = overlays_begin(), E = overlays_end(); I != E; ++I) { + ErrorOr<Status> Status = (*I)->status(Path); + if (Status || Status.getError() != llvm::errc::no_such_file_or_directory) + return Status; + } + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +ErrorOr<std::unique_ptr<File>> +OverlayFileSystem::openFileForRead(const llvm::Twine &Path) { + // FIXME: handle symlinks that cross file systems + for (iterator I = overlays_begin(), E = overlays_end(); I != E; ++I) { + auto Result = (*I)->openFileForRead(Path); + if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) + return Result; + } + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +llvm::ErrorOr<std::string> +OverlayFileSystem::getCurrentWorkingDirectory() const { + // All file systems are synchronized, just take the first working directory. 
+ return FSList.front()->getCurrentWorkingDirectory(); +} + +std::error_code +OverlayFileSystem::setCurrentWorkingDirectory(const Twine &Path) { + for (auto &FS : FSList) + if (std::error_code EC = FS->setCurrentWorkingDirectory(Path)) + return EC; + return {}; +} + +std::error_code OverlayFileSystem::isLocal(const Twine &Path, bool &Result) { + for (auto &FS : FSList) + if (FS->exists(Path)) + return FS->isLocal(Path, Result); + return errc::no_such_file_or_directory; +} + +std::error_code +OverlayFileSystem::getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const { + for (auto &FS : FSList) + if (FS->exists(Path)) + return FS->getRealPath(Path, Output); + return errc::no_such_file_or_directory; +} + +llvm::vfs::detail::DirIterImpl::~DirIterImpl() = default; + +namespace { + +class OverlayFSDirIterImpl : public llvm::vfs::detail::DirIterImpl { + OverlayFileSystem &Overlays; + std::string Path; + OverlayFileSystem::iterator CurrentFS; + directory_iterator CurrentDirIter; + llvm::StringSet<> SeenNames; + + std::error_code incrementFS() { + assert(CurrentFS != Overlays.overlays_end() && "incrementing past end"); + ++CurrentFS; + for (auto E = Overlays.overlays_end(); CurrentFS != E; ++CurrentFS) { + std::error_code EC; + CurrentDirIter = (*CurrentFS)->dir_begin(Path, EC); + if (EC && EC != errc::no_such_file_or_directory) + return EC; + if (CurrentDirIter != directory_iterator()) + break; // found + } + return {}; + } + + std::error_code incrementDirIter(bool IsFirstTime) { + assert((IsFirstTime || CurrentDirIter != directory_iterator()) && + "incrementing past end"); + std::error_code EC; + if (!IsFirstTime) + CurrentDirIter.increment(EC); + if (!EC && CurrentDirIter == directory_iterator()) + EC = incrementFS(); + return EC; + } + + std::error_code incrementImpl(bool IsFirstTime) { + while (true) { + std::error_code EC = incrementDirIter(IsFirstTime); + if (EC || CurrentDirIter == directory_iterator()) { + CurrentEntry = directory_entry(); + return 
EC; + } + CurrentEntry = *CurrentDirIter; + StringRef Name = llvm::sys::path::filename(CurrentEntry.path()); + if (SeenNames.insert(Name).second) + return EC; // name not seen before + } + llvm_unreachable("returned above"); + } + +public: + OverlayFSDirIterImpl(const Twine &Path, OverlayFileSystem &FS, + std::error_code &EC) + : Overlays(FS), Path(Path.str()), CurrentFS(Overlays.overlays_begin()) { + CurrentDirIter = (*CurrentFS)->dir_begin(Path, EC); + EC = incrementImpl(true); + } + + std::error_code increment() override { return incrementImpl(false); } +}; + +} // namespace + +directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir, + std::error_code &EC) { + return directory_iterator( + std::make_shared<OverlayFSDirIterImpl>(Dir, *this, EC)); +} + +void ProxyFileSystem::anchor() {} + +namespace llvm { +namespace vfs { + +namespace detail { + +enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink }; + +/// The in memory file system is a tree of Nodes. Every node can either be a +/// file , hardlink or a directory. +class InMemoryNode { + InMemoryNodeKind Kind; + std::string FileName; + +public: + InMemoryNode(llvm::StringRef FileName, InMemoryNodeKind Kind) + : Kind(Kind), FileName(llvm::sys::path::filename(FileName)) {} + virtual ~InMemoryNode() = default; + + /// Get the filename of this node (the name without the directory part). + StringRef getFileName() const { return FileName; } + InMemoryNodeKind getKind() const { return Kind; } + virtual std::string toString(unsigned Indent) const = 0; +}; + +class InMemoryFile : public InMemoryNode { + Status Stat; + std::unique_ptr<llvm::MemoryBuffer> Buffer; + +public: + InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer) + : InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)), + Buffer(std::move(Buffer)) {} + + /// Return the \p Status for this node. \p RequestedName should be the name + /// through which the caller referred to this node. 
It will override + /// \p Status::Name in the return value, to mimic the behavior of \p RealFile. + Status getStatus(const Twine &RequestedName) const { + return Status::copyWithNewName(Stat, RequestedName); + } + llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); } + + std::string toString(unsigned Indent) const override { + return (std::string(Indent, ' ') + Stat.getName() + "\n").str(); + } + + static bool classof(const InMemoryNode *N) { + return N->getKind() == IME_File; + } +}; + +namespace { + +class InMemoryHardLink : public InMemoryNode { + const InMemoryFile &ResolvedFile; + +public: + InMemoryHardLink(StringRef Path, const InMemoryFile &ResolvedFile) + : InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {} + const InMemoryFile &getResolvedFile() const { return ResolvedFile; } + + std::string toString(unsigned Indent) const override { + return std::string(Indent, ' ') + "HardLink to -> " + + ResolvedFile.toString(0); + } + + static bool classof(const InMemoryNode *N) { + return N->getKind() == IME_HardLink; + } +}; + +/// Adapt a InMemoryFile for VFS' File interface. The goal is to make +/// \p InMemoryFileAdaptor mimic as much as possible the behavior of +/// \p RealFile. +class InMemoryFileAdaptor : public File { + const InMemoryFile &Node; + /// The name to use when returning a Status for this file. 
+ std::string RequestedName; + +public: + explicit InMemoryFileAdaptor(const InMemoryFile &Node, + std::string RequestedName) + : Node(Node), RequestedName(std::move(RequestedName)) {} + + llvm::ErrorOr<Status> status() override { + return Node.getStatus(RequestedName); + } + + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> + getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, + bool IsVolatile) override { + llvm::MemoryBuffer *Buf = Node.getBuffer(); + return llvm::MemoryBuffer::getMemBuffer( + Buf->getBuffer(), Buf->getBufferIdentifier(), RequiresNullTerminator); + } + + std::error_code close() override { return {}; } +}; +} // namespace + +class InMemoryDirectory : public InMemoryNode { + Status Stat; + llvm::StringMap<std::unique_ptr<InMemoryNode>> Entries; + +public: + InMemoryDirectory(Status Stat) + : InMemoryNode(Stat.getName(), IME_Directory), Stat(std::move(Stat)) {} + + /// Return the \p Status for this node. \p RequestedName should be the name + /// through which the caller referred to this node. It will override + /// \p Status::Name in the return value, to mimic the behavior of \p RealFile. 
+ Status getStatus(const Twine &RequestedName) const { + return Status::copyWithNewName(Stat, RequestedName); + } + InMemoryNode *getChild(StringRef Name) { + auto I = Entries.find(Name); + if (I != Entries.end()) + return I->second.get(); + return nullptr; + } + + InMemoryNode *addChild(StringRef Name, std::unique_ptr<InMemoryNode> Child) { + return Entries.insert(make_pair(Name, std::move(Child))) + .first->second.get(); + } + + using const_iterator = decltype(Entries)::const_iterator; + + const_iterator begin() const { return Entries.begin(); } + const_iterator end() const { return Entries.end(); } + + std::string toString(unsigned Indent) const override { + std::string Result = + (std::string(Indent, ' ') + Stat.getName() + "\n").str(); + for (const auto &Entry : Entries) + Result += Entry.second->toString(Indent + 2); + return Result; + } + + static bool classof(const InMemoryNode *N) { + return N->getKind() == IME_Directory; + } +}; + +namespace { +Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) { + if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node)) + return Dir->getStatus(RequestedName); + if (auto File = dyn_cast<detail::InMemoryFile>(Node)) + return File->getStatus(RequestedName); + if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) + return Link->getResolvedFile().getStatus(RequestedName); + llvm_unreachable("Unknown node type"); +} +} // namespace +} // namespace detail + +InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths) + : Root(new detail::InMemoryDirectory( + Status("", getNextVirtualUniqueID(), llvm::sys::TimePoint<>(), 0, 0, + 0, llvm::sys::fs::file_type::directory_file, + llvm::sys::fs::perms::all_all))), + UseNormalizedPaths(UseNormalizedPaths) {} + +InMemoryFileSystem::~InMemoryFileSystem() = default; + +std::string InMemoryFileSystem::toString() const { + return Root->toString(/*Indent=*/0); +} + +bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, + 
std::unique_ptr<llvm::MemoryBuffer> Buffer, + Optional<uint32_t> User, + Optional<uint32_t> Group, + Optional<llvm::sys::fs::file_type> Type, + Optional<llvm::sys::fs::perms> Perms, + const detail::InMemoryFile *HardLinkTarget) { + SmallString<128> Path; + P.toVector(Path); + + // Fix up relative paths. This just prepends the current working directory. + std::error_code EC = makeAbsolute(Path); + assert(!EC); + (void)EC; + + if (useNormalizedPaths()) + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + + if (Path.empty()) + return false; + + detail::InMemoryDirectory *Dir = Root.get(); + auto I = llvm::sys::path::begin(Path), E = sys::path::end(Path); + const auto ResolvedUser = User.getValueOr(0); + const auto ResolvedGroup = Group.getValueOr(0); + const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file); + const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all); + assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer"); + // Any intermediate directories we create should be accessible by + // the owner, even if Perms says otherwise for the final path. + const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all; + while (true) { + StringRef Name = *I; + detail::InMemoryNode *Node = Dir->getChild(Name); + ++I; + if (!Node) { + if (I == E) { + // End of the path. + std::unique_ptr<detail::InMemoryNode> Child; + if (HardLinkTarget) + Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget)); + else { + // Create a new file or directory. 
+ Status Stat(P.str(), getNextVirtualUniqueID(), + llvm::sys::toTimePoint(ModificationTime), ResolvedUser, + ResolvedGroup, Buffer->getBufferSize(), ResolvedType, + ResolvedPerms); + if (ResolvedType == sys::fs::file_type::directory_file) { + Child.reset(new detail::InMemoryDirectory(std::move(Stat))); + } else { + Child.reset( + new detail::InMemoryFile(std::move(Stat), std::move(Buffer))); + } + } + Dir->addChild(Name, std::move(Child)); + return true; + } + + // Create a new directory. Use the path up to here. + Status Stat( + StringRef(Path.str().begin(), Name.end() - Path.str().begin()), + getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime), + ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file, + NewDirectoryPerms); + Dir = cast<detail::InMemoryDirectory>(Dir->addChild( + Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat)))); + continue; + } + + if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) { + Dir = NewDir; + } else { + assert((isa<detail::InMemoryFile>(Node) || + isa<detail::InMemoryHardLink>(Node)) && + "Must be either file, hardlink or directory!"); + + // Trying to insert a directory in place of a file. + if (I != E) + return false; + + // Return false only if the new file is different from the existing one. 
+ if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) { + return Link->getResolvedFile().getBuffer()->getBuffer() == + Buffer->getBuffer(); + } + return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() == + Buffer->getBuffer(); + } + } +} + +bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, + std::unique_ptr<llvm::MemoryBuffer> Buffer, + Optional<uint32_t> User, + Optional<uint32_t> Group, + Optional<llvm::sys::fs::file_type> Type, + Optional<llvm::sys::fs::perms> Perms) { + return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type, + Perms, /*HardLinkTarget=*/nullptr); +} + +bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime, + llvm::MemoryBuffer *Buffer, + Optional<uint32_t> User, + Optional<uint32_t> Group, + Optional<llvm::sys::fs::file_type> Type, + Optional<llvm::sys::fs::perms> Perms) { + return addFile(P, ModificationTime, + llvm::MemoryBuffer::getMemBuffer( + Buffer->getBuffer(), Buffer->getBufferIdentifier()), + std::move(User), std::move(Group), std::move(Type), + std::move(Perms)); +} + +static ErrorOr<const detail::InMemoryNode *> +lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir, + const Twine &P) { + SmallString<128> Path; + P.toVector(Path); + + // Fix up relative paths. This just prepends the current working directory. + std::error_code EC = FS.makeAbsolute(Path); + assert(!EC); + (void)EC; + + if (FS.useNormalizedPaths()) + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + + if (Path.empty()) + return Dir; + + auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path); + while (true) { + detail::InMemoryNode *Node = Dir->getChild(*I); + ++I; + if (!Node) + return errc::no_such_file_or_directory; + + // Return the file if it's at the end of the path. 
+ if (auto File = dyn_cast<detail::InMemoryFile>(Node)) { + if (I == E) + return File; + return errc::no_such_file_or_directory; + } + + // If Node is HardLink then return the resolved file. + if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) { + if (I == E) + return &File->getResolvedFile(); + return errc::no_such_file_or_directory; + } + // Traverse directories. + Dir = cast<detail::InMemoryDirectory>(Node); + if (I == E) + return Dir; + } +} + +bool InMemoryFileSystem::addHardLink(const Twine &FromPath, + const Twine &ToPath) { + auto FromNode = lookupInMemoryNode(*this, Root.get(), FromPath); + auto ToNode = lookupInMemoryNode(*this, Root.get(), ToPath); + // FromPath must not have been added before. ToPath must have been added + // before. Resolved ToPath must be a File. + if (!ToNode || FromNode || !isa<detail::InMemoryFile>(*ToNode)) + return false; + return this->addFile(FromPath, 0, nullptr, None, None, None, None, + cast<detail::InMemoryFile>(*ToNode)); +} + +llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) { + auto Node = lookupInMemoryNode(*this, Root.get(), Path); + if (Node) + return detail::getNodeStatus(*Node, Path); + return Node.getError(); +} + +llvm::ErrorOr<std::unique_ptr<File>> +InMemoryFileSystem::openFileForRead(const Twine &Path) { + auto Node = lookupInMemoryNode(*this, Root.get(), Path); + if (!Node) + return Node.getError(); + + // When we have a file provide a heap-allocated wrapper for the memory buffer + // to match the ownership semantics for File. + if (auto *F = dyn_cast<detail::InMemoryFile>(*Node)) + return std::unique_ptr<File>( + new detail::InMemoryFileAdaptor(*F, Path.str())); + + // FIXME: errc::not_a_file? + return make_error_code(llvm::errc::invalid_argument); +} + +namespace { + +/// Adaptor from InMemoryDir::iterator to directory_iterator. 
+class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl { + detail::InMemoryDirectory::const_iterator I; + detail::InMemoryDirectory::const_iterator E; + std::string RequestedDirName; + + void setCurrentEntry() { + if (I != E) { + SmallString<256> Path(RequestedDirName); + llvm::sys::path::append(Path, I->second->getFileName()); + sys::fs::file_type Type; + switch (I->second->getKind()) { + case detail::IME_File: + case detail::IME_HardLink: + Type = sys::fs::file_type::regular_file; + break; + case detail::IME_Directory: + Type = sys::fs::file_type::directory_file; + break; + } + CurrentEntry = directory_entry(Path.str(), Type); + } else { + // When we're at the end, make CurrentEntry invalid and DirIterImpl will + // do the rest. + CurrentEntry = directory_entry(); + } + } + +public: + InMemoryDirIterator() = default; + + explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir, + std::string RequestedDirName) + : I(Dir.begin()), E(Dir.end()), + RequestedDirName(std::move(RequestedDirName)) { + setCurrentEntry(); + } + + std::error_code increment() override { + ++I; + setCurrentEntry(); + return {}; + } +}; + +} // namespace + +directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir, + std::error_code &EC) { + auto Node = lookupInMemoryNode(*this, Root.get(), Dir); + if (!Node) { + EC = Node.getError(); + return directory_iterator(std::make_shared<InMemoryDirIterator>()); + } + + if (auto *DirNode = dyn_cast<detail::InMemoryDirectory>(*Node)) + return directory_iterator( + std::make_shared<InMemoryDirIterator>(*DirNode, Dir.str())); + + EC = make_error_code(llvm::errc::not_a_directory); + return directory_iterator(std::make_shared<InMemoryDirIterator>()); +} + +std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) { + SmallString<128> Path; + P.toVector(Path); + + // Fix up relative paths. This just prepends the current working directory. 
+ std::error_code EC = makeAbsolute(Path); + assert(!EC); + (void)EC; + + if (useNormalizedPaths()) + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + + if (!Path.empty()) + WorkingDirectory = Path.str(); + return {}; +} + +std::error_code +InMemoryFileSystem::getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const { + auto CWD = getCurrentWorkingDirectory(); + if (!CWD || CWD->empty()) + return errc::operation_not_permitted; + Path.toVector(Output); + if (auto EC = makeAbsolute(Output)) + return EC; + llvm::sys::path::remove_dots(Output, /*remove_dot_dot=*/true); + return {}; +} + +std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) { + Result = false; + return {}; +} + +} // namespace vfs +} // namespace llvm + +//===-----------------------------------------------------------------------===/ +// RedirectingFileSystem implementation +//===-----------------------------------------------------------------------===/ + +RedirectingFileSystem::RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> FS) + : ExternalFS(std::move(FS)) { + if (ExternalFS) + if (auto ExternalWorkingDirectory = + ExternalFS->getCurrentWorkingDirectory()) { + WorkingDirectory = *ExternalWorkingDirectory; + ExternalFSValidWD = true; + } +} + +// FIXME: reuse implementation common with OverlayFSDirIterImpl as these +// iterators are conceptually similar. +class llvm::vfs::VFSFromYamlDirIterImpl + : public llvm::vfs::detail::DirIterImpl { + std::string Dir; + RedirectingFileSystem::RedirectingDirectoryEntry::iterator Current, End; + + // To handle 'fallthrough' mode we need to iterate at first through + // RedirectingDirectoryEntry and then through ExternalFS. These operations are + // done sequentially, we just need to keep a track of what kind of iteration + // we are currently performing. + + /// Flag telling if we should iterate through ExternalFS or stop at the last + /// RedirectingDirectoryEntry::iterator. 
+ bool IterateExternalFS; + /// Flag telling if we have switched to iterating through ExternalFS. + bool IsExternalFSCurrent = false; + FileSystem &ExternalFS; + directory_iterator ExternalDirIter; + llvm::StringSet<> SeenNames; + + /// To combine multiple iterations, different methods are responsible for + /// different iteration steps. + /// @{ + + /// Responsible for dispatching between RedirectingDirectoryEntry iteration + /// and ExternalFS iteration. + std::error_code incrementImpl(bool IsFirstTime); + /// Responsible for RedirectingDirectoryEntry iteration. + std::error_code incrementContent(bool IsFirstTime); + /// Responsible for ExternalFS iteration. + std::error_code incrementExternal(); + /// @} + +public: + VFSFromYamlDirIterImpl( + const Twine &Path, + RedirectingFileSystem::RedirectingDirectoryEntry::iterator Begin, + RedirectingFileSystem::RedirectingDirectoryEntry::iterator End, + bool IterateExternalFS, FileSystem &ExternalFS, std::error_code &EC); + + std::error_code increment() override; +}; + +llvm::ErrorOr<std::string> +RedirectingFileSystem::getCurrentWorkingDirectory() const { + return WorkingDirectory; +} + +std::error_code +RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) { + // Don't change the working directory if the path doesn't exist. + if (!exists(Path)) + return errc::no_such_file_or_directory; + + // Always change the external FS but ignore its result. 
+ if (ExternalFS) { + auto EC = ExternalFS->setCurrentWorkingDirectory(Path); + ExternalFSValidWD = !static_cast<bool>(EC); + } + + SmallString<128> AbsolutePath; + Path.toVector(AbsolutePath); + if (std::error_code EC = makeAbsolute(AbsolutePath)) + return EC; + WorkingDirectory = AbsolutePath.str(); + return {}; +} + +std::error_code RedirectingFileSystem::isLocal(const Twine &Path, + bool &Result) { + return ExternalFS->isLocal(Path, Result); +} + +directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, + std::error_code &EC) { + ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Dir); + if (!E) { + EC = E.getError(); + if (shouldUseExternalFS() && EC == errc::no_such_file_or_directory) + return ExternalFS->dir_begin(Dir, EC); + return {}; + } + ErrorOr<Status> S = status(Dir, *E); + if (!S) { + EC = S.getError(); + return {}; + } + if (!S->isDirectory()) { + EC = std::error_code(static_cast<int>(errc::not_a_directory), + std::system_category()); + return {}; + } + + auto *D = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(*E); + return directory_iterator(std::make_shared<VFSFromYamlDirIterImpl>( + Dir, D->contents_begin(), D->contents_end(), + /*IterateExternalFS=*/shouldUseExternalFS(), *ExternalFS, EC)); +} + +void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) { + ExternalContentsPrefixDir = PrefixDir.str(); +} + +StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { + return ExternalContentsPrefixDir; +} + +void RedirectingFileSystem::dump(raw_ostream &OS) const { + for (const auto &Root : Roots) + dumpEntry(OS, Root.get()); +} + +void RedirectingFileSystem::dumpEntry(raw_ostream &OS, + RedirectingFileSystem::Entry *E, + int NumSpaces) const { + StringRef Name = E->getName(); + for (int i = 0, e = NumSpaces; i < e; ++i) + OS << " "; + OS << "'" << Name.str().c_str() << "'" + << "\n"; + + if (E->getKind() == RedirectingFileSystem::EK_Directory) { + auto *DE = 
dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E); + assert(DE && "Should be a directory"); + + for (std::unique_ptr<Entry> &SubEntry : + llvm::make_range(DE->contents_begin(), DE->contents_end())) + dumpEntry(OS, SubEntry.get(), NumSpaces + 2); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { dump(dbgs()); } +#endif + +/// A helper class to hold the common YAML parsing state. +class llvm::vfs::RedirectingFileSystemParser { + yaml::Stream &Stream; + + void error(yaml::Node *N, const Twine &Msg) { Stream.printError(N, Msg); } + + // false on error + bool parseScalarString(yaml::Node *N, StringRef &Result, + SmallVectorImpl<char> &Storage) { + const auto *S = dyn_cast<yaml::ScalarNode>(N); + + if (!S) { + error(N, "expected string"); + return false; + } + Result = S->getValue(Storage); + return true; + } + + // false on error + bool parseScalarBool(yaml::Node *N, bool &Result) { + SmallString<5> Storage; + StringRef Value; + if (!parseScalarString(N, Value, Storage)) + return false; + + if (Value.equals_lower("true") || Value.equals_lower("on") || + Value.equals_lower("yes") || Value == "1") { + Result = true; + return true; + } else if (Value.equals_lower("false") || Value.equals_lower("off") || + Value.equals_lower("no") || Value == "0") { + Result = false; + return true; + } + + error(N, "expected boolean value"); + return false; + } + + struct KeyStatus { + bool Required; + bool Seen = false; + + KeyStatus(bool Required = false) : Required(Required) {} + }; + + using KeyStatusPair = std::pair<StringRef, KeyStatus>; + + // false on error + bool checkDuplicateOrUnknownKey(yaml::Node *KeyNode, StringRef Key, + DenseMap<StringRef, KeyStatus> &Keys) { + if (!Keys.count(Key)) { + error(KeyNode, "unknown key"); + return false; + } + KeyStatus &S = Keys[Key]; + if (S.Seen) { + error(KeyNode, Twine("duplicate key '") + Key + "'"); + return false; + } + S.Seen = true; + return true; + } + + 
// false on error + bool checkMissingKeys(yaml::Node *Obj, DenseMap<StringRef, KeyStatus> &Keys) { + for (const auto &I : Keys) { + if (I.second.Required && !I.second.Seen) { + error(Obj, Twine("missing key '") + I.first + "'"); + return false; + } + } + return true; + } + + RedirectingFileSystem::Entry * + lookupOrCreateEntry(RedirectingFileSystem *FS, StringRef Name, + RedirectingFileSystem::Entry *ParentEntry = nullptr) { + if (!ParentEntry) { // Look for a existent root + for (const auto &Root : FS->Roots) { + if (Name.equals(Root->getName())) { + ParentEntry = Root.get(); + return ParentEntry; + } + } + } else { // Advance to the next component + auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>( + ParentEntry); + for (std::unique_ptr<RedirectingFileSystem::Entry> &Content : + llvm::make_range(DE->contents_begin(), DE->contents_end())) { + auto *DirContent = + dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>( + Content.get()); + if (DirContent && Name.equals(Content->getName())) + return DirContent; + } + } + + // ... 
or create a new one + std::unique_ptr<RedirectingFileSystem::Entry> E = + std::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>( + Name, Status("", getNextVirtualUniqueID(), + std::chrono::system_clock::now(), 0, 0, 0, + file_type::directory_file, sys::fs::all_all)); + + if (!ParentEntry) { // Add a new root to the overlay + FS->Roots.push_back(std::move(E)); + ParentEntry = FS->Roots.back().get(); + return ParentEntry; + } + + auto *DE = + cast<RedirectingFileSystem::RedirectingDirectoryEntry>(ParentEntry); + DE->addContent(std::move(E)); + return DE->getLastContent(); + } + + void uniqueOverlayTree(RedirectingFileSystem *FS, + RedirectingFileSystem::Entry *SrcE, + RedirectingFileSystem::Entry *NewParentE = nullptr) { + StringRef Name = SrcE->getName(); + switch (SrcE->getKind()) { + case RedirectingFileSystem::EK_Directory: { + auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE); + // Empty directories could be present in the YAML as a way to + // describe a file for a current directory after some of its subdir + // is parsed. This only leads to redundant walks, ignore it. 
+ if (!Name.empty()) + NewParentE = lookupOrCreateEntry(FS, Name, NewParentE); + for (std::unique_ptr<RedirectingFileSystem::Entry> &SubEntry : + llvm::make_range(DE->contents_begin(), DE->contents_end())) + uniqueOverlayTree(FS, SubEntry.get(), NewParentE); + break; + } + case RedirectingFileSystem::EK_File: { + assert(NewParentE && "Parent entry must exist"); + auto *FE = cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE); + auto *DE = + cast<RedirectingFileSystem::RedirectingDirectoryEntry>(NewParentE); + DE->addContent( + std::make_unique<RedirectingFileSystem::RedirectingFileEntry>( + Name, FE->getExternalContentsPath(), FE->getUseName())); + break; + } + } + } + + std::unique_ptr<RedirectingFileSystem::Entry> + parseEntry(yaml::Node *N, RedirectingFileSystem *FS, bool IsRootEntry) { + auto *M = dyn_cast<yaml::MappingNode>(N); + if (!M) { + error(N, "expected mapping node for file or directory entry"); + return nullptr; + } + + KeyStatusPair Fields[] = { + KeyStatusPair("name", true), + KeyStatusPair("type", true), + KeyStatusPair("contents", false), + KeyStatusPair("external-contents", false), + KeyStatusPair("use-external-name", false), + }; + + DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields)); + + bool HasContents = false; // external or otherwise + std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> + EntryArrayContents; + std::string ExternalContentsPath; + std::string Name; + yaml::Node *NameValueNode = nullptr; + auto UseExternalName = + RedirectingFileSystem::RedirectingFileEntry::NK_NotSet; + RedirectingFileSystem::EntryKind Kind; + + for (auto &I : *M) { + StringRef Key; + // Reuse the buffer for key and value, since we don't look at key after + // parsing value. 
+ SmallString<256> Buffer; + if (!parseScalarString(I.getKey(), Key, Buffer)) + return nullptr; + + if (!checkDuplicateOrUnknownKey(I.getKey(), Key, Keys)) + return nullptr; + + StringRef Value; + if (Key == "name") { + if (!parseScalarString(I.getValue(), Value, Buffer)) + return nullptr; + + NameValueNode = I.getValue(); + if (FS->UseCanonicalizedPaths) { + SmallString<256> Path(Value); + // Guarantee that old YAML files containing paths with ".." and "." + // are properly canonicalized before read into the VFS. + Path = sys::path::remove_leading_dotslash(Path); + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + Name = Path.str(); + } else { + Name = Value; + } + } else if (Key == "type") { + if (!parseScalarString(I.getValue(), Value, Buffer)) + return nullptr; + if (Value == "file") + Kind = RedirectingFileSystem::EK_File; + else if (Value == "directory") + Kind = RedirectingFileSystem::EK_Directory; + else { + error(I.getValue(), "unknown value for 'type'"); + return nullptr; + } + } else if (Key == "contents") { + if (HasContents) { + error(I.getKey(), + "entry already has 'contents' or 'external-contents'"); + return nullptr; + } + HasContents = true; + auto *Contents = dyn_cast<yaml::SequenceNode>(I.getValue()); + if (!Contents) { + // FIXME: this is only for directories, what about files? 
+ error(I.getValue(), "expected array"); + return nullptr; + } + + for (auto &I : *Contents) { + if (std::unique_ptr<RedirectingFileSystem::Entry> E = + parseEntry(&I, FS, /*IsRootEntry*/ false)) + EntryArrayContents.push_back(std::move(E)); + else + return nullptr; + } + } else if (Key == "external-contents") { + if (HasContents) { + error(I.getKey(), + "entry already has 'contents' or 'external-contents'"); + return nullptr; + } + HasContents = true; + if (!parseScalarString(I.getValue(), Value, Buffer)) + return nullptr; + + SmallString<256> FullPath; + if (FS->IsRelativeOverlay) { + FullPath = FS->getExternalContentsPrefixDir(); + assert(!FullPath.empty() && + "External contents prefix directory must exist"); + llvm::sys::path::append(FullPath, Value); + } else { + FullPath = Value; + } + + if (FS->UseCanonicalizedPaths) { + // Guarantee that old YAML files containing paths with ".." and "." + // are properly canonicalized before read into the VFS. + FullPath = sys::path::remove_leading_dotslash(FullPath); + sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true); + } + ExternalContentsPath = FullPath.str(); + } else if (Key == "use-external-name") { + bool Val; + if (!parseScalarBool(I.getValue(), Val)) + return nullptr; + UseExternalName = + Val ? 
RedirectingFileSystem::RedirectingFileEntry::NK_External + : RedirectingFileSystem::RedirectingFileEntry::NK_Virtual; + } else { + llvm_unreachable("key missing from Keys"); + } + } + + if (Stream.failed()) + return nullptr; + + // check for missing keys + if (!HasContents) { + error(N, "missing key 'contents' or 'external-contents'"); + return nullptr; + } + if (!checkMissingKeys(N, Keys)) + return nullptr; + + // check invalid configuration + if (Kind == RedirectingFileSystem::EK_Directory && + UseExternalName != + RedirectingFileSystem::RedirectingFileEntry::NK_NotSet) { + error(N, "'use-external-name' is not supported for directories"); + return nullptr; + } + + if (IsRootEntry && !sys::path::is_absolute(Name)) { + assert(NameValueNode && "Name presence should be checked earlier"); + error(NameValueNode, + "entry with relative path at the root level is not discoverable"); + return nullptr; + } + + // Remove trailing slash(es), being careful not to remove the root path + StringRef Trimmed(Name); + size_t RootPathLen = sys::path::root_path(Trimmed).size(); + while (Trimmed.size() > RootPathLen && + sys::path::is_separator(Trimmed.back())) + Trimmed = Trimmed.slice(0, Trimmed.size() - 1); + // Get the last component + StringRef LastComponent = sys::path::filename(Trimmed); + + std::unique_ptr<RedirectingFileSystem::Entry> Result; + switch (Kind) { + case RedirectingFileSystem::EK_File: + Result = std::make_unique<RedirectingFileSystem::RedirectingFileEntry>( + LastComponent, std::move(ExternalContentsPath), UseExternalName); + break; + case RedirectingFileSystem::EK_Directory: + Result = + std::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>( + LastComponent, std::move(EntryArrayContents), + Status("", getNextVirtualUniqueID(), + std::chrono::system_clock::now(), 0, 0, 0, + file_type::directory_file, sys::fs::all_all)); + break; + } + + StringRef Parent = sys::path::parent_path(Trimmed); + if (Parent.empty()) + return Result; + + // if 'name' 
contains multiple components, create implicit directory entries + for (sys::path::reverse_iterator I = sys::path::rbegin(Parent), + E = sys::path::rend(Parent); + I != E; ++I) { + std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> Entries; + Entries.push_back(std::move(Result)); + Result = + std::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>( + *I, std::move(Entries), + Status("", getNextVirtualUniqueID(), + std::chrono::system_clock::now(), 0, 0, 0, + file_type::directory_file, sys::fs::all_all)); + } + return Result; + } + +public: + RedirectingFileSystemParser(yaml::Stream &S) : Stream(S) {} + + // false on error + bool parse(yaml::Node *Root, RedirectingFileSystem *FS) { + auto *Top = dyn_cast<yaml::MappingNode>(Root); + if (!Top) { + error(Root, "expected mapping node"); + return false; + } + + KeyStatusPair Fields[] = { + KeyStatusPair("version", true), + KeyStatusPair("case-sensitive", false), + KeyStatusPair("use-external-names", false), + KeyStatusPair("overlay-relative", false), + KeyStatusPair("fallthrough", false), + KeyStatusPair("roots", true), + }; + + DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields)); + std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> RootEntries; + + // Parse configuration and 'roots' + for (auto &I : *Top) { + SmallString<10> KeyBuffer; + StringRef Key; + if (!parseScalarString(I.getKey(), Key, KeyBuffer)) + return false; + + if (!checkDuplicateOrUnknownKey(I.getKey(), Key, Keys)) + return false; + + if (Key == "roots") { + auto *Roots = dyn_cast<yaml::SequenceNode>(I.getValue()); + if (!Roots) { + error(I.getValue(), "expected array"); + return false; + } + + for (auto &I : *Roots) { + if (std::unique_ptr<RedirectingFileSystem::Entry> E = + parseEntry(&I, FS, /*IsRootEntry*/ true)) + RootEntries.push_back(std::move(E)); + else + return false; + } + } else if (Key == "version") { + StringRef VersionString; + SmallString<4> Storage; + if (!parseScalarString(I.getValue(), 
VersionString, Storage)) + return false; + int Version; + if (VersionString.getAsInteger<int>(10, Version)) { + error(I.getValue(), "expected integer"); + return false; + } + if (Version < 0) { + error(I.getValue(), "invalid version number"); + return false; + } + if (Version != 0) { + error(I.getValue(), "version mismatch, expected 0"); + return false; + } + } else if (Key == "case-sensitive") { + if (!parseScalarBool(I.getValue(), FS->CaseSensitive)) + return false; + } else if (Key == "overlay-relative") { + if (!parseScalarBool(I.getValue(), FS->IsRelativeOverlay)) + return false; + } else if (Key == "use-external-names") { + if (!parseScalarBool(I.getValue(), FS->UseExternalNames)) + return false; + } else if (Key == "fallthrough") { + if (!parseScalarBool(I.getValue(), FS->IsFallthrough)) + return false; + } else { + llvm_unreachable("key missing from Keys"); + } + } + + if (Stream.failed()) + return false; + + if (!checkMissingKeys(Top, Keys)) + return false; + + // Now that we sucessefully parsed the YAML file, canonicalize the internal + // representation to a proper directory tree so that we can search faster + // inside the VFS. 
+ for (auto &E : RootEntries) + uniqueOverlayTree(FS, E.get()); + + return true; + } +}; + +RedirectingFileSystem * +RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer, + SourceMgr::DiagHandlerTy DiagHandler, + StringRef YAMLFilePath, void *DiagContext, + IntrusiveRefCntPtr<FileSystem> ExternalFS) { + SourceMgr SM; + yaml::Stream Stream(Buffer->getMemBufferRef(), SM); + + SM.setDiagHandler(DiagHandler, DiagContext); + yaml::document_iterator DI = Stream.begin(); + yaml::Node *Root = DI->getRoot(); + if (DI == Stream.end() || !Root) { + SM.PrintMessage(SMLoc(), SourceMgr::DK_Error, "expected root node"); + return nullptr; + } + + RedirectingFileSystemParser P(Stream); + + std::unique_ptr<RedirectingFileSystem> FS( + new RedirectingFileSystem(ExternalFS)); + + if (!YAMLFilePath.empty()) { + // Use the YAML path from -ivfsoverlay to compute the dir to be prefixed + // to each 'external-contents' path. + // + // Example: + // -ivfsoverlay dummy.cache/vfs/vfs.yaml + // yields: + // FS->ExternalContentsPrefixDir => /<absolute_path_to>/dummy.cache/vfs + // + SmallString<256> OverlayAbsDir = sys::path::parent_path(YAMLFilePath); + std::error_code EC = llvm::sys::fs::make_absolute(OverlayAbsDir); + assert(!EC && "Overlay dir final path must be absolute"); + (void)EC; + FS->setExternalContentsPrefixDir(OverlayAbsDir); + } + + if (!P.parse(Root, FS.get())) + return nullptr; + + return FS.release(); +} + +ErrorOr<RedirectingFileSystem::Entry *> +RedirectingFileSystem::lookupPath(const Twine &Path_) const { + SmallString<256> Path; + Path_.toVector(Path); + + // Handle relative paths + if (std::error_code EC = makeAbsolute(Path)) + return EC; + + // Canonicalize path by removing ".", "..", "./", etc components. This is + // a VFS request, do bot bother about symlinks in the path components + // but canonicalize in order to perform the correct entry search. 
+ if (UseCanonicalizedPaths) { + Path = sys::path::remove_leading_dotslash(Path); + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + } + + if (Path.empty()) + return make_error_code(llvm::errc::invalid_argument); + + sys::path::const_iterator Start = sys::path::begin(Path); + sys::path::const_iterator End = sys::path::end(Path); + for (const auto &Root : Roots) { + ErrorOr<RedirectingFileSystem::Entry *> Result = + lookupPath(Start, End, Root.get()); + if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) + return Result; + } + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +ErrorOr<RedirectingFileSystem::Entry *> +RedirectingFileSystem::lookupPath(sys::path::const_iterator Start, + sys::path::const_iterator End, + RedirectingFileSystem::Entry *From) const { +#ifndef _WIN32 + assert(!isTraversalComponent(*Start) && + !isTraversalComponent(From->getName()) && + "Paths should not contain traversal components"); +#else + // FIXME: this is here to support windows, remove it once canonicalized + // paths become globally default. + if (Start->equals(".")) + ++Start; +#endif + + StringRef FromName = From->getName(); + + // Forward the search to the next component in case this is an empty one. + if (!FromName.empty()) { + if (CaseSensitive ? !Start->equals(FromName) + : !Start->equals_lower(FromName)) + // failure to match + return make_error_code(llvm::errc::no_such_file_or_directory); + + ++Start; + + if (Start == End) { + // Match! 
+ return From; + } + } + + auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(From); + if (!DE) + return make_error_code(llvm::errc::not_a_directory); + + for (const std::unique_ptr<RedirectingFileSystem::Entry> &DirEntry : + llvm::make_range(DE->contents_begin(), DE->contents_end())) { + ErrorOr<RedirectingFileSystem::Entry *> Result = + lookupPath(Start, End, DirEntry.get()); + if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) + return Result; + } + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames, + Status ExternalStatus) { + Status S = ExternalStatus; + if (!UseExternalNames) + S = Status::copyWithNewName(S, Path); + S.IsVFSMapped = true; + return S; +} + +ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path, + RedirectingFileSystem::Entry *E) { + assert(E != nullptr); + if (auto *F = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(E)) { + ErrorOr<Status> S = ExternalFS->status(F->getExternalContentsPath()); + assert(!S || S->getName() == F->getExternalContentsPath()); + if (S) + return getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames), + *S); + return S; + } else { // directory + auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E); + return Status::copyWithNewName(DE->getStatus(), Path); + } +} + +ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path) { + ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path); + if (!Result) { + if (shouldUseExternalFS() && + Result.getError() == llvm::errc::no_such_file_or_directory) { + return ExternalFS->status(Path); + } + return Result.getError(); + } + return status(Path, *Result); +} + +namespace { + +/// Provide a file wrapper with an overriden status. 
+class FileWithFixedStatus : public File { + std::unique_ptr<File> InnerFile; + Status S; + +public: + FileWithFixedStatus(std::unique_ptr<File> InnerFile, Status S) + : InnerFile(std::move(InnerFile)), S(std::move(S)) {} + + ErrorOr<Status> status() override { return S; } + ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> + + getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, + bool IsVolatile) override { + return InnerFile->getBuffer(Name, FileSize, RequiresNullTerminator, + IsVolatile); + } + + std::error_code close() override { return InnerFile->close(); } +}; + +} // namespace + +ErrorOr<std::unique_ptr<File>> +RedirectingFileSystem::openFileForRead(const Twine &Path) { + ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Path); + if (!E) { + if (shouldUseExternalFS() && + E.getError() == llvm::errc::no_such_file_or_directory) { + return ExternalFS->openFileForRead(Path); + } + return E.getError(); + } + + auto *F = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(*E); + if (!F) // FIXME: errc::not_a_file? + return make_error_code(llvm::errc::invalid_argument); + + auto Result = ExternalFS->openFileForRead(F->getExternalContentsPath()); + if (!Result) + return Result; + + auto ExternalStatus = (*Result)->status(); + if (!ExternalStatus) + return ExternalStatus.getError(); + + // FIXME: Update the status with the name and VFSMapped. 
+ Status S = getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames), + *ExternalStatus); + return std::unique_ptr<File>( + std::make_unique<FileWithFixedStatus>(std::move(*Result), S)); +} + +std::error_code +RedirectingFileSystem::getRealPath(const Twine &Path, + SmallVectorImpl<char> &Output) const { + ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path); + if (!Result) { + if (shouldUseExternalFS() && + Result.getError() == llvm::errc::no_such_file_or_directory) { + return ExternalFS->getRealPath(Path, Output); + } + return Result.getError(); + } + + if (auto *F = + dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(*Result)) { + return ExternalFS->getRealPath(F->getExternalContentsPath(), Output); + } + // Even if there is a directory entry, fall back to ExternalFS if allowed, + // because directories don't have a single external contents path. + return shouldUseExternalFS() ? ExternalFS->getRealPath(Path, Output) + : llvm::errc::invalid_argument; +} + +IntrusiveRefCntPtr<FileSystem> +vfs::getVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer, + SourceMgr::DiagHandlerTy DiagHandler, + StringRef YAMLFilePath, void *DiagContext, + IntrusiveRefCntPtr<FileSystem> ExternalFS) { + return RedirectingFileSystem::create(std::move(Buffer), DiagHandler, + YAMLFilePath, DiagContext, + std::move(ExternalFS)); +} + +static void getVFSEntries(RedirectingFileSystem::Entry *SrcE, + SmallVectorImpl<StringRef> &Path, + SmallVectorImpl<YAMLVFSEntry> &Entries) { + auto Kind = SrcE->getKind(); + if (Kind == RedirectingFileSystem::EK_Directory) { + auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE); + assert(DE && "Must be a directory"); + for (std::unique_ptr<RedirectingFileSystem::Entry> &SubEntry : + llvm::make_range(DE->contents_begin(), DE->contents_end())) { + Path.push_back(SubEntry->getName()); + getVFSEntries(SubEntry.get(), Path, Entries); + Path.pop_back(); + } + return; + } + + assert(Kind == 
RedirectingFileSystem::EK_File && "Must be a EK_File"); + auto *FE = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE); + assert(FE && "Must be a file"); + SmallString<128> VPath; + for (auto &Comp : Path) + llvm::sys::path::append(VPath, Comp); + Entries.push_back(YAMLVFSEntry(VPath.c_str(), FE->getExternalContentsPath())); +} + +void vfs::collectVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer, + SourceMgr::DiagHandlerTy DiagHandler, + StringRef YAMLFilePath, + SmallVectorImpl<YAMLVFSEntry> &CollectedEntries, + void *DiagContext, + IntrusiveRefCntPtr<FileSystem> ExternalFS) { + RedirectingFileSystem *VFS = RedirectingFileSystem::create( + std::move(Buffer), DiagHandler, YAMLFilePath, DiagContext, + std::move(ExternalFS)); + ErrorOr<RedirectingFileSystem::Entry *> RootE = VFS->lookupPath("/"); + if (!RootE) + return; + SmallVector<StringRef, 8> Components; + Components.push_back("/"); + getVFSEntries(*RootE, Components, CollectedEntries); +} + +UniqueID vfs::getNextVirtualUniqueID() { + static std::atomic<unsigned> UID; + unsigned ID = ++UID; + // The following assumes that uint64_t max will never collide with a real + // dev_t value from the OS. 
+ return UniqueID(std::numeric_limits<uint64_t>::max(), ID); +} + +void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) { + assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute"); + assert(sys::path::is_absolute(RealPath) && "real path not absolute"); + assert(!pathHasTraversal(VirtualPath) && "path traversal is not supported"); + Mappings.emplace_back(VirtualPath, RealPath); +} + +namespace { + +class JSONWriter { + llvm::raw_ostream &OS; + SmallVector<StringRef, 16> DirStack; + + unsigned getDirIndent() { return 4 * DirStack.size(); } + unsigned getFileIndent() { return 4 * (DirStack.size() + 1); } + bool containedIn(StringRef Parent, StringRef Path); + StringRef containedPart(StringRef Parent, StringRef Path); + void startDirectory(StringRef Path); + void endDirectory(); + void writeEntry(StringRef VPath, StringRef RPath); + +public: + JSONWriter(llvm::raw_ostream &OS) : OS(OS) {} + + void write(ArrayRef<YAMLVFSEntry> Entries, Optional<bool> UseExternalNames, + Optional<bool> IsCaseSensitive, Optional<bool> IsOverlayRelative, + StringRef OverlayDir); +}; + +} // namespace + +bool JSONWriter::containedIn(StringRef Parent, StringRef Path) { + using namespace llvm::sys; + + // Compare each path component. + auto IParent = path::begin(Parent), EParent = path::end(Parent); + for (auto IChild = path::begin(Path), EChild = path::end(Path); + IParent != EParent && IChild != EChild; ++IParent, ++IChild) { + if (*IParent != *IChild) + return false; + } + // Have we exhausted the parent path? + return IParent == EParent; +} + +StringRef JSONWriter::containedPart(StringRef Parent, StringRef Path) { + assert(!Parent.empty()); + assert(containedIn(Parent, Path)); + return Path.slice(Parent.size() + 1, StringRef::npos); +} + +void JSONWriter::startDirectory(StringRef Path) { + StringRef Name = + DirStack.empty() ? 
Path : containedPart(DirStack.back(), Path); + DirStack.push_back(Path); + unsigned Indent = getDirIndent(); + OS.indent(Indent) << "{\n"; + OS.indent(Indent + 2) << "'type': 'directory',\n"; + OS.indent(Indent + 2) << "'name': \"" << llvm::yaml::escape(Name) << "\",\n"; + OS.indent(Indent + 2) << "'contents': [\n"; +} + +void JSONWriter::endDirectory() { + unsigned Indent = getDirIndent(); + OS.indent(Indent + 2) << "]\n"; + OS.indent(Indent) << "}"; + + DirStack.pop_back(); +} + +void JSONWriter::writeEntry(StringRef VPath, StringRef RPath) { + unsigned Indent = getFileIndent(); + OS.indent(Indent) << "{\n"; + OS.indent(Indent + 2) << "'type': 'file',\n"; + OS.indent(Indent + 2) << "'name': \"" << llvm::yaml::escape(VPath) << "\",\n"; + OS.indent(Indent + 2) << "'external-contents': \"" + << llvm::yaml::escape(RPath) << "\"\n"; + OS.indent(Indent) << "}"; +} + +void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries, + Optional<bool> UseExternalNames, + Optional<bool> IsCaseSensitive, + Optional<bool> IsOverlayRelative, + StringRef OverlayDir) { + using namespace llvm::sys; + + OS << "{\n" + " 'version': 0,\n"; + if (IsCaseSensitive.hasValue()) + OS << " 'case-sensitive': '" + << (IsCaseSensitive.getValue() ? "true" : "false") << "',\n"; + if (UseExternalNames.hasValue()) + OS << " 'use-external-names': '" + << (UseExternalNames.getValue() ? "true" : "false") << "',\n"; + bool UseOverlayRelative = false; + if (IsOverlayRelative.hasValue()) { + UseOverlayRelative = IsOverlayRelative.getValue(); + OS << " 'overlay-relative': '" << (UseOverlayRelative ? 
"true" : "false") + << "',\n"; + } + OS << " 'roots': [\n"; + + if (!Entries.empty()) { + const YAMLVFSEntry &Entry = Entries.front(); + startDirectory(path::parent_path(Entry.VPath)); + + StringRef RPath = Entry.RPath; + if (UseOverlayRelative) { + unsigned OverlayDirLen = OverlayDir.size(); + assert(RPath.substr(0, OverlayDirLen) == OverlayDir && + "Overlay dir must be contained in RPath"); + RPath = RPath.slice(OverlayDirLen, RPath.size()); + } + + writeEntry(path::filename(Entry.VPath), RPath); + + for (const auto &Entry : Entries.slice(1)) { + StringRef Dir = path::parent_path(Entry.VPath); + if (Dir == DirStack.back()) + OS << ",\n"; + else { + while (!DirStack.empty() && !containedIn(DirStack.back(), Dir)) { + OS << "\n"; + endDirectory(); + } + OS << ",\n"; + startDirectory(Dir); + } + StringRef RPath = Entry.RPath; + if (UseOverlayRelative) { + unsigned OverlayDirLen = OverlayDir.size(); + assert(RPath.substr(0, OverlayDirLen) == OverlayDir && + "Overlay dir must be contained in RPath"); + RPath = RPath.slice(OverlayDirLen, RPath.size()); + } + writeEntry(path::filename(Entry.VPath), RPath); + } + + while (!DirStack.empty()) { + OS << "\n"; + endDirectory(); + } + OS << "\n"; + } + + OS << " ]\n" + << "}\n"; +} + +void YAMLVFSWriter::write(llvm::raw_ostream &OS) { + llvm::sort(Mappings, [](const YAMLVFSEntry &LHS, const YAMLVFSEntry &RHS) { + return LHS.VPath < RHS.VPath; + }); + + JSONWriter(OS).write(Mappings, UseExternalNames, IsCaseSensitive, + IsOverlayRelative, OverlayDir); +} + +VFSFromYamlDirIterImpl::VFSFromYamlDirIterImpl( + const Twine &_Path, + RedirectingFileSystem::RedirectingDirectoryEntry::iterator Begin, + RedirectingFileSystem::RedirectingDirectoryEntry::iterator End, + bool IterateExternalFS, FileSystem &ExternalFS, std::error_code &EC) + : Dir(_Path.str()), Current(Begin), End(End), + IterateExternalFS(IterateExternalFS), ExternalFS(ExternalFS) { + EC = incrementImpl(/*IsFirstTime=*/true); +} + +std::error_code 
VFSFromYamlDirIterImpl::increment() { + return incrementImpl(/*IsFirstTime=*/false); +} + +std::error_code VFSFromYamlDirIterImpl::incrementExternal() { + assert(!(IsExternalFSCurrent && ExternalDirIter == directory_iterator()) && + "incrementing past end"); + std::error_code EC; + if (IsExternalFSCurrent) { + ExternalDirIter.increment(EC); + } else if (IterateExternalFS) { + ExternalDirIter = ExternalFS.dir_begin(Dir, EC); + IsExternalFSCurrent = true; + if (EC && EC != errc::no_such_file_or_directory) + return EC; + EC = {}; + } + if (EC || ExternalDirIter == directory_iterator()) { + CurrentEntry = directory_entry(); + } else { + CurrentEntry = *ExternalDirIter; + } + return EC; +} + +std::error_code VFSFromYamlDirIterImpl::incrementContent(bool IsFirstTime) { + assert((IsFirstTime || Current != End) && "cannot iterate past end"); + if (!IsFirstTime) + ++Current; + while (Current != End) { + SmallString<128> PathStr(Dir); + llvm::sys::path::append(PathStr, (*Current)->getName()); + sys::fs::file_type Type; + switch ((*Current)->getKind()) { + case RedirectingFileSystem::EK_Directory: + Type = sys::fs::file_type::directory_file; + break; + case RedirectingFileSystem::EK_File: + Type = sys::fs::file_type::regular_file; + break; + } + CurrentEntry = directory_entry(PathStr.str(), Type); + return {}; + } + return incrementExternal(); +} + +std::error_code VFSFromYamlDirIterImpl::incrementImpl(bool IsFirstTime) { + while (true) { + std::error_code EC = IsExternalFSCurrent ? 
incrementExternal() + : incrementContent(IsFirstTime); + if (EC || CurrentEntry.path().empty()) + return EC; + StringRef Name = llvm::sys::path::filename(CurrentEntry.path()); + if (SeenNames.insert(Name).second) + return EC; // name not seen before + } + llvm_unreachable("returned above"); +} + +vfs::recursive_directory_iterator::recursive_directory_iterator( + FileSystem &FS_, const Twine &Path, std::error_code &EC) + : FS(&FS_) { + directory_iterator I = FS->dir_begin(Path, EC); + if (I != directory_iterator()) { + State = std::make_shared<detail::RecDirIterState>(); + State->Stack.push(I); + } +} + +vfs::recursive_directory_iterator & +recursive_directory_iterator::increment(std::error_code &EC) { + assert(FS && State && !State->Stack.empty() && "incrementing past end"); + assert(!State->Stack.top()->path().empty() && "non-canonical end iterator"); + vfs::directory_iterator End; + + if (State->HasNoPushRequest) + State->HasNoPushRequest = false; + else { + if (State->Stack.top()->type() == sys::fs::file_type::directory_file) { + vfs::directory_iterator I = FS->dir_begin(State->Stack.top()->path(), EC); + if (I != End) { + State->Stack.push(I); + return *this; + } + } + } + + while (!State->Stack.empty() && State->Stack.top().increment(EC) == End) + State->Stack.pop(); + + if (State->Stack.empty()) + State.reset(); // end iterator + + return *this; +} diff --git a/llvm/lib/Support/Watchdog.cpp b/llvm/lib/Support/Watchdog.cpp new file mode 100644 index 0000000000000..246f3dc7a0cad --- /dev/null +++ b/llvm/lib/Support/Watchdog.cpp @@ -0,0 +1,22 @@ +//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Watchdog class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Watchdog.h" +#include "llvm/Config/llvm-config.h" + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Watchdog.inc" +#endif +#ifdef _WIN32 +#include "Windows/Watchdog.inc" +#endif diff --git a/llvm/lib/Support/Windows/COM.inc b/llvm/lib/Support/Windows/COM.inc new file mode 100644 index 0000000000000..002182bc39394 --- /dev/null +++ b/llvm/lib/Support/Windows/COM.inc @@ -0,0 +1,36 @@ +//==- llvm/Support/Windows/COM.inc - Windows COM Implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Windows portion of COM support. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Windows code. 
+//===----------------------------------------------------------------------===// + +#include <objbase.h> + +namespace llvm { +namespace sys { + +InitializeCOMRAII::InitializeCOMRAII(COMThreadingMode Threading, + bool SpeedOverMemory) { + DWORD Coinit = 0; + if (Threading == COMThreadingMode::SingleThreaded) + Coinit |= COINIT_APARTMENTTHREADED; + else + Coinit |= COINIT_MULTITHREADED; + if (SpeedOverMemory) + Coinit |= COINIT_SPEED_OVER_MEMORY; + ::CoInitializeEx(nullptr, Coinit); +} + +InitializeCOMRAII::~InitializeCOMRAII() { ::CoUninitialize(); } +} +} diff --git a/llvm/lib/Support/Windows/DynamicLibrary.inc b/llvm/lib/Support/Windows/DynamicLibrary.inc new file mode 100644 index 0000000000000..71b206c4cf9ee --- /dev/null +++ b/llvm/lib/Support/Windows/DynamicLibrary.inc @@ -0,0 +1,202 @@ +//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of DynamicLibrary. +// +//===----------------------------------------------------------------------===// + +#include "WindowsSupport.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/raw_ostream.h" + +#include <psapi.h> + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code. +//===----------------------------------------------------------------------===// + + +DynamicLibrary::HandleSet::~HandleSet() { + for (void *Handle : llvm::reverse(Handles)) + FreeLibrary(HMODULE(Handle)); + + // 'Process' should not be released on Windows. 
/// Opens the library named by \p File, or hands back the process-wide handle.
///
/// \param File UTF-8 path of the library to load; a null pointer requests the
///   *Process* handle.
/// \param Err passed to MakeErrMsg together with a message when the path
///   cannot be converted to UTF-16 or LoadLibraryW fails.
/// \returns the address of the OpenedHandles instance when \p File is null,
///   the HMODULE from LoadLibraryW (as void*) on success, or
///   &DynamicLibrary::Invalid on failure.
void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
  // Create the instance and return it to be the *Process* handle
  // similar to dlopen(NULL, RTLD_LAZY|RTLD_GLOBAL)
  if (!File)
    return &(*OpenedHandles);

  SmallVector<wchar_t, MAX_PATH> FileUnicode;
  if (std::error_code ec = windows::UTF8ToUTF16(File, FileUnicode)) {
    // Publish the conversion error as the thread's last-error value before
    // calling MakeErrMsg.
    // NOTE(review): presumably MakeErrMsg reads GetLastError() - confirm.
    SetLastError(ec.value());
    MakeErrMsg(Err, std::string(File) + ": Can't convert to UTF-16");
    return &DynamicLibrary::Invalid;
  }

  HMODULE Handle = LoadLibraryW(FileUnicode.data());
  if (Handle == NULL) {
    MakeErrMsg(Err, std::string(File) + ": Can't open");
    return &DynamicLibrary::Invalid;
  }

  return reinterpret_cast<void*>(Handle);
}
/// Resolves \p Symbol against \p Handle.
///
/// When \p Handle is an ordinary module handle the lookup is a single
/// GetProcAddress call. When it is the *Process* handle (the OpenedHandles
/// instance itself), every module currently loaded in the process is
/// searched: the first enumerated module first, then the remaining modules in
/// reverse enumeration order.
///
/// \returns the symbol's address, or nullptr if it is not found, if the
///   *Process* handle has been dropped, or if module enumeration fails.
void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
  HandleSet* HS = IsOpenedHandlesInstance(Handle);
  if (!HS)
    return (void *)uintptr_t(GetProcAddress((HMODULE)Handle, Symbol));

  // Could have done a dlclose on the *Process* handle
  if (!HS->Process)
    return nullptr;

  // Trials indicate EnumProcessModulesEx is consistently faster than using
  // EnumerateLoadedModules64 or CreateToolhelp32Snapshot.
  //
  // | Handles | DbgHelp.dll | CreateSnapshot | EnumProcessModulesEx
  // |=========|=============|========================================
  // | 37      | 0.0000585 * | 0.0003031      | 0.0000152
  // | 1020    | 0.0026310 * | 0.0121598      | 0.0002683
  // | 2084    | 0.0149418 * | 0.0369936      | 0.0005610
  //
  // * Not including the load time of Dbghelp.dll (~.005 sec)
  //
  // There's still a case to somehow cache the result of EnumProcessModulesEx
  // across invocations, but the complication of doing that properly...
  // Possibly using LdrRegisterDllNotification to invalidate the cache?

  // First call passes no output buffer; it only fills in Bytes, the size
  // needed to hold the module list.
  DWORD Bytes = 0;
  HMODULE Self = HMODULE(GetCurrentProcess());
  if (!GetProcessModules(Self, Bytes))
    return nullptr;

  // Get the most recent list in case any modules added/removed between calls
  // to EnumProcessModulesEx that gets the amount of, then copies the HMODULES.
  // MSDN is pretty clear that if the module list changes during the call to
  // EnumProcessModulesEx the results should not be used.
  std::vector<HMODULE> Handles;
  do {
    assert(Bytes && ((Bytes % sizeof(HMODULE)) == 0) &&
           "Should have at least one module and be aligned");
    Handles.resize(Bytes / sizeof(HMODULE));
    if (!GetProcessModules(Self, Bytes, Handles.data()))
      return nullptr;
    // Repeat until the size reported back matches the buffer that was passed.
  } while (Bytes != (Handles.size() * sizeof(HMODULE)));

  // Try EXE first, mirroring what dlsym(dlopen(NULL)) does.
  if (FARPROC Ptr = GetProcAddress(HMODULE(Handles.front()), Symbol))
    return (void *) uintptr_t(Ptr);

  if (Handles.size() > 1) {
    // This is different behaviour than what Posix dlsym(dlopen(NULL)) does.
    // Doing that here is causing real problems for the JIT where msvc.dll
    // and ucrt.dll can define the same symbols. The runtime linker will choose
    // symbols from ucrt.dll first, but iterating NOT in reverse here would
    // mean that the msvc.dll versions would be returned.

    // rend()-1 stops before Handles.front(), which was already tried above.
    for (auto I = Handles.rbegin(), E = Handles.rend()-1; I != E; ++I) {
      if (FARPROC Ptr = GetProcAddress(HMODULE(*I), Symbol))
        return (void *) uintptr_t(Ptr);
    }
  }
  return nullptr;
}
/// Looks \p SymbolName up among the entries of explicit_symbols.inc.
///
/// The .inc file is expanded inside the function body with the macros below,
/// so each entry becomes a strcmp test that returns the address of the
/// matching symbol.
///
/// \returns the address associated with the first matching entry, or nullptr
///   when no entry matches.
static void *DoSearch(const char *SymbolName) {

// EXPLICIT_SYMBOL(SYM): match the name SYM and return &SYM.
#define EXPLICIT_SYMBOL(SYM)                                                   \
  if (!strcmp(SymbolName, #SYM))                                               \
    return (void *)&SYM;
// EXPLICIT_SYMBOL2(SYMFROM, SYMTO): match the name SYMFROM but return &SYMTO.
#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO)                                       \
  if (!strcmp(SymbolName, #SYMFROM))                                           \
    return (void *)&SYMTO;

#ifdef _M_IX86
// INLINE_DEF_SYMBOL1/2(TYP, SYM): match the name SYM and return the address
// of the inline_SYM function.
#define INLINE_DEF_SYMBOL1(TYP, SYM)                                           \
  if (!strcmp(SymbolName, #SYM))                                               \
    return (void *)&inline_##SYM;
#define INLINE_DEF_SYMBOL2(TYP, SYM) INLINE_DEF_SYMBOL1(TYP, SYM)
#endif

  {
#include "explicit_symbols.inc"
  }

// Do not leak the lookup macros past this function.
#undef EXPLICIT_SYMBOL
#undef EXPLICIT_SYMBOL2
#undef INLINE_DEF_SYMBOL1
#undef INLINE_DEF_SYMBOL2

  return nullptr;
}
+// +//===----------------------------------------------------------------------===// + +#include "WindowsSupport.h" +#include <cstdio> +#include <string> + +using namespace llvm; + +static std::string updateTripleOSVersion(std::string Triple) { + return Triple; +} + +std::string sys::getDefaultTargetTriple() { + const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE; + + // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + Triple = EnvTriple; +#endif + + return Triple; +} diff --git a/llvm/lib/Support/Windows/Memory.inc b/llvm/lib/Support/Windows/Memory.inc new file mode 100644 index 0000000000000..a67f9c7d0f35b --- /dev/null +++ b/llvm/lib/Support/Windows/Memory.inc @@ -0,0 +1,200 @@ +//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of various Memory +// management utilities +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/WindowsError.h" + +// The Windows.h header must be the last one included. 
+#include "WindowsSupport.h" + +namespace { + +DWORD getWindowsProtectionFlags(unsigned Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { + // Contrary to what you might expect, the Windows page protection flags + // are not a bitwise combination of RWX values + case llvm::sys::Memory::MF_READ: + return PAGE_READONLY; + case llvm::sys::Memory::MF_WRITE: + // Note: PAGE_WRITE is not supported by VirtualProtect + return PAGE_READWRITE; + case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_WRITE: + return PAGE_READWRITE; + case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE_READ; + case llvm::sys::Memory::MF_READ | + llvm::sys::Memory::MF_WRITE | + llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE_READWRITE; + case llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE; + default: + llvm_unreachable("Illegal memory protection flag specified!"); + } + // Provide a default return value as required by some compilers. + return PAGE_NOACCESS; +} + +// While we'd be happy to allocate single pages, the Windows allocation +// granularity may be larger than a single page (in practice, it is 64K) +// so mapping less than that will create an unreachable fragment of memory. +size_t getAllocationGranularity() { + SYSTEM_INFO Info; + ::GetSystemInfo(&Info); + if (Info.dwPageSize > Info.dwAllocationGranularity) + return Info.dwPageSize; + else + return Info.dwAllocationGranularity; +} + +// Large/huge memory pages need explicit process permissions in order to be +// used. See https://blogs.msdn.microsoft.com/oldnewthing/20110128-00/?p=11643 +// Also large pages need to be manually enabled on your OS. If all this is +// sucessfull, we return the minimal large memory page size. 
+static size_t enableProcessLargePages() { + HANDLE Token = 0; + size_t LargePageMin = GetLargePageMinimum(); + if (LargePageMin) + OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, + &Token); + if (!Token) + return 0; + LUID Luid; + if (!LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &Luid)) { + CloseHandle(Token); + return 0; + } + TOKEN_PRIVILEGES TP{}; + TP.PrivilegeCount = 1; + TP.Privileges[0].Luid = Luid; + TP.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + if (!AdjustTokenPrivileges(Token, FALSE, &TP, 0, 0, 0)) { + CloseHandle(Token); + return 0; + } + DWORD E = GetLastError(); + CloseHandle(Token); + if (E == ERROR_SUCCESS) + return LargePageMin; + return 0; +} + +} // namespace + +namespace llvm { +namespace sys { + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, + const MemoryBlock *const NearBlock, + unsigned Flags, + std::error_code &EC) { + EC = std::error_code(); + if (NumBytes == 0) + return MemoryBlock(); + + static size_t DefaultGranularity = getAllocationGranularity(); + static size_t LargePageGranularity = enableProcessLargePages(); + + DWORD AllocType = MEM_RESERVE | MEM_COMMIT; + bool HugePages = false; + size_t Granularity = DefaultGranularity; + + if ((Flags & MF_HUGE_HINT) && LargePageGranularity > 0) { + AllocType |= MEM_LARGE_PAGES; + HugePages = true; + Granularity = LargePageGranularity; + } + + size_t NumBlocks = (NumBytes + Granularity - 1) / Granularity; + + uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) + + NearBlock->allocatedSize() + : 0; + + // If the requested address is not aligned to the allocation granularity, + // round up to get beyond NearBlock. VirtualAlloc would have rounded down. 
+ if (Start && Start % Granularity != 0) + Start += Granularity - Start % Granularity; + + DWORD Protect = getWindowsProtectionFlags(Flags); + + size_t AllocSize = NumBlocks * Granularity; + void *PA = ::VirtualAlloc(reinterpret_cast<void *>(Start), + AllocSize, AllocType, Protect); + if (PA == NULL) { + if (NearBlock || HugePages) { + // Try again without the NearBlock hint and without large memory pages + return allocateMappedMemory(NumBytes, NULL, Flags & ~MF_HUGE_HINT, EC); + } + EC = mapWindowsError(::GetLastError()); + return MemoryBlock(); + } + + MemoryBlock Result; + Result.Address = PA; + Result.AllocatedSize = AllocSize; + Result.Flags = (Flags & ~MF_HUGE_HINT) | (HugePages ? MF_HUGE_HINT : 0); + + if (Flags & MF_EXEC) + Memory::InvalidateInstructionCache(Result.Address, AllocSize); + + return Result; +} + + std::error_code Memory::releaseMappedMemory(MemoryBlock &M) { + if (M.Address == 0 || M.AllocatedSize == 0) + return std::error_code(); + + if (!VirtualFree(M.Address, 0, MEM_RELEASE)) + return mapWindowsError(::GetLastError()); + + M.Address = 0; + M.AllocatedSize = 0; + + return std::error_code(); +} + + std::error_code Memory::protectMappedMemory(const MemoryBlock &M, + unsigned Flags) { + if (M.Address == 0 || M.AllocatedSize == 0) + return std::error_code(); + + DWORD Protect = getWindowsProtectionFlags(Flags); + + DWORD OldFlags; + if (!VirtualProtect(M.Address, M.AllocatedSize, Protect, &OldFlags)) + return mapWindowsError(::GetLastError()); + + if (Flags & MF_EXEC) + Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize); + + return std::error_code(); +} + +/// InvalidateInstructionCache - Before the JIT can run a block of code +/// that has been emitted it must invalidate the instruction cache on some +/// platforms. 
/// Calls FlushInstructionCache for the current process, passing \p Addr and
/// \p Len as the base address and size of the region. The API's return value
/// is not checked.
void Memory::InvalidateInstructionCache(
    const void *Addr, size_t Len) {
  FlushInstructionCache(GetCurrentProcess(), Addr, Len);
}
/// Returns true when \p value is one of the two path separators recognized
/// here: a backslash or a forward slash.
static bool is_separator(const wchar_t value) {
  return value == L'\\' || value == L'/';
}
+ for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(Path8Str), + E = llvm::sys::path::end(Path8Str); + I != E; ++I) { + if (I->size() == 1 && is_separator((*I)[0])) + continue; + if (I->size() == 1 && *I == ".") + continue; + if (I->size() == 2 && *I == "..") + llvm::sys::path::remove_filename(FullPath); + else + llvm::sys::path::append(FullPath, *I); + } + return UTF8ToUTF16(FullPath, Path16); + } + + // Just use the caller's original path. + return UTF8ToUTF16(Path8Str, Path16); +} +} // end namespace path + +namespace fs { + +const file_t kInvalidFile = INVALID_HANDLE_VALUE; + +std::string getMainExecutable(const char *argv0, void *MainExecAddr) { + SmallVector<wchar_t, MAX_PATH> PathName; + DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.capacity()); + + // A zero return value indicates a failure other than insufficient space. + if (Size == 0) + return ""; + + // Insufficient space is determined by a return value equal to the size of + // the buffer passed in. + if (Size == PathName.capacity()) + return ""; + + // On success, GetModuleFileNameW returns the number of characters written to + // the buffer not including the NULL terminator. + PathName.set_size(Size); + + // Convert the result from UTF-16 to UTF-8. + SmallVector<char, MAX_PATH> PathNameUTF8; + if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8)) + return ""; + + return std::string(PathNameUTF8.data()); +} + +UniqueID file_status::getUniqueID() const { + // The file is uniquely identified by the volume serial number along + // with the 64-bit file identifier. 
+ uint64_t FileID = (static_cast<uint64_t>(FileIndexHigh) << 32ULL) | + static_cast<uint64_t>(FileIndexLow); + + return UniqueID(VolumeSerialNumber, FileID); +} + +ErrorOr<space_info> disk_space(const Twine &Path) { + ULARGE_INTEGER Avail, Total, Free; + if (!::GetDiskFreeSpaceExA(Path.str().c_str(), &Avail, &Total, &Free)) + return mapWindowsError(::GetLastError()); + space_info SpaceInfo; + SpaceInfo.capacity = + (static_cast<uint64_t>(Total.HighPart) << 32) + Total.LowPart; + SpaceInfo.free = (static_cast<uint64_t>(Free.HighPart) << 32) + Free.LowPart; + SpaceInfo.available = + (static_cast<uint64_t>(Avail.HighPart) << 32) + Avail.LowPart; + return SpaceInfo; +} + +TimePoint<> basic_file_status::getLastAccessedTime() const { + FILETIME Time; + Time.dwLowDateTime = LastAccessedTimeLow; + Time.dwHighDateTime = LastAccessedTimeHigh; + return toTimePoint(Time); +} + +TimePoint<> basic_file_status::getLastModificationTime() const { + FILETIME Time; + Time.dwLowDateTime = LastWriteTimeLow; + Time.dwHighDateTime = LastWriteTimeHigh; + return toTimePoint(Time); +} + +uint32_t file_status::getLinkCount() const { + return NumLinks; +} + +std::error_code current_path(SmallVectorImpl<char> &result) { + SmallVector<wchar_t, MAX_PATH> cur_path; + DWORD len = MAX_PATH; + + do { + cur_path.reserve(len); + len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); + + // A zero return value indicates a failure other than insufficient space. + if (len == 0) + return mapWindowsError(::GetLastError()); + + // If there's insufficient space, the len returned is larger than the len + // given. + } while (len > cur_path.capacity()); + + // On success, GetCurrentDirectoryW returns the number of characters not + // including the null-terminator. + cur_path.set_size(len); + return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); +} + +std::error_code set_current_path(const Twine &path) { + // Convert to utf-16. 
+ SmallVector<wchar_t, 128> wide_path; + if (std::error_code ec = widenPath(path, wide_path)) + return ec; + + if (!::SetCurrentDirectoryW(wide_path.begin())) + return mapWindowsError(::GetLastError()); + + return std::error_code(); +} + +std::error_code create_directory(const Twine &path, bool IgnoreExisting, + perms Perms) { + SmallVector<wchar_t, 128> path_utf16; + + if (std::error_code ec = widenPath(path, path_utf16)) + return ec; + + if (!::CreateDirectoryW(path_utf16.begin(), NULL)) { + DWORD LastError = ::GetLastError(); + if (LastError != ERROR_ALREADY_EXISTS || !IgnoreExisting) + return mapWindowsError(LastError); + } + + return std::error_code(); +} + +// We can't use symbolic links for windows. +std::error_code create_link(const Twine &to, const Twine &from) { + // Convert to utf-16. + SmallVector<wchar_t, 128> wide_from; + SmallVector<wchar_t, 128> wide_to; + if (std::error_code ec = widenPath(from, wide_from)) + return ec; + if (std::error_code ec = widenPath(to, wide_to)) + return ec; + + if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL)) + return mapWindowsError(::GetLastError()); + + return std::error_code(); +} + +std::error_code create_hard_link(const Twine &to, const Twine &from) { + return create_link(to, from); +} + +std::error_code remove(const Twine &path, bool IgnoreNonExisting) { + SmallVector<wchar_t, 128> path_utf16; + + if (std::error_code ec = widenPath(path, path_utf16)) + return ec; + + // We don't know whether this is a file or a directory, and remove() can + // accept both. The usual way to delete a file or directory is to use one of + // the DeleteFile or RemoveDirectory functions, but that requires you to know + // which one it is. We could stat() the file to determine that, but that would + // cost us additional system calls, which can be slow in a directory + // containing a large number of files. So instead we call CreateFile directly. 
+ // The important part is the FILE_FLAG_DELETE_ON_CLOSE flag, which causes the + // file to be deleted once it is closed. We also use the flags + // FILE_FLAG_BACKUP_SEMANTICS (which allows us to open directories), and + // FILE_FLAG_OPEN_REPARSE_POINT (don't follow symlinks). + ScopedFileHandle h(::CreateFileW( + c_str(path_utf16), DELETE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS | + FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_DELETE_ON_CLOSE, + NULL)); + if (!h) { + std::error_code EC = mapWindowsError(::GetLastError()); + if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) + return EC; + } + + return std::error_code(); +} + +static std::error_code is_local_internal(SmallVectorImpl<wchar_t> &Path, + bool &Result) { + SmallVector<wchar_t, 128> VolumePath; + size_t Len = 128; + while (true) { + VolumePath.resize(Len); + BOOL Success = + ::GetVolumePathNameW(Path.data(), VolumePath.data(), VolumePath.size()); + + if (Success) + break; + + DWORD Err = ::GetLastError(); + if (Err != ERROR_INSUFFICIENT_BUFFER) + return mapWindowsError(Err); + + Len *= 2; + } + // If the output buffer has exactly enough space for the path name, but not + // the null terminator, it will leave the output unterminated. Push a null + // terminator onto the end to ensure that this never happens. 
+ VolumePath.push_back(L'\0'); + VolumePath.set_size(wcslen(VolumePath.data())); + const wchar_t *P = VolumePath.data(); + + UINT Type = ::GetDriveTypeW(P); + switch (Type) { + case DRIVE_FIXED: + Result = true; + return std::error_code(); + case DRIVE_REMOTE: + case DRIVE_CDROM: + case DRIVE_RAMDISK: + case DRIVE_REMOVABLE: + Result = false; + return std::error_code(); + default: + return make_error_code(errc::no_such_file_or_directory); + } + llvm_unreachable("Unreachable!"); +} + +std::error_code is_local(const Twine &path, bool &result) { + if (!llvm::sys::fs::exists(path) || !llvm::sys::path::has_root_path(path)) + return make_error_code(errc::no_such_file_or_directory); + + SmallString<128> Storage; + StringRef P = path.toStringRef(Storage); + + // Convert to utf-16. + SmallVector<wchar_t, 128> WidePath; + if (std::error_code ec = widenPath(P, WidePath)) + return ec; + return is_local_internal(WidePath, result); +} + +static std::error_code realPathFromHandle(HANDLE H, + SmallVectorImpl<wchar_t> &Buffer) { + DWORD CountChars = ::GetFinalPathNameByHandleW( + H, Buffer.begin(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED); + if (CountChars > Buffer.capacity()) { + // The buffer wasn't big enough, try again. In this case the return value + // *does* indicate the size of the null terminator. + Buffer.reserve(CountChars); + CountChars = ::GetFinalPathNameByHandleW( + H, Buffer.data(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED); + } + if (CountChars == 0) + return mapWindowsError(GetLastError()); + Buffer.set_size(CountChars); + return std::error_code(); +} + +static std::error_code realPathFromHandle(HANDLE H, + SmallVectorImpl<char> &RealPath) { + RealPath.clear(); + SmallVector<wchar_t, MAX_PATH> Buffer; + if (std::error_code EC = realPathFromHandle(H, Buffer)) + return EC; + + // Strip the \\?\ prefix. We don't want it ending up in output, and such + // paths don't get canonicalized by file APIs. 
+ wchar_t *Data = Buffer.data(); + DWORD CountChars = Buffer.size(); + if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) { + // Convert \\?\UNC\foo\bar to \\foo\bar + CountChars -= 6; + Data += 6; + Data[0] = '\\'; + } else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) { + // Convert \\?\c:\foo to c:\foo + CountChars -= 4; + Data += 4; + } + + // Convert the result from UTF-16 to UTF-8. + return UTF16ToUTF8(Data, CountChars, RealPath); +} + +std::error_code is_local(int FD, bool &Result) { + SmallVector<wchar_t, 128> FinalPath; + HANDLE Handle = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); + + if (std::error_code EC = realPathFromHandle(Handle, FinalPath)) + return EC; + + return is_local_internal(FinalPath, Result); +} + +static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { + FILE_DISPOSITION_INFO Disposition; + Disposition.DeleteFile = Delete; + if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, + sizeof(Disposition))) + return mapWindowsError(::GetLastError()); + return std::error_code(); +} + +static std::error_code rename_internal(HANDLE FromHandle, const Twine &To, + bool ReplaceIfExists) { + SmallVector<wchar_t, 0> ToWide; + if (auto EC = widenPath(To, ToWide)) + return EC; + + std::vector<char> RenameInfoBuf(sizeof(FILE_RENAME_INFO) - sizeof(wchar_t) + + (ToWide.size() * sizeof(wchar_t))); + FILE_RENAME_INFO &RenameInfo = + *reinterpret_cast<FILE_RENAME_INFO *>(RenameInfoBuf.data()); + RenameInfo.ReplaceIfExists = ReplaceIfExists; + RenameInfo.RootDirectory = 0; + RenameInfo.FileNameLength = ToWide.size() * sizeof(wchar_t); + std::copy(ToWide.begin(), ToWide.end(), &RenameInfo.FileName[0]); + + SetLastError(ERROR_SUCCESS); + if (!SetFileInformationByHandle(FromHandle, FileRenameInfo, &RenameInfo, + RenameInfoBuf.size())) { + unsigned Error = GetLastError(); + if (Error == ERROR_SUCCESS) + Error = ERROR_CALL_NOT_IMPLEMENTED; // Wine doesn't always set error code. 
/// Renames the file open on \p FromHandle to \p To, replacing an existing
/// destination.
///
/// The rename is attempted up to 200 times. When it fails with
/// permission_denied, the existing destination is opened with
/// FILE_FLAG_DELETE_ON_CLOSE and renamed to "<To>.tmp<N>" to move it out of
/// the way, after which the rename is retried. When rename_internal reports
/// ERROR_CALL_NOT_IMPLEMENTED, MoveFileExW is used instead.
///
/// \returns success, the first error other than permission_denied, or
///   permission_denied once all attempts are used up.
static std::error_code rename_handle(HANDLE FromHandle, const Twine &To) {
  SmallVector<wchar_t, 128> WideTo;
  if (std::error_code EC = widenPath(To, WideTo))
    return EC;

  // We normally expect this loop to succeed after a few iterations. If it
  // requires more than 200 tries, it's more likely that the failures are due to
  // a true error, so stop trying.
  for (unsigned Retry = 0; Retry != 200; ++Retry) {
    auto EC = rename_internal(FromHandle, To, true);

    if (EC ==
        std::error_code(ERROR_CALL_NOT_IMPLEMENTED, std::system_category())) {
      // Wine doesn't support SetFileInformationByHandle in rename_internal.
      // Fall back to MoveFileEx.
      SmallVector<wchar_t, MAX_PATH> WideFrom;
      if (std::error_code EC2 = realPathFromHandle(FromHandle, WideFrom))
        return EC2;
      if (::MoveFileExW(WideFrom.begin(), WideTo.begin(),
                        MOVEFILE_REPLACE_EXISTING))
        return std::error_code();
      return mapWindowsError(GetLastError());
    }

    // Success and every error other than permission_denied end the loop.
    if (!EC || EC != errc::permission_denied)
      return EC;

    // The destination file probably exists and is currently open in another
    // process, either because the file was opened without FILE_SHARE_DELETE or
    // it is mapped into memory (e.g. using MemoryBuffer). Rename it in order to
    // move it out of the way of the source file. Use FILE_FLAG_DELETE_ON_CLOSE
    // to arrange for the destination file to be deleted when the other process
    // closes it.
    ScopedFileHandle ToHandle(
        ::CreateFileW(WideTo.begin(), GENERIC_READ | DELETE,
                      FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
                      NULL, OPEN_EXISTING,
                      FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, NULL));
    if (!ToHandle) {
      auto EC = mapWindowsError(GetLastError());
      // Another process might have raced with us and moved the existing file
      // out of the way before we had a chance to open it. If that happens, try
      // to rename the source file again.
      if (EC == errc::no_such_file_or_directory)
        continue;
      return EC;
    }

    // Remember the identity (file index + volume serial) of the destination
    // we opened so it can be compared with whatever is at WideTo later.
    BY_HANDLE_FILE_INFORMATION FI;
    if (!GetFileInformationByHandle(ToHandle, &FI))
      return mapWindowsError(GetLastError());

    // Try to find a unique new name for the destination file.
    for (unsigned UniqueId = 0; UniqueId != 200; ++UniqueId) {
      std::string TmpFilename = (To + ".tmp" + utostr(UniqueId)).str();
      if (auto EC = rename_internal(ToHandle, TmpFilename, false)) {
        if (EC == errc::file_exists || EC == errc::permission_denied) {
          // Again, another process might have raced with us and moved the file
          // before we could move it. Check whether this is the case, as it
          // might have caused the permission denied error. If that was the
          // case, we don't need to move it ourselves.
          ScopedFileHandle ToHandle2(::CreateFileW(
              WideTo.begin(), 0,
              FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
              OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL));
          if (!ToHandle2) {
            auto EC = mapWindowsError(GetLastError());
            if (EC == errc::no_such_file_or_directory)
              break;
            return EC;
          }
          BY_HANDLE_FILE_INFORMATION FI2;
          if (!GetFileInformationByHandle(ToHandle2, &FI2))
            return mapWindowsError(GetLastError());
          // A different file now lives at the destination path: the one we
          // opened has already been moved, so stop looking for a temp name.
          if (FI.nFileIndexHigh != FI2.nFileIndexHigh ||
              FI.nFileIndexLow != FI2.nFileIndexLow ||
              FI.dwVolumeSerialNumber != FI2.dwVolumeSerialNumber)
            break;
          // Same file is still in place: try the next temporary name.
          continue;
        }
        return EC;
      }
      break;
    }

    // Okay, the old destination file has probably been moved out of the way at
    // this point, so try to rename the source file again. Still, another
    // process might have raced with us to create and open the destination
    // file, so we need to keep doing this until we succeed.
  }

  // The most likely root cause.
  return errc::permission_denied;
}
+ DWORD LastError = ::GetLastError(); + if (LastError != ERROR_FILE_NOT_FOUND && + LastError != ERROR_PATH_NOT_FOUND) + return mapWindowsError(LastError); + return errc::no_such_file_or_directory; + } + + if (Mode == AccessMode::Write && (Attributes & FILE_ATTRIBUTE_READONLY)) + return errc::permission_denied; + + return std::error_code(); +} + +bool can_execute(const Twine &Path) { + return !access(Path, AccessMode::Execute) || + !access(Path + ".exe", AccessMode::Execute); +} + +bool equivalent(file_status A, file_status B) { + assert(status_known(A) && status_known(B)); + return A.FileIndexHigh == B.FileIndexHigh && + A.FileIndexLow == B.FileIndexLow && + A.FileSizeHigh == B.FileSizeHigh && + A.FileSizeLow == B.FileSizeLow && + A.LastAccessedTimeHigh == B.LastAccessedTimeHigh && + A.LastAccessedTimeLow == B.LastAccessedTimeLow && + A.LastWriteTimeHigh == B.LastWriteTimeHigh && + A.LastWriteTimeLow == B.LastWriteTimeLow && + A.VolumeSerialNumber == B.VolumeSerialNumber; +} + +std::error_code equivalent(const Twine &A, const Twine &B, bool &result) { + file_status fsA, fsB; + if (std::error_code ec = status(A, fsA)) + return ec; + if (std::error_code ec = status(B, fsB)) + return ec; + result = equivalent(fsA, fsB); + return std::error_code(); +} + +static bool isReservedName(StringRef path) { + // This list of reserved names comes from MSDN, at: + // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx + static const char *const sReservedNames[] = { "nul", "con", "prn", "aux", + "com1", "com2", "com3", "com4", + "com5", "com6", "com7", "com8", + "com9", "lpt1", "lpt2", "lpt3", + "lpt4", "lpt5", "lpt6", "lpt7", + "lpt8", "lpt9" }; + + // First, check to see if this is a device namespace, which always + // starts with \\.\, since device namespaces are not legal file paths. + if (path.startswith("\\\\.\\")) + return true; + + // Then compare against the list of ancient reserved names. 
+ for (size_t i = 0; i < array_lengthof(sReservedNames); ++i) { + if (path.equals_lower(sReservedNames[i])) + return true; + } + + // The path isn't what we consider reserved. + return false; +} + +static file_type file_type_from_attrs(DWORD Attrs) { + return (Attrs & FILE_ATTRIBUTE_DIRECTORY) ? file_type::directory_file + : file_type::regular_file; +} + +static perms perms_from_attrs(DWORD Attrs) { + return (Attrs & FILE_ATTRIBUTE_READONLY) ? (all_read | all_exe) : all_all; +} + +static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { + if (FileHandle == INVALID_HANDLE_VALUE) + goto handle_status_error; + + switch (::GetFileType(FileHandle)) { + default: + llvm_unreachable("Don't know anything about this file type"); + case FILE_TYPE_UNKNOWN: { + DWORD Err = ::GetLastError(); + if (Err != NO_ERROR) + return mapWindowsError(Err); + Result = file_status(file_type::type_unknown); + return std::error_code(); + } + case FILE_TYPE_DISK: + break; + case FILE_TYPE_CHAR: + Result = file_status(file_type::character_file); + return std::error_code(); + case FILE_TYPE_PIPE: + Result = file_status(file_type::fifo_file); + return std::error_code(); + } + + BY_HANDLE_FILE_INFORMATION Info; + if (!::GetFileInformationByHandle(FileHandle, &Info)) + goto handle_status_error; + + Result = file_status( + file_type_from_attrs(Info.dwFileAttributes), + perms_from_attrs(Info.dwFileAttributes), Info.nNumberOfLinks, + Info.ftLastAccessTime.dwHighDateTime, Info.ftLastAccessTime.dwLowDateTime, + Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime, + Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, + Info.nFileIndexHigh, Info.nFileIndexLow); + return std::error_code(); + +handle_status_error: + DWORD LastError = ::GetLastError(); + if (LastError == ERROR_FILE_NOT_FOUND || + LastError == ERROR_PATH_NOT_FOUND) + Result = file_status(file_type::file_not_found); + else if (LastError == ERROR_SHARING_VIOLATION) + Result = 
file_status(file_type::type_unknown); + else + Result = file_status(file_type::status_error); + return mapWindowsError(LastError); +} + +std::error_code status(const Twine &path, file_status &result, bool Follow) { + SmallString<128> path_storage; + SmallVector<wchar_t, 128> path_utf16; + + StringRef path8 = path.toStringRef(path_storage); + if (isReservedName(path8)) { + result = file_status(file_type::character_file); + return std::error_code(); + } + + if (std::error_code ec = widenPath(path8, path_utf16)) + return ec; + + DWORD attr = ::GetFileAttributesW(path_utf16.begin()); + if (attr == INVALID_FILE_ATTRIBUTES) + return getStatus(INVALID_HANDLE_VALUE, result); + + DWORD Flags = FILE_FLAG_BACKUP_SEMANTICS; + // Handle reparse points. + if (!Follow && (attr & FILE_ATTRIBUTE_REPARSE_POINT)) + Flags |= FILE_FLAG_OPEN_REPARSE_POINT; + + ScopedFileHandle h( + ::CreateFileW(path_utf16.begin(), 0, // Attributes only. + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, Flags, 0)); + if (!h) + return getStatus(INVALID_HANDLE_VALUE, result); + + return getStatus(h, result); +} + +std::error_code status(int FD, file_status &Result) { + HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); + return getStatus(FileHandle, Result); +} + +std::error_code status(file_t FileHandle, file_status &Result) { + return getStatus(FileHandle, Result); +} + +unsigned getUmask() { + return 0; +} + +std::error_code setPermissions(const Twine &Path, perms Permissions) { + SmallVector<wchar_t, 128> PathUTF16; + if (std::error_code EC = widenPath(Path, PathUTF16)) + return EC; + + DWORD Attributes = ::GetFileAttributesW(PathUTF16.begin()); + if (Attributes == INVALID_FILE_ATTRIBUTES) + return mapWindowsError(GetLastError()); + + // There are many Windows file attributes that are not to do with the file + // permissions (e.g. FILE_ATTRIBUTE_HIDDEN). We need to be careful to preserve + // them. 
+ if (Permissions & all_write) { + Attributes &= ~FILE_ATTRIBUTE_READONLY; + if (Attributes == 0) + // FILE_ATTRIBUTE_NORMAL indicates no other attributes are set. + Attributes |= FILE_ATTRIBUTE_NORMAL; + } + else { + Attributes |= FILE_ATTRIBUTE_READONLY; + // FILE_ATTRIBUTE_NORMAL is not compatible with any other attributes, so + // remove it, if it is present. + Attributes &= ~FILE_ATTRIBUTE_NORMAL; + } + + if (!::SetFileAttributesW(PathUTF16.begin(), Attributes)) + return mapWindowsError(GetLastError()); + + return std::error_code(); +} + +std::error_code setPermissions(int FD, perms Permissions) { + // FIXME Not implemented. + return std::make_error_code(std::errc::not_supported); +} + +std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime, + TimePoint<> ModificationTime) { + FILETIME AccessFT = toFILETIME(AccessTime); + FILETIME ModifyFT = toFILETIME(ModificationTime); + HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); + if (!SetFileTime(FileHandle, NULL, &AccessFT, &ModifyFT)) + return mapWindowsError(::GetLastError()); + return std::error_code(); +} + +std::error_code mapped_file_region::init(sys::fs::file_t OrigFileHandle, + uint64_t Offset, mapmode Mode) { + this->Mode = Mode; + if (OrigFileHandle == INVALID_HANDLE_VALUE) + return make_error_code(errc::bad_file_descriptor); + + DWORD flprotect; + switch (Mode) { + case readonly: flprotect = PAGE_READONLY; break; + case readwrite: flprotect = PAGE_READWRITE; break; + case priv: flprotect = PAGE_WRITECOPY; break; + } + + HANDLE FileMappingHandle = + ::CreateFileMappingW(OrigFileHandle, 0, flprotect, + Hi_32(Size), + Lo_32(Size), + 0); + if (FileMappingHandle == NULL) { + std::error_code ec = mapWindowsError(GetLastError()); + return ec; + } + + DWORD dwDesiredAccess; + switch (Mode) { + case readonly: dwDesiredAccess = FILE_MAP_READ; break; + case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break; + case priv: dwDesiredAccess = FILE_MAP_COPY; break; + } + Mapping 
= ::MapViewOfFile(FileMappingHandle, + dwDesiredAccess, + Offset >> 32, + Offset & 0xffffffff, + Size); + if (Mapping == NULL) { + std::error_code ec = mapWindowsError(GetLastError()); + ::CloseHandle(FileMappingHandle); + return ec; + } + + if (Size == 0) { + MEMORY_BASIC_INFORMATION mbi; + SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi)); + if (Result == 0) { + std::error_code ec = mapWindowsError(GetLastError()); + ::UnmapViewOfFile(Mapping); + ::CloseHandle(FileMappingHandle); + return ec; + } + Size = mbi.RegionSize; + } + + // Close the file mapping handle, as it's kept alive by the file mapping. But + // neither the file mapping nor the file mapping handle keep the file handle + // alive, so we need to keep a reference to the file in case all other handles + // are closed and the file is deleted, which may cause invalid data to be read + // from the file. + ::CloseHandle(FileMappingHandle); + if (!::DuplicateHandle(::GetCurrentProcess(), OrigFileHandle, + ::GetCurrentProcess(), &FileHandle, 0, 0, + DUPLICATE_SAME_ACCESS)) { + std::error_code ec = mapWindowsError(GetLastError()); + ::UnmapViewOfFile(Mapping); + return ec; + } + + return std::error_code(); +} + +mapped_file_region::mapped_file_region(sys::fs::file_t fd, mapmode mode, + size_t length, uint64_t offset, + std::error_code &ec) + : Size(length), Mapping() { + ec = init(fd, offset, mode); + if (ec) + Mapping = 0; +} + +static bool hasFlushBufferKernelBug() { + static bool Ret{GetWindowsOSVersion() < llvm::VersionTuple(10, 0, 0, 17763)}; + return Ret; +} + +static bool isEXE(StringRef Magic) { + static const char PEMagic[] = {'P', 'E', '\0', '\0'}; + if (Magic.startswith(StringRef("MZ")) && Magic.size() >= 0x3c + 4) { + uint32_t off = read32le(Magic.data() + 0x3c); + // PE/COFF file, either EXE or DLL. 
+ if (Magic.substr(off).startswith(StringRef(PEMagic, sizeof(PEMagic)))) + return true; + } + return false; +} + +mapped_file_region::~mapped_file_region() { + if (Mapping) { + + bool Exe = isEXE(StringRef((char *)Mapping, Size)); + + ::UnmapViewOfFile(Mapping); + + if (Mode == mapmode::readwrite && Exe && hasFlushBufferKernelBug()) { + // There is a Windows kernel bug, the exact trigger conditions of which + // are not well understood. When triggered, dirty pages are not properly + // flushed and subsequent process's attempts to read a file can return + // invalid data. Calling FlushFileBuffers on the write handle is + // sufficient to ensure that this bug is not triggered. + // The bug only occurs when writing an executable and executing it right + // after, under high I/O pressure. + ::FlushFileBuffers(FileHandle); + } + + ::CloseHandle(FileHandle); + } +} + +size_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} + +char *mapped_file_region::data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<char*>(Mapping); +} + +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} + +int mapped_file_region::alignment() { + SYSTEM_INFO SysInfo; + ::GetSystemInfo(&SysInfo); + return SysInfo.dwAllocationGranularity; +} + +static basic_file_status status_from_find_data(WIN32_FIND_DATAW *FindData) { + return basic_file_status(file_type_from_attrs(FindData->dwFileAttributes), + perms_from_attrs(FindData->dwFileAttributes), + FindData->ftLastAccessTime.dwHighDateTime, + FindData->ftLastAccessTime.dwLowDateTime, + FindData->ftLastWriteTime.dwHighDateTime, + FindData->ftLastWriteTime.dwLowDateTime, + FindData->nFileSizeHigh, FindData->nFileSizeLow); +} + +std::error_code detail::directory_iterator_construct(detail::DirIterState &IT, + StringRef Path, + bool FollowSymlinks) { 
+ SmallVector<wchar_t, 128> PathUTF16; + + if (std::error_code EC = widenPath(Path, PathUTF16)) + return EC; + + // Convert path to the format that Windows is happy with. + if (PathUTF16.size() > 0 && + !is_separator(PathUTF16[Path.size() - 1]) && + PathUTF16[Path.size() - 1] != L':') { + PathUTF16.push_back(L'\\'); + PathUTF16.push_back(L'*'); + } else { + PathUTF16.push_back(L'*'); + } + + // Get the first directory entry. + WIN32_FIND_DATAW FirstFind; + ScopedFindHandle FindHandle(::FindFirstFileExW( + c_str(PathUTF16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch, + NULL, FIND_FIRST_EX_LARGE_FETCH)); + if (!FindHandle) + return mapWindowsError(::GetLastError()); + + size_t FilenameLen = ::wcslen(FirstFind.cFileName); + while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') || + (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' && + FirstFind.cFileName[1] == L'.')) + if (!::FindNextFileW(FindHandle, &FirstFind)) { + DWORD LastError = ::GetLastError(); + // Check for end. + if (LastError == ERROR_NO_MORE_FILES) + return detail::directory_iterator_destruct(IT); + return mapWindowsError(LastError); + } else + FilenameLen = ::wcslen(FirstFind.cFileName); + + // Construct the current directory entry. + SmallString<128> DirectoryEntryNameUTF8; + if (std::error_code EC = + UTF16ToUTF8(FirstFind.cFileName, ::wcslen(FirstFind.cFileName), + DirectoryEntryNameUTF8)) + return EC; + + IT.IterationHandle = intptr_t(FindHandle.take()); + SmallString<128> DirectoryEntryPath(Path); + path::append(DirectoryEntryPath, DirectoryEntryNameUTF8); + IT.CurrentEntry = + directory_entry(DirectoryEntryPath, FollowSymlinks, + file_type_from_attrs(FirstFind.dwFileAttributes), + status_from_find_data(&FirstFind)); + + return std::error_code(); +} + +std::error_code detail::directory_iterator_destruct(detail::DirIterState &IT) { + if (IT.IterationHandle != 0) + // Closes the handle if it's valid. 
+ ScopedFindHandle close(HANDLE(IT.IterationHandle)); + IT.IterationHandle = 0; + IT.CurrentEntry = directory_entry(); + return std::error_code(); +} + +std::error_code detail::directory_iterator_increment(detail::DirIterState &IT) { + WIN32_FIND_DATAW FindData; + if (!::FindNextFileW(HANDLE(IT.IterationHandle), &FindData)) { + DWORD LastError = ::GetLastError(); + // Check for end. + if (LastError == ERROR_NO_MORE_FILES) + return detail::directory_iterator_destruct(IT); + return mapWindowsError(LastError); + } + + size_t FilenameLen = ::wcslen(FindData.cFileName); + if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') || + (FilenameLen == 2 && FindData.cFileName[0] == L'.' && + FindData.cFileName[1] == L'.')) + return directory_iterator_increment(IT); + + SmallString<128> DirectoryEntryPathUTF8; + if (std::error_code EC = + UTF16ToUTF8(FindData.cFileName, ::wcslen(FindData.cFileName), + DirectoryEntryPathUTF8)) + return EC; + + IT.CurrentEntry.replace_filename( + Twine(DirectoryEntryPathUTF8), + file_type_from_attrs(FindData.dwFileAttributes), + status_from_find_data(&FindData)); + return std::error_code(); +} + +ErrorOr<basic_file_status> directory_entry::status() const { + return Status; +} + +static std::error_code nativeFileToFd(Expected<HANDLE> H, int &ResultFD, + OpenFlags Flags) { + int CrtOpenFlags = 0; + if (Flags & OF_Append) + CrtOpenFlags |= _O_APPEND; + + if (Flags & OF_Text) + CrtOpenFlags |= _O_TEXT; + + ResultFD = -1; + if (!H) + return errorToErrorCode(H.takeError()); + + ResultFD = ::_open_osfhandle(intptr_t(*H), CrtOpenFlags); + if (ResultFD == -1) { + ::CloseHandle(*H); + return mapWindowsError(ERROR_INVALID_HANDLE); + } + return std::error_code(); +} + +static DWORD nativeDisposition(CreationDisposition Disp, OpenFlags Flags) { + // This is a compatibility hack. Really we should respect the creation + // disposition, but a lot of old code relied on the implicit assumption that + // OF_Append implied it would open an existing file. 
Since the disposition is + // now explicit and defaults to CD_CreateAlways, this assumption would cause + // any usage of OF_Append to append to a new file, even if the file already + // existed. A better solution might have two new creation dispositions: + // CD_AppendAlways and CD_AppendNew. This would also address the problem of + // OF_Append being used on a read-only descriptor, which doesn't make sense. + if (Flags & OF_Append) + return OPEN_ALWAYS; + + switch (Disp) { + case CD_CreateAlways: + return CREATE_ALWAYS; + case CD_CreateNew: + return CREATE_NEW; + case CD_OpenAlways: + return OPEN_ALWAYS; + case CD_OpenExisting: + return OPEN_EXISTING; + } + llvm_unreachable("unreachable!"); +} + +static DWORD nativeAccess(FileAccess Access, OpenFlags Flags) { + DWORD Result = 0; + if (Access & FA_Read) + Result |= GENERIC_READ; + if (Access & FA_Write) + Result |= GENERIC_WRITE; + if (Flags & OF_Delete) + Result |= DELETE; + if (Flags & OF_UpdateAtime) + Result |= FILE_WRITE_ATTRIBUTES; + return Result; +} + +static std::error_code openNativeFileInternal(const Twine &Name, + file_t &ResultFile, DWORD Disp, + DWORD Access, DWORD Flags, + bool Inherit = false) { + SmallVector<wchar_t, 128> PathUTF16; + if (std::error_code EC = widenPath(Name, PathUTF16)) + return EC; + + SECURITY_ATTRIBUTES SA; + SA.nLength = sizeof(SA); + SA.lpSecurityDescriptor = nullptr; + SA.bInheritHandle = Inherit; + + HANDLE H = + ::CreateFileW(PathUTF16.begin(), Access, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, &SA, + Disp, Flags, NULL); + if (H == INVALID_HANDLE_VALUE) { + DWORD LastError = ::GetLastError(); + std::error_code EC = mapWindowsError(LastError); + // Provide a better error message when trying to open directories. + // This only runs if we failed to open the file, so there is probably + // no performances issues. 
+ if (LastError != ERROR_ACCESS_DENIED) + return EC; + if (is_directory(Name)) + return make_error_code(errc::is_a_directory); + return EC; + } + ResultFile = H; + return std::error_code(); +} + +Expected<file_t> openNativeFile(const Twine &Name, CreationDisposition Disp, + FileAccess Access, OpenFlags Flags, + unsigned Mode) { + // Verify that we don't have both "append" and "excl". + assert((!(Disp == CD_CreateNew) || !(Flags & OF_Append)) && + "Cannot specify both 'CreateNew' and 'Append' file creation flags!"); + + DWORD NativeDisp = nativeDisposition(Disp, Flags); + DWORD NativeAccess = nativeAccess(Access, Flags); + + bool Inherit = false; + if (Flags & OF_ChildInherit) + Inherit = true; + + file_t Result; + std::error_code EC = openNativeFileInternal( + Name, Result, NativeDisp, NativeAccess, FILE_ATTRIBUTE_NORMAL, Inherit); + if (EC) + return errorCodeToError(EC); + + if (Flags & OF_UpdateAtime) { + FILETIME FileTime; + SYSTEMTIME SystemTime; + GetSystemTime(&SystemTime); + if (SystemTimeToFileTime(&SystemTime, &FileTime) == 0 || + SetFileTime(Result, NULL, &FileTime, NULL) == 0) { + DWORD LastError = ::GetLastError(); + ::CloseHandle(Result); + return errorCodeToError(mapWindowsError(LastError)); + } + } + + if (Flags & OF_Delete) { + if ((EC = setDeleteDisposition(Result, true))) { + ::CloseHandle(Result); + return errorCodeToError(EC); + } + } + return Result; +} + +std::error_code openFile(const Twine &Name, int &ResultFD, + CreationDisposition Disp, FileAccess Access, + OpenFlags Flags, unsigned int Mode) { + Expected<file_t> Result = openNativeFile(Name, Disp, Access, Flags); + if (!Result) + return errorToErrorCode(Result.takeError()); + + return nativeFileToFd(*Result, ResultFD, Flags); +} + +static std::error_code directoryRealPath(const Twine &Name, + SmallVectorImpl<char> &RealPath) { + file_t File; + std::error_code EC = openNativeFileInternal( + Name, File, OPEN_EXISTING, GENERIC_READ, FILE_FLAG_BACKUP_SEMANTICS); + if (EC) + return EC; + + EC 
= realPathFromHandle(File, RealPath); + ::CloseHandle(File); + return EC; +} + +std::error_code openFileForRead(const Twine &Name, int &ResultFD, + OpenFlags Flags, + SmallVectorImpl<char> *RealPath) { + Expected<HANDLE> NativeFile = openNativeFileForRead(Name, Flags, RealPath); + return nativeFileToFd(std::move(NativeFile), ResultFD, OF_None); +} + +Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags, + SmallVectorImpl<char> *RealPath) { + Expected<file_t> Result = + openNativeFile(Name, CD_OpenExisting, FA_Read, Flags); + + // Fetch the real name of the file, if the user asked + if (Result && RealPath) + realPathFromHandle(*Result, *RealPath); + + return Result; +} + +file_t convertFDToNativeFile(int FD) { + return reinterpret_cast<HANDLE>(::_get_osfhandle(FD)); +} + +file_t getStdinHandle() { return ::GetStdHandle(STD_INPUT_HANDLE); } +file_t getStdoutHandle() { return ::GetStdHandle(STD_OUTPUT_HANDLE); } +file_t getStderrHandle() { return ::GetStdHandle(STD_ERROR_HANDLE); } + +Expected<size_t> readNativeFileImpl(file_t FileHandle, + MutableArrayRef<char> Buf, + OVERLAPPED *Overlap) { + // ReadFile can only read 2GB at a time. The caller should check the number of + // bytes and read in a loop until termination. + DWORD BytesToRead = + std::min(size_t(std::numeric_limits<DWORD>::max()), Buf.size()); + DWORD BytesRead = 0; + if (::ReadFile(FileHandle, Buf.data(), BytesToRead, &BytesRead, Overlap)) + return BytesRead; + DWORD Err = ::GetLastError(); + // EOF is not an error. 
+ if (Err == ERROR_BROKEN_PIPE || Err == ERROR_HANDLE_EOF) + return BytesRead; + return errorCodeToError(mapWindowsError(Err)); +} + +Expected<size_t> readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf) { + return readNativeFileImpl(FileHandle, Buf, /*Overlap=*/nullptr); +} + +Expected<size_t> readNativeFileSlice(file_t FileHandle, + MutableArrayRef<char> Buf, + uint64_t Offset) { + OVERLAPPED Overlapped = {}; + Overlapped.Offset = uint32_t(Offset); + Overlapped.OffsetHigh = uint32_t(Offset >> 32); + return readNativeFileImpl(FileHandle, Buf, &Overlapped); +} + +std::error_code closeFile(file_t &F) { + file_t TmpF = F; + F = kInvalidFile; + if (!::CloseHandle(TmpF)) + return mapWindowsError(::GetLastError()); + return std::error_code(); +} + +std::error_code remove_directories(const Twine &path, bool IgnoreErrors) { + // Convert to utf-16. + SmallVector<wchar_t, 128> Path16; + std::error_code EC = widenPath(path, Path16); + if (EC && !IgnoreErrors) + return EC; + + // SHFileOperation() accepts a list of paths, and so must be double null- + // terminated to indicate the end of the list. The buffer is already null + // terminated, but since that null character is not considered part of the + // vector's size, pushing another one will just consume that byte. So we + // need to push 2 null terminators. + Path16.push_back(0); + Path16.push_back(0); + + SHFILEOPSTRUCTW shfos = {}; + shfos.wFunc = FO_DELETE; + shfos.pFrom = Path16.data(); + shfos.fFlags = FOF_NO_UI; + + int result = ::SHFileOperationW(&shfos); + if (result != 0 && !IgnoreErrors) + return mapWindowsError(result); + return std::error_code(); +} + +static void expandTildeExpr(SmallVectorImpl<char> &Path) { + // Path does not begin with a tilde expression. 
+ if (Path.empty() || Path[0] != '~') + return; + + StringRef PathStr(Path.begin(), Path.size()); + PathStr = PathStr.drop_front(); + StringRef Expr = PathStr.take_until([](char c) { return path::is_separator(c); }); + + if (!Expr.empty()) { + // This is probably a ~username/ expression. Don't support this on Windows. + return; + } + + SmallString<128> HomeDir; + if (!path::home_directory(HomeDir)) { + // For some reason we couldn't get the home directory. Just exit. + return; + } + + // Overwrite the first character and insert the rest. + Path[0] = HomeDir[0]; + Path.insert(Path.begin() + 1, HomeDir.begin() + 1, HomeDir.end()); +} + +void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) { + dest.clear(); + if (path.isTriviallyEmpty()) + return; + + path.toVector(dest); + expandTildeExpr(dest); + + return; +} + +std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest, + bool expand_tilde) { + dest.clear(); + if (path.isTriviallyEmpty()) + return std::error_code(); + + if (expand_tilde) { + SmallString<128> Storage; + path.toVector(Storage); + expandTildeExpr(Storage); + return real_path(Storage, dest, false); + } + + if (is_directory(path)) + return directoryRealPath(path, dest); + + int fd; + if (std::error_code EC = + llvm::sys::fs::openFileForRead(path, fd, OF_None, &dest)) + return EC; + ::close(fd); + return std::error_code(); +} + +} // end namespace fs + +namespace path { +static bool getKnownFolderPath(KNOWNFOLDERID folderId, + SmallVectorImpl<char> &result) { + wchar_t *path = nullptr; + if (::SHGetKnownFolderPath(folderId, KF_FLAG_CREATE, nullptr, &path) != S_OK) + return false; + + bool ok = !UTF16ToUTF8(path, ::wcslen(path), result); + ::CoTaskMemFree(path); + return ok; +} + +bool home_directory(SmallVectorImpl<char> &result) { + return getKnownFolderPath(FOLDERID_Profile, result); +} + +static bool getTempDirEnvVar(const wchar_t *Var, SmallVectorImpl<char> &Res) { + SmallVector<wchar_t, 1024> Buf; + size_t Size = 1024; + 
do { + Buf.reserve(Size); + Size = GetEnvironmentVariableW(Var, Buf.data(), Buf.capacity()); + if (Size == 0) + return false; + + // Try again with larger buffer. + } while (Size > Buf.capacity()); + Buf.set_size(Size); + + return !windows::UTF16ToUTF8(Buf.data(), Size, Res); +} + +static bool getTempDirEnvVar(SmallVectorImpl<char> &Res) { + const wchar_t *EnvironmentVariables[] = {L"TMP", L"TEMP", L"USERPROFILE"}; + for (auto *Env : EnvironmentVariables) { + if (getTempDirEnvVar(Env, Res)) + return true; + } + return false; +} + +void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) { + (void)ErasedOnReboot; + Result.clear(); + + // Check whether the temporary directory is specified by an environment var. + // This matches GetTempPath logic to some degree. GetTempPath is not used + // directly as it cannot handle evn var longer than 130 chars on Windows 7 + // (fixed on Windows 8). + if (getTempDirEnvVar(Result)) { + assert(!Result.empty() && "Unexpected empty path"); + native(Result); // Some Unix-like shells use Unix path separator in $TMP. + fs::make_absolute(Result); // Make it absolute if not already. + return; + } + + // Fall back to a system default. + const char *DefaultResult = "C:\\Temp"; + Result.append(DefaultResult, DefaultResult + strlen(DefaultResult)); +} +} // end namespace path + +namespace windows { +std::error_code CodePageToUTF16(unsigned codepage, + llvm::StringRef original, + llvm::SmallVectorImpl<wchar_t> &utf16) { + if (!original.empty()) { + int len = ::MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, original.begin(), + original.size(), utf16.begin(), 0); + + if (len == 0) { + return mapWindowsError(::GetLastError()); + } + + utf16.reserve(len + 1); + utf16.set_size(len); + + len = ::MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, original.begin(), + original.size(), utf16.begin(), utf16.size()); + + if (len == 0) { + return mapWindowsError(::GetLastError()); + } + } + + // Make utf16 null terminated. 
+ utf16.push_back(0); + utf16.pop_back(); + + return std::error_code(); +} + +std::error_code UTF8ToUTF16(llvm::StringRef utf8, + llvm::SmallVectorImpl<wchar_t> &utf16) { + return CodePageToUTF16(CP_UTF8, utf8, utf16); +} + +std::error_code CurCPToUTF16(llvm::StringRef curcp, + llvm::SmallVectorImpl<wchar_t> &utf16) { + return CodePageToUTF16(CP_ACP, curcp, utf16); +} + +static +std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16, + size_t utf16_len, + llvm::SmallVectorImpl<char> &converted) { + if (utf16_len) { + // Get length. + int len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, converted.begin(), + 0, NULL, NULL); + + if (len == 0) { + return mapWindowsError(::GetLastError()); + } + + converted.reserve(len); + converted.set_size(len); + + // Now do the actual conversion. + len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, converted.data(), + converted.size(), NULL, NULL); + + if (len == 0) { + return mapWindowsError(::GetLastError()); + } + } + + // Make the new string null terminated. + converted.push_back(0); + converted.pop_back(); + + return std::error_code(); +} + +std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + llvm::SmallVectorImpl<char> &utf8) { + return UTF16ToCodePage(CP_UTF8, utf16, utf16_len, utf8); +} + +std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len, + llvm::SmallVectorImpl<char> &curcp) { + return UTF16ToCodePage(CP_ACP, utf16, utf16_len, curcp); +} + +} // end namespace windows +} // end namespace sys +} // end namespace llvm diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc new file mode 100644 index 0000000000000..4b91f9f7fc667 --- /dev/null +++ b/llvm/lib/Support/Windows/Process.inc @@ -0,0 +1,485 @@ +//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of the Process class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/WindowsError.h" +#include <malloc.h> + +// The Windows.h header must be after LLVM and standard headers. +#include "WindowsSupport.h" + +#include <direct.h> +#include <io.h> +#include <psapi.h> +#include <shellapi.h> + +#if !defined(__MINGW32__) + #pragma comment(lib, "psapi.lib") + #pragma comment(lib, "shell32.lib") +#endif + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +#ifdef __MINGW32__ +// This ban should be lifted when MinGW 1.0+ has defined this value. +# define _HEAPOK (-2) +#endif + +using namespace llvm; + +// This function retrieves the page size using GetNativeSystemInfo() and is +// present solely so it can be called once to initialize the self_process member +// below. +static unsigned computePageSize() { + // GetNativeSystemInfo() provides the physical page size which may differ + // from GetSystemInfo() in 32-bit applications running under WOW64. + SYSTEM_INFO info; + GetNativeSystemInfo(&info); + // FIXME: FileOffset in MapViewOfFile() should be aligned to not dwPageSize, + // but dwAllocationGranularity. 
+ return static_cast<unsigned>(info.dwPageSize); +} + +Expected<unsigned> Process::getPageSize() { + static unsigned Ret = computePageSize(); + return Ret; +} + +size_t +Process::GetMallocUsage() +{ + _HEAPINFO hinfo; + hinfo._pentry = NULL; + + size_t size = 0; + + while (_heapwalk(&hinfo) == _HEAPOK) + size += hinfo._size; + + return size; +} + +void Process::GetTimeUsage(TimePoint<> &elapsed, std::chrono::nanoseconds &user_time, + std::chrono::nanoseconds &sys_time) { + elapsed = std::chrono::system_clock::now();; + + FILETIME ProcCreate, ProcExit, KernelTime, UserTime; + if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime, + &UserTime) == 0) + return; + + user_time = toDuration(UserTime); + sys_time = toDuration(KernelTime); +} + +// Some LLVM programs such as bugpoint produce core files as a normal part of +// their operation. To prevent the disk from filling up, this configuration +// item does what's necessary to prevent their generation. +void Process::PreventCoreFiles() { + // Windows does have the concept of core files, called minidumps. However, + // disabling minidumps for a particular application extends past the lifetime + // of that application, which is the incorrect behavior for this API. + // Additionally, the APIs require elevated privileges to disable and re- + // enable minidumps, which makes this untenable. For more information, see + // WerAddExcludedApplication and WerRemoveExcludedApplication (Vista and + // later). + // + // Windows also has modal pop-up message boxes. As this method is used by + // bugpoint, preventing these pop-ups is additionally important. + SetErrorMode(SEM_FAILCRITICALERRORS | + SEM_NOGPFAULTERRORBOX | + SEM_NOOPENFILEERRORBOX); + + coreFilesPrevented = true; +} + +/// Returns the environment variable \arg Name's value as a string encoded in +/// UTF-8. \arg Name is assumed to be in UTF-8 encoding. 
+Optional<std::string> Process::GetEnv(StringRef Name) { + // Convert the argument to UTF-16 to pass it to _wgetenv(). + SmallVector<wchar_t, 128> NameUTF16; + if (windows::UTF8ToUTF16(Name, NameUTF16)) + return None; + + // Environment variable can be encoded in non-UTF8 encoding, and there's no + // way to know what the encoding is. The only reliable way to look up + // multibyte environment variable is to use GetEnvironmentVariableW(). + SmallVector<wchar_t, MAX_PATH> Buf; + size_t Size = MAX_PATH; + do { + Buf.reserve(Size); + SetLastError(NO_ERROR); + Size = + GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity()); + if (Size == 0 && GetLastError() == ERROR_ENVVAR_NOT_FOUND) + return None; + + // Try again with larger buffer. + } while (Size > Buf.capacity()); + Buf.set_size(Size); + + // Convert the result from UTF-16 to UTF-8. + SmallVector<char, MAX_PATH> Res; + if (windows::UTF16ToUTF8(Buf.data(), Size, Res)) + return None; + return std::string(Res.data()); +} + +/// Perform wildcard expansion of Arg, or just push it into Args if it doesn't +/// have wildcards or doesn't match any files. +static std::error_code WildcardExpand(StringRef Arg, + SmallVectorImpl<const char *> &Args, + StringSaver &Saver) { + std::error_code EC; + + // Don't expand Arg if it does not contain any wildcard characters. This is + // the common case. Also don't wildcard expand /?. Always treat it as an + // option. + if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" || + Arg == "-?") { + Args.push_back(Arg.data()); + return EC; + } + + // Convert back to UTF-16 so we can call FindFirstFileW. + SmallVector<wchar_t, MAX_PATH> ArgW; + EC = windows::UTF8ToUTF16(Arg, ArgW); + if (EC) + return EC; + + // Search for matching files. + // FIXME: This assumes the wildcard is only in the file name and not in the + // directory portion of the file path. For example, it doesn't handle + // "*\foo.c" nor "s?c\bar.cpp". 
+ WIN32_FIND_DATAW FileData; + HANDLE FindHandle = FindFirstFileW(ArgW.data(), &FileData); + if (FindHandle == INVALID_HANDLE_VALUE) { + Args.push_back(Arg.data()); + return EC; + } + + // Extract any directory part of the argument. + SmallString<MAX_PATH> Dir = Arg; + sys::path::remove_filename(Dir); + const int DirSize = Dir.size(); + + do { + SmallString<MAX_PATH> FileName; + EC = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName), + FileName); + if (EC) + break; + + // Append FileName to Dir, and remove it afterwards. + llvm::sys::path::append(Dir, FileName); + Args.push_back(Saver.save(StringRef(Dir)).data()); + Dir.resize(DirSize); + } while (FindNextFileW(FindHandle, &FileData)); + + FindClose(FindHandle); + return EC; +} + +static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) { + // The first argument may contain just the name of the executable (e.g., + // "clang") rather than the full path, so swap it with the full path. + wchar_t ModuleName[MAX_PATH]; + size_t Length = ::GetModuleFileNameW(NULL, ModuleName, MAX_PATH); + if (Length == 0 || Length == MAX_PATH) { + return mapWindowsError(GetLastError()); + } + + // If the first argument is a shortened (8.3) name (which is possible even + // if we got the module name), the driver will have trouble distinguishing it + // (e.g., clang.exe v. clang++.exe), so expand it now. + Length = GetLongPathNameW(ModuleName, ModuleName, MAX_PATH); + if (Length == 0) + return mapWindowsError(GetLastError()); + if (Length > MAX_PATH) { + // We're not going to try to deal with paths longer than MAX_PATH, so we'll + // treat this as an error. GetLastError() returns ERROR_SUCCESS, which + // isn't useful, so we'll hardcode an appropriate error value. 
+    return mapWindowsError(ERROR_INSUFFICIENT_BUFFER);
+  }
+
+  std::error_code EC = windows::UTF16ToUTF8(ModuleName, Length, Filename);
+  if (EC)
+    return EC;
+
+  // Keep only the file-name component of the module path.
+  StringRef Base = sys::path::filename(Filename.data());
+  Filename.assign(Base.begin(), Base.end());
+  return std::error_code();
+}
+
+/// Converts the process command line to UTF-8, tokenizes it, wildcard-expands
+/// every token into \p Args, and finally replaces the file-name part of
+/// Args[0] with the name returned by GetExecutableName(). Strings stored in
+/// \p Args are allocated from \p Alloc.
+///
+/// Returns the first conversion/expansion error encountered, or
+/// errc::invalid_argument if no argument at all was produced.
+std::error_code
+windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
+                                 BumpPtrAllocator &Alloc) {
+  const wchar_t *CmdW = GetCommandLineW();
+  assert(CmdW);
+  std::error_code EC;
+  SmallString<MAX_PATH> Cmd;
+  EC = windows::UTF16ToUTF8(CmdW, wcslen(CmdW), Cmd);
+  if (EC)
+    return EC;
+
+  SmallVector<const char *, 20> TmpArgs;
+  StringSaver Saver(Alloc);
+  cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
+
+  for (const char *Arg : TmpArgs) {
+    EC = WildcardExpand(Arg, Args, Saver);
+    if (EC)
+      return EC;
+  }
+
+  // An empty command line yields no tokens; Args[0] must not be touched then.
+  if (Args.empty())
+    return std::make_error_code(std::errc::invalid_argument);
+
+  // Keep the directory part of the first argument but swap its file name for
+  // the executable name obtained from the system.
+  SmallVector<char, MAX_PATH> Arg0(Args[0], Args[0] + strlen(Args[0]));
+  SmallVector<char, MAX_PATH> Filename;
+  sys::path::remove_filename(Arg0);
+  EC = GetExecutableName(Filename);
+  if (EC)
+    return EC;
+  sys::path::append(Arg0, Filename);
+  Args[0] = Saver.save(Arg0).data();
+  return std::error_code();
+}
+
+// Nothing to fix up in this implementation; always reports success.
+std::error_code Process::FixupStandardFileDescriptors() {
+  return std::error_code();
+}
+
+// Closes \p FD and maps a failing ::close() to an errno-based error_code.
+std::error_code Process::SafelyCloseFileDescriptor(int FD) {
+  if (::close(FD) < 0)
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
+bool Process::StandardInIsUserInput() {
+  return FileDescriptorIsDisplayed(0);
+}
+
+bool Process::StandardOutIsDisplayed() {
+  return FileDescriptorIsDisplayed(1);
+}
+
+bool Process::StandardErrIsDisplayed() {
+  return FileDescriptorIsDisplayed(2);
+}
+
+// A descriptor counts as "displayed" when GetConsoleMode() succeeds on its
+// underlying OS handle.
+bool Process::FileDescriptorIsDisplayed(int fd) {
+  DWORD Mode;  // Unused
+  return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
+}
+
+// Returns the width of the console screen buffer attached to stdout, or 0 if
+// it cannot be queried.
+unsigned Process::StandardOutColumns() {
+  unsigned Columns = 0;
+  CONSOLE_SCREEN_BUFFER_INFO csbi;
+  if
(GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) + Columns = csbi.dwSize.X; + return Columns; +} + +unsigned Process::StandardErrColumns() { + unsigned Columns = 0; + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi)) + Columns = csbi.dwSize.X; + return Columns; +} + +// The terminal always has colors. +bool Process::FileDescriptorHasColors(int fd) { + return FileDescriptorIsDisplayed(fd); +} + +bool Process::StandardOutHasColors() { + return FileDescriptorHasColors(1); +} + +bool Process::StandardErrHasColors() { + return FileDescriptorHasColors(2); +} + +static bool UseANSI = false; +void Process::UseANSIEscapeCodes(bool enable) { +#if defined(ENABLE_VIRTUAL_TERMINAL_PROCESSING) + if (enable) { + HANDLE Console = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD Mode; + GetConsoleMode(Console, &Mode); + Mode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING; + SetConsoleMode(Console, Mode); + } +#endif + UseANSI = enable; +} + +namespace { +class DefaultColors +{ + private: + WORD defaultColor; + public: + DefaultColors() + :defaultColor(GetCurrentColor()) {} + static unsigned GetCurrentColor() { + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) + return csbi.wAttributes; + return 0; + } + WORD operator()() const { return defaultColor; } +}; + +DefaultColors defaultColors; + +WORD fg_color(WORD color) { + return color & (FOREGROUND_BLUE | FOREGROUND_GREEN | + FOREGROUND_INTENSITY | FOREGROUND_RED); +} + +WORD bg_color(WORD color) { + return color & (BACKGROUND_BLUE | BACKGROUND_GREEN | + BACKGROUND_INTENSITY | BACKGROUND_RED); +} +} + +bool Process::ColorNeedsFlush() { + return !UseANSI; +} + +const char *Process::OutputBold(bool bg) { + if (UseANSI) return "\033[1m"; + + WORD colors = DefaultColors::GetCurrentColor(); + if (bg) + colors |= BACKGROUND_INTENSITY; + else + colors |= FOREGROUND_INTENSITY; + 
SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); + return 0; +} + +const char *Process::OutputColor(char code, bool bold, bool bg) { + if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7]; + + WORD current = DefaultColors::GetCurrentColor(); + WORD colors; + if (bg) { + colors = ((code&1) ? BACKGROUND_RED : 0) | + ((code&2) ? BACKGROUND_GREEN : 0 ) | + ((code&4) ? BACKGROUND_BLUE : 0); + if (bold) + colors |= BACKGROUND_INTENSITY; + colors |= fg_color(current); + } else { + colors = ((code&1) ? FOREGROUND_RED : 0) | + ((code&2) ? FOREGROUND_GREEN : 0 ) | + ((code&4) ? FOREGROUND_BLUE : 0); + if (bold) + colors |= FOREGROUND_INTENSITY; + colors |= bg_color(current); + } + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); + return 0; +} + +static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) { + CONSOLE_SCREEN_BUFFER_INFO info; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &info); + return info.wAttributes; +} + +const char *Process::OutputReverse() { + if (UseANSI) return "\033[7m"; + + const WORD attributes + = GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE)); + + const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN | + FOREGROUND_RED | FOREGROUND_INTENSITY; + const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN | + BACKGROUND_RED | BACKGROUND_INTENSITY; + const WORD color_mask = foreground_mask | background_mask; + + WORD new_attributes = + ((attributes & FOREGROUND_BLUE )?BACKGROUND_BLUE :0) | + ((attributes & FOREGROUND_GREEN )?BACKGROUND_GREEN :0) | + ((attributes & FOREGROUND_RED )?BACKGROUND_RED :0) | + ((attributes & FOREGROUND_INTENSITY)?BACKGROUND_INTENSITY:0) | + ((attributes & BACKGROUND_BLUE )?FOREGROUND_BLUE :0) | + ((attributes & BACKGROUND_GREEN )?FOREGROUND_GREEN :0) | + ((attributes & BACKGROUND_RED )?FOREGROUND_RED :0) | + ((attributes & BACKGROUND_INTENSITY)?FOREGROUND_INTENSITY:0) | + 0; + new_attributes = (attributes & ~color_mask) | (new_attributes & 
color_mask); + + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), new_attributes); + return 0; +} + +const char *Process::ResetColor() { + if (UseANSI) return "\033[0m"; + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); + return 0; +} + +// Include GetLastError() in a fatal error message. +static void ReportLastErrorFatal(const char *Msg) { + std::string ErrMsg; + MakeErrMsg(&ErrMsg, Msg); + report_fatal_error(ErrMsg); +} + +unsigned Process::GetRandomNumber() { + HCRYPTPROV HCPC; + if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT)) + ReportLastErrorFatal("Could not acquire a cryptographic context"); + + ScopedCryptContext CryptoProvider(HCPC); + unsigned Ret; + if (!::CryptGenRandom(CryptoProvider, sizeof(Ret), + reinterpret_cast<BYTE *>(&Ret))) + ReportLastErrorFatal("Could not generate a random number"); + return Ret; +} + +typedef NTSTATUS(WINAPI* RtlGetVersionPtr)(PRTL_OSVERSIONINFOW); +#define STATUS_SUCCESS ((NTSTATUS)0x00000000L) + +llvm::VersionTuple llvm::GetWindowsOSVersion() { + HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); + if (hMod) { + auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); + if (getVer) { + RTL_OSVERSIONINFOEXW info{}; + info.dwOSVersionInfoSize = sizeof(info); + if (getVer((PRTL_OSVERSIONINFOW)&info) == STATUS_SUCCESS) { + return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0, + info.dwBuildNumber); + } + } + } + return llvm::VersionTuple(0, 0, 0, 0); +} + +bool llvm::RunningWindows8OrGreater() { + // Windows 8 is version 6.2, service pack 0. 
+ return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0); +} diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc new file mode 100644 index 0000000000000..a23ed95fc3904 --- /dev/null +++ b/llvm/lib/Support/Windows/Program.inc @@ -0,0 +1,523 @@ +//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of the Program class. +// +//===----------------------------------------------------------------------===// + +#include "WindowsSupport.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/WindowsError.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdio> +#include <fcntl.h> +#include <io.h> +#include <malloc.h> +#include <numeric> + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +namespace llvm { + +ProcessInfo::ProcessInfo() : Pid(0), Process(0), ReturnCode(0) {} + +ErrorOr<std::string> sys::findProgramByName(StringRef Name, + ArrayRef<StringRef> Paths) { + assert(!Name.empty() && "Must have a name!"); + + if (Name.find_first_of("/\\") != StringRef::npos) + return std::string(Name); + + const wchar_t *Path = nullptr; + std::wstring PathStorage; + if (!Paths.empty()) { + PathStorage.reserve(Paths.size() * MAX_PATH); + for (unsigned i = 0; i < Paths.size(); ++i) { + if (i) + 
PathStorage.push_back(L';'); + StringRef P = Paths[i]; + SmallVector<wchar_t, MAX_PATH> TmpPath; + if (std::error_code EC = windows::UTF8ToUTF16(P, TmpPath)) + return EC; + PathStorage.append(TmpPath.begin(), TmpPath.end()); + } + Path = PathStorage.c_str(); + } + + SmallVector<wchar_t, MAX_PATH> U16Name; + if (std::error_code EC = windows::UTF8ToUTF16(Name, U16Name)) + return EC; + + SmallVector<StringRef, 12> PathExts; + PathExts.push_back(""); + PathExts.push_back(".exe"); // FIXME: This must be in %PATHEXT%. + if (const char *PathExtEnv = std::getenv("PATHEXT")) + SplitString(PathExtEnv, PathExts, ";"); + + SmallVector<wchar_t, MAX_PATH> U16Result; + DWORD Len = MAX_PATH; + for (StringRef Ext : PathExts) { + SmallVector<wchar_t, MAX_PATH> U16Ext; + if (std::error_code EC = windows::UTF8ToUTF16(Ext, U16Ext)) + return EC; + + do { + U16Result.reserve(Len); + // Lets attach the extension manually. That is needed for files + // with a point in name like aaa.bbb. SearchPathW will not add extension + // from its argument to such files because it thinks they already had one. + SmallVector<wchar_t, MAX_PATH> U16NameExt; + if (std::error_code EC = + windows::UTF8ToUTF16(Twine(Name + Ext).str(), U16NameExt)) + return EC; + + Len = ::SearchPathW(Path, c_str(U16NameExt), nullptr, + U16Result.capacity(), U16Result.data(), nullptr); + } while (Len > U16Result.capacity()); + + if (Len != 0) + break; // Found it. 
+  }
+
+  if (Len == 0)
+    return mapWindowsError(::GetLastError());
+
+  U16Result.set_size(Len);
+
+  SmallVector<char, MAX_PATH> U8Result;
+  if (std::error_code EC =
+      windows::UTF16ToUTF8(U16Result.data(), U16Result.size(), U8Result))
+    return EC;
+
+  return std::string(U8Result.begin(), U8Result.end());
+}
+
+// Formats GetLastError() into *ErrMsg as "<prefix>: <system text> (0x<code>)",
+// using "Unknown error" when FormatMessageA() produces no text. Returns true
+// immediately if \p ErrMsg is null; otherwise returns whether a system
+// message could be obtained.
+bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix) {
+  if (!ErrMsg)
+    return true;
+  char *buffer = NULL;
+  // Capture the error code first; the calls below may overwrite it.
+  DWORD LastError = GetLastError();
+  DWORD R = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                               FORMAT_MESSAGE_FROM_SYSTEM |
+                               FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                           NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
+  if (R)
+    *ErrMsg = prefix + ": " + buffer;
+  else
+    *ErrMsg = prefix + ": Unknown error";
+  *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
+
+  LocalFree(buffer);
+  return R != 0;
+}
+
+// Produces an inheritable handle for standard descriptor \p fd of a child
+// process.
+//  - No \p Path: duplicates the current process's handle for \p fd.
+//  - Empty \p Path: opens "NUL".
+//  - Otherwise: opens the named file.
+// fd 0 is opened for reading and must already exist; any other fd is opened
+// for writing and is created/truncated. Returns INVALID_HANDLE_VALUE on
+// failure; \p ErrMsg is filled in only when CreateFileW() fails.
+static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
+                         std::string *ErrMsg) {
+  HANDLE h;
+  if (!Path) {
+    if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+                         GetCurrentProcess(), &h,
+                         0, TRUE, DUPLICATE_SAME_ACCESS))
+      return INVALID_HANDLE_VALUE;
+    return h;
+  }
+
+  std::string fname;
+  if (Path->empty())
+    fname = "NUL";
+  else
+    fname = *Path;
+
+  // bInheritHandle = TRUE so the child process can use the handle.
+  SECURITY_ATTRIBUTES sa;
+  sa.nLength = sizeof(sa);
+  sa.lpSecurityDescriptor = 0;
+  sa.bInheritHandle = TRUE;
+
+  SmallVector<wchar_t, 128> fnameUnicode;
+  if (Path->empty()) {
+    // Don't play long-path tricks on "NUL".
+    if (windows::UTF8ToUTF16(fname, fnameUnicode))
+      return INVALID_HANDLE_VALUE;
+  } else {
+    if (path::widenPath(fname, fnameUnicode))
+      return INVALID_HANDLE_VALUE;
+  }
+  h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
+                  FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+                  FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE) {
+    // fd 0 is the file read as stdin (input); every other fd is written to
+    // (output).
+    MakeErrMsg(ErrMsg, fname + ": Can't open file for " +
+        (fd ? "output" : "input"));
+  }
+
+  return h;
+}
+
+}
+
+static bool Execute(ProcessInfo &PI, StringRef Program,
+                    ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
+                    ArrayRef<Optional<StringRef>> Redirects,
+                    unsigned MemoryLimit, std::string *ErrMsg) {
+  if (!sys::fs::can_execute(Program)) {
+    if (ErrMsg)
+      *ErrMsg = "program not executable";
+    return false;
+  }
+
+  // can_execute may succeed by looking at Program + ".exe". CreateProcessW
+  // will implicitly add the .exe if we provide a command line without an
+  // executable path, but since we use an explicit executable, we have to add
+  // ".exe" ourselves.
+  SmallString<64> ProgramStorage;
+  if (!sys::fs::exists(Program))
+    Program = Twine(Program + ".exe").toStringRef(ProgramStorage);
+
+  // Windows wants a command line, not an array of args, to pass to the new
+  // process.  We have to concatenate them all, while quoting the args that
+  // have embedded spaces (or are empty).
+  std::string Command = flattenWindowsCommandLine(Args);
+
+  // The pointer to the environment block for the new process.
+  std::vector<wchar_t> EnvBlock;
+
+  if (Env) {
+    // An environment block consists of a null-terminated block of
+    // null-terminated strings. Convert the array of environment variables to
+    // an environment block by concatenating them.
+    for (const auto E : *Env) {
+      SmallVector<wchar_t, MAX_PATH> EnvString;
+      if (std::error_code ec = windows::UTF8ToUTF16(E, EnvString)) {
+        SetLastError(ec.value());
+        MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16");
+        return false;
+      }
+
+      EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end());
+      EnvBlock.push_back(0);
+    }
+    EnvBlock.push_back(0);
+  }
+
+  // Create a child process.
+ STARTUPINFOW si; + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + si.hStdInput = INVALID_HANDLE_VALUE; + si.hStdOutput = INVALID_HANDLE_VALUE; + si.hStdError = INVALID_HANDLE_VALUE; + + if (!Redirects.empty()) { + si.dwFlags = STARTF_USESTDHANDLES; + + si.hStdInput = RedirectIO(Redirects[0], 0, ErrMsg); + if (si.hStdInput == INVALID_HANDLE_VALUE) { + MakeErrMsg(ErrMsg, "can't redirect stdin"); + return false; + } + si.hStdOutput = RedirectIO(Redirects[1], 1, ErrMsg); + if (si.hStdOutput == INVALID_HANDLE_VALUE) { + CloseHandle(si.hStdInput); + MakeErrMsg(ErrMsg, "can't redirect stdout"); + return false; + } + if (Redirects[1] && Redirects[2] && *Redirects[1] == *Redirects[2]) { + // If stdout and stderr should go to the same place, redirect stderr + // to the handle already open for stdout. + if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput, + GetCurrentProcess(), &si.hStdError, + 0, TRUE, DUPLICATE_SAME_ACCESS)) { + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + MakeErrMsg(ErrMsg, "can't dup stderr to stdout"); + return false; + } + } else { + // Just redirect stderr + si.hStdError = RedirectIO(Redirects[2], 2, ErrMsg); + if (si.hStdError == INVALID_HANDLE_VALUE) { + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + MakeErrMsg(ErrMsg, "can't redirect stderr"); + return false; + } + } + } + + PROCESS_INFORMATION pi; + memset(&pi, 0, sizeof(pi)); + + fflush(stdout); + fflush(stderr); + + SmallVector<wchar_t, MAX_PATH> ProgramUtf16; + if (std::error_code ec = path::widenPath(Program, ProgramUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert application name to UTF-16")); + return false; + } + + SmallVector<wchar_t, MAX_PATH> CommandUtf16; + if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert command-line to UTF-16")); + return false; + } + + BOOL rc = CreateProcessW(ProgramUtf16.data(), 
CommandUtf16.data(), 0, 0, + TRUE, CREATE_UNICODE_ENVIRONMENT, + EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, + &pi); + DWORD err = GetLastError(); + + // Regardless of whether the process got created or not, we are done with + // the handles we created for it to inherit. + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + CloseHandle(si.hStdError); + + // Now return an error if the process didn't get created. + if (!rc) { + SetLastError(err); + MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") + + Program.str() + "'"); + return false; + } + + PI.Pid = pi.dwProcessId; + PI.Process = pi.hProcess; + + // Make sure these get closed no matter what. + ScopedCommonHandle hThread(pi.hThread); + + // Assign the process to a job if a memory limit is defined. + ScopedJobHandle hJob; + if (MemoryLimit != 0) { + hJob = CreateJobObjectW(0, 0); + bool success = false; + if (hJob) { + JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli; + memset(&jeli, 0, sizeof(jeli)); + jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY; + jeli.ProcessMemoryLimit = uintptr_t(MemoryLimit) * 1048576; + if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation, + &jeli, sizeof(jeli))) { + if (AssignProcessToJobObject(hJob, pi.hProcess)) + success = true; + } + } + if (!success) { + SetLastError(GetLastError()); + MakeErrMsg(ErrMsg, std::string("Unable to set memory limit")); + TerminateProcess(pi.hProcess, 1); + WaitForSingleObject(pi.hProcess, INFINITE); + return false; + } + } + + return true; +} + +static bool argNeedsQuotes(StringRef Arg) { + if (Arg.empty()) + return true; + return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|\n"); +} + +static std::string quoteSingleArg(StringRef Arg) { + std::string Result; + Result.push_back('"'); + + while (!Arg.empty()) { + size_t FirstNonBackslash = Arg.find_first_not_of('\\'); + size_t BackslashCount = FirstNonBackslash; + if (FirstNonBackslash == StringRef::npos) { + // The entire remainder of the 
argument is backslashes. Escape all of + // them and just early out. + BackslashCount = Arg.size(); + Result.append(BackslashCount * 2, '\\'); + break; + } + + if (Arg[FirstNonBackslash] == '\"') { + // This is an embedded quote. Escape all preceding backslashes, then + // add one additional backslash to escape the quote. + Result.append(BackslashCount * 2 + 1, '\\'); + Result.push_back('\"'); + } else { + // This is just a normal character. Don't escape any of the preceding + // backslashes, just append them as they are and then append the + // character. + Result.append(BackslashCount, '\\'); + Result.push_back(Arg[FirstNonBackslash]); + } + + // Drop all the backslashes, plus the following character. + Arg = Arg.drop_front(FirstNonBackslash + 1); + } + + Result.push_back('"'); + return Result; +} + +namespace llvm { +std::string sys::flattenWindowsCommandLine(ArrayRef<StringRef> Args) { + std::string Command; + for (StringRef Arg : Args) { + if (argNeedsQuotes(Arg)) + Command += quoteSingleArg(Arg); + else + Command += Arg; + + Command.push_back(' '); + } + + return Command; +} + +ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, + bool WaitUntilChildTerminates, std::string *ErrMsg) { + assert(PI.Pid && "invalid pid to wait on, process not started?"); + assert((PI.Process && PI.Process != INVALID_HANDLE_VALUE) && + "invalid process handle to wait on, process not started?"); + DWORD milliSecondsToWait = 0; + if (WaitUntilChildTerminates) + milliSecondsToWait = INFINITE; + else if (SecondsToWait > 0) + milliSecondsToWait = SecondsToWait * 1000; + + ProcessInfo WaitResult = PI; + DWORD WaitStatus = WaitForSingleObject(PI.Process, milliSecondsToWait); + if (WaitStatus == WAIT_TIMEOUT) { + if (SecondsToWait) { + if (!TerminateProcess(PI.Process, 1)) { + if (ErrMsg) + MakeErrMsg(ErrMsg, "Failed to terminate timed-out program"); + + // -2 indicates a crash or timeout as opposed to failure to execute. 
+ WaitResult.ReturnCode = -2; + CloseHandle(PI.Process); + return WaitResult; + } + WaitForSingleObject(PI.Process, INFINITE); + CloseHandle(PI.Process); + } else { + // Non-blocking wait. + return ProcessInfo(); + } + } + + // Get its exit status. + DWORD status; + BOOL rc = GetExitCodeProcess(PI.Process, &status); + DWORD err = GetLastError(); + if (err != ERROR_INVALID_HANDLE) + CloseHandle(PI.Process); + + if (!rc) { + SetLastError(err); + if (ErrMsg) + MakeErrMsg(ErrMsg, "Failed getting status for program"); + + // -2 indicates a crash or timeout as opposed to failure to execute. + WaitResult.ReturnCode = -2; + return WaitResult; + } + + if (!status) + return WaitResult; + + // Pass 10(Warning) and 11(Error) to the callee as negative value. + if ((status & 0xBFFF0000U) == 0x80000000U) + WaitResult.ReturnCode = static_cast<int>(status); + else if (status & 0xFF) + WaitResult.ReturnCode = status & 0x7FFFFFFF; + else + WaitResult.ReturnCode = 1; + + return WaitResult; +} + +std::error_code sys::ChangeStdinToBinary() { + int result = _setmode(_fileno(stdin), _O_BINARY); + if (result == -1) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + +std::error_code sys::ChangeStdoutToBinary() { + int result = _setmode(_fileno(stdout), _O_BINARY); + if (result == -1) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + +std::error_code +llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, + WindowsEncodingMethod Encoding) { + std::error_code EC; + llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OF_Text); + if (EC) + return EC; + + if (Encoding == WEM_UTF8) { + OS << Contents; + } else if (Encoding == WEM_CurrentCodePage) { + SmallVector<wchar_t, 1> ArgsUTF16; + SmallVector<char, 1> ArgsCurCP; + + if ((EC = windows::UTF8ToUTF16(Contents, ArgsUTF16))) + return EC; + + if ((EC = windows::UTF16ToCurCP( + ArgsUTF16.data(), ArgsUTF16.size(), ArgsCurCP))) + return EC; + + 
OS.write(ArgsCurCP.data(), ArgsCurCP.size()); + } else if (Encoding == WEM_UTF16) { + SmallVector<wchar_t, 1> ArgsUTF16; + + if ((EC = windows::UTF8ToUTF16(Contents, ArgsUTF16))) + return EC; + + // Endianness guessing + char BOM[2]; + uint16_t src = UNI_UTF16_BYTE_ORDER_MARK_NATIVE; + memcpy(BOM, &src, 2); + OS.write(BOM, 2); + OS.write((char *)ArgsUTF16.data(), ArgsUTF16.size() << 1); + } else { + llvm_unreachable("Unknown encoding"); + } + + if (OS.has_error()) + return make_error_code(errc::io_error); + + return EC; +} + +bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef<StringRef> Args) { + // The documented max length of the command line passed to CreateProcess. + static const size_t MaxCommandStringLength = 32768; + SmallVector<StringRef, 8> FullArgs; + FullArgs.push_back(Program); + FullArgs.append(Args.begin(), Args.end()); + std::string Result = flattenWindowsCommandLine(FullArgs); + return (Result.size() + 1) <= MaxCommandStringLength; +} +} diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc new file mode 100644 index 0000000000000..d962daf793489 --- /dev/null +++ b/llvm/lib/Support/Windows/Signals.inc @@ -0,0 +1,877 @@ +//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of the Signals class. 
+// +//===----------------------------------------------------------------------===// +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/WindowsError.h" +#include <algorithm> +#include <io.h> +#include <signal.h> +#include <stdio.h> + +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +// The Windows.h header must be after LLVM and standard headers. +#include "WindowsSupport.h" + +#ifdef __MINGW32__ + #include <imagehlp.h> +#else + #include <crtdbg.h> + #include <dbghelp.h> +#endif +#include <psapi.h> + +#ifdef _MSC_VER + #pragma comment(lib, "psapi.lib") +#elif __MINGW32__ + // The version of g++ that comes with MinGW does *not* properly understand + // the ll format specifier for printf. However, MinGW passes the format + // specifiers on to the MSVCRT entirely, and the CRT understands the ll + // specifier. So these warnings are spurious in this case. Since we compile + // with -Wall, this will generate these warnings which should be ignored. So + // we will turn off the warnings for this just file. However, MinGW also does + // not support push and pop for diagnostics, so we have to manually turn it + // back on at the end of the file. + #pragma GCC diagnostic ignored "-Wformat" + #pragma GCC diagnostic ignored "-Wformat-extra-args" + + #if !defined(__MINGW64_VERSION_MAJOR) + // MinGW.org does not have updated support for the 64-bit versions of the + // DebugHlp APIs. So we will have to load them manually. The structures and + // method signatures were pulled from DbgHelp.h in the Windows Platform SDK, + // and adjusted for brevity. 
+ typedef struct _IMAGEHLP_LINE64 { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD64 Address; + } IMAGEHLP_LINE64, *PIMAGEHLP_LINE64; + + typedef struct _IMAGEHLP_SYMBOL64 { + DWORD SizeOfStruct; + DWORD64 Address; + DWORD Size; + DWORD Flags; + DWORD MaxNameLength; + CHAR Name[1]; + } IMAGEHLP_SYMBOL64, *PIMAGEHLP_SYMBOL64; + + typedef struct _tagADDRESS64 { + DWORD64 Offset; + WORD Segment; + ADDRESS_MODE Mode; + } ADDRESS64, *LPADDRESS64; + + typedef struct _KDHELP64 { + DWORD64 Thread; + DWORD ThCallbackStack; + DWORD ThCallbackBStore; + DWORD NextCallback; + DWORD FramePointer; + DWORD64 KiCallUserMode; + DWORD64 KeUserCallbackDispatcher; + DWORD64 SystemRangeStart; + DWORD64 KiUserExceptionDispatcher; + DWORD64 StackBase; + DWORD64 StackLimit; + DWORD64 Reserved[5]; + } KDHELP64, *PKDHELP64; + + typedef struct _tagSTACKFRAME64 { + ADDRESS64 AddrPC; + ADDRESS64 AddrReturn; + ADDRESS64 AddrFrame; + ADDRESS64 AddrStack; + ADDRESS64 AddrBStore; + PVOID FuncTableEntry; + DWORD64 Params[4]; + BOOL Far; + BOOL Virtual; + DWORD64 Reserved[3]; + KDHELP64 KdHelp; + } STACKFRAME64, *LPSTACKFRAME64; + #endif // !defined(__MINGW64_VERSION_MAJOR) +#endif // __MINGW32__ + +typedef BOOL (__stdcall *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess, + DWORD64 qwBaseAddress, PVOID lpBuffer, DWORD nSize, + LPDWORD lpNumberOfBytesRead); + +typedef PVOID (__stdcall *PFUNCTION_TABLE_ACCESS_ROUTINE64)( HANDLE ahProcess, + DWORD64 AddrBase); + +typedef DWORD64 (__stdcall *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess, + DWORD64 Address); + +typedef DWORD64 (__stdcall *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess, + HANDLE hThread, LPADDRESS64 lpaddr); + +typedef BOOL(WINAPI *fpMiniDumpWriteDump)(HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, + PMINIDUMP_EXCEPTION_INFORMATION, + PMINIDUMP_USER_STREAM_INFORMATION, + PMINIDUMP_CALLBACK_INFORMATION); +static fpMiniDumpWriteDump fMiniDumpWriteDump; + +typedef BOOL (WINAPI *fpStackWalk64)(DWORD, HANDLE, HANDLE, 
LPSTACKFRAME64, + PVOID, PREAD_PROCESS_MEMORY_ROUTINE64, + PFUNCTION_TABLE_ACCESS_ROUTINE64, + PGET_MODULE_BASE_ROUTINE64, + PTRANSLATE_ADDRESS_ROUTINE64); +static fpStackWalk64 fStackWalk64; + +typedef DWORD64 (WINAPI *fpSymGetModuleBase64)(HANDLE, DWORD64); +static fpSymGetModuleBase64 fSymGetModuleBase64; + +typedef BOOL (WINAPI *fpSymGetSymFromAddr64)(HANDLE, DWORD64, + PDWORD64, PIMAGEHLP_SYMBOL64); +static fpSymGetSymFromAddr64 fSymGetSymFromAddr64; + +typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64, + PDWORD, PIMAGEHLP_LINE64); +static fpSymGetLineFromAddr64 fSymGetLineFromAddr64; + +typedef BOOL(WINAPI *fpSymGetModuleInfo64)(HANDLE hProcess, DWORD64 dwAddr, + PIMAGEHLP_MODULE64 ModuleInfo); +static fpSymGetModuleInfo64 fSymGetModuleInfo64; + +typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); +static fpSymFunctionTableAccess64 fSymFunctionTableAccess64; + +typedef DWORD (WINAPI *fpSymSetOptions)(DWORD); +static fpSymSetOptions fSymSetOptions; + +typedef BOOL (WINAPI *fpSymInitialize)(HANDLE, PCSTR, BOOL); +static fpSymInitialize fSymInitialize; + +typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID); +static fpEnumerateLoadedModules fEnumerateLoadedModules; + +static bool load64BitDebugHelp(void) { + HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); + if (hLib) { + fMiniDumpWriteDump = (fpMiniDumpWriteDump) + ::GetProcAddress(hLib, "MiniDumpWriteDump"); + fStackWalk64 = (fpStackWalk64) + ::GetProcAddress(hLib, "StackWalk64"); + fSymGetModuleBase64 = (fpSymGetModuleBase64) + ::GetProcAddress(hLib, "SymGetModuleBase64"); + fSymGetSymFromAddr64 = (fpSymGetSymFromAddr64) + ::GetProcAddress(hLib, "SymGetSymFromAddr64"); + fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64) + ::GetProcAddress(hLib, "SymGetLineFromAddr64"); + fSymGetModuleInfo64 = (fpSymGetModuleInfo64) + ::GetProcAddress(hLib, "SymGetModuleInfo64"); + fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64) + ::GetProcAddress(hLib, 
"SymFunctionTableAccess64"); + fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions"); + fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize"); + fEnumerateLoadedModules = (fpEnumerateLoadedModules) + ::GetProcAddress(hLib, "EnumerateLoadedModules64"); + } + return fStackWalk64 && fSymInitialize && fSymSetOptions && fMiniDumpWriteDump; +} + +using namespace llvm; + +// Forward declare. +static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep); +static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType); + +// The function to call if ctrl-c is pressed. +static void (*InterruptFunction)() = 0; + +static std::vector<std::string> *FilesToRemove = NULL; +static bool RegisteredUnhandledExceptionFilter = false; +static bool CleanupExecuted = false; +static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; + +// Windows creates a new thread to execute the console handler when an event +// (such as CTRL/C) occurs. This causes concurrency issues with the above +// globals which this critical section addresses. +static CRITICAL_SECTION CriticalSection; +static bool CriticalSectionInitialized = false; + +static StringRef Argv0; + +enum { +#if defined(_M_X64) + NativeMachineType = IMAGE_FILE_MACHINE_AMD64 +#elif defined(_M_ARM64) + NativeMachineType = IMAGE_FILE_MACHINE_ARM64 +#elif defined(_M_IX86) + NativeMachineType = IMAGE_FILE_MACHINE_I386 +#elif defined(_M_ARM) + NativeMachineType = IMAGE_FILE_MACHINE_ARMNT +#else + NativeMachineType = IMAGE_FILE_MACHINE_UNKNOWN +#endif +}; + +static bool printStackTraceWithLLVMSymbolizer(llvm::raw_ostream &OS, + HANDLE hProcess, HANDLE hThread, + STACKFRAME64 &StackFrameOrig, + CONTEXT *ContextOrig) { + // StackWalk64 modifies the incoming stack frame and context, so copy them. + STACKFRAME64 StackFrame = StackFrameOrig; + + // Copy the register context so that we don't modify it while we unwind. 
We + // could use InitializeContext + CopyContext, but that's only required to get + // at AVX registers, which typically aren't needed by StackWalk64. Reduce the + // flag set to indicate that there's less data. + CONTEXT Context = *ContextOrig; + Context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + + static void *StackTrace[256]; + size_t Depth = 0; + while (fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + &Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { + if (StackFrame.AddrFrame.Offset == 0) + break; + StackTrace[Depth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset; + if (Depth >= array_lengthof(StackTrace)) + break; + } + + return printSymbolizedStackTrace(Argv0, &StackTrace[0], Depth, OS); +} + +namespace { +struct FindModuleData { + void **StackTrace; + int Depth; + const char **Modules; + intptr_t *Offsets; + StringSaver *StrPool; +}; +} + +static BOOL CALLBACK findModuleCallback(PCSTR ModuleName, + DWORD64 ModuleBase, ULONG ModuleSize, + void *VoidData) { + FindModuleData *Data = (FindModuleData*)VoidData; + intptr_t Beg = ModuleBase; + intptr_t End = Beg + ModuleSize; + for (int I = 0; I < Data->Depth; I++) { + if (Data->Modules[I]) + continue; + intptr_t Addr = (intptr_t)Data->StackTrace[I]; + if (Beg <= Addr && Addr < End) { + Data->Modules[I] = Data->StrPool->save(ModuleName).data(); + Data->Offsets[I] = Addr - Beg; + } + } + return TRUE; +} + +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + if (!fEnumerateLoadedModules) + return false; + FindModuleData Data; + Data.StackTrace = StackTrace; + Data.Depth = Depth; + Data.Modules = Modules; + Data.Offsets = Offsets; + Data.StrPool = &StrPool; + fEnumerateLoadedModules(GetCurrentProcess(), findModuleCallback, &Data); + return true; +} + +static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, + HANDLE hThread, 
STACKFRAME64 &StackFrame, + CONTEXT *Context) { + // Initialize the symbol handler. + fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); + fSymInitialize(hProcess, NULL, TRUE); + + // Try llvm-symbolizer first. llvm-symbolizer knows how to deal with both PDBs + // and DWARF, so it should do a good job regardless of what debug info or + // linker is in use. + if (printStackTraceWithLLVMSymbolizer(OS, hProcess, hThread, StackFrame, + Context)) { + return; + } + + while (true) { + if (!fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { + break; + } + + if (StackFrame.AddrFrame.Offset == 0) + break; + + using namespace llvm; + // Print the PC in hexadecimal. + DWORD64 PC = StackFrame.AddrPC.Offset; +#if defined(_M_X64) || defined(_M_ARM64) + OS << format("0x%016llX", PC); +#elif defined(_M_IX86) || defined(_M_ARM) + OS << format("0x%08lX", static_cast<DWORD>(PC)); +#endif + +// Print the parameters. Assume there are four. +#if defined(_M_X64) || defined(_M_ARM64) + OS << format(" (0x%016llX 0x%016llX 0x%016llX 0x%016llX)", + StackFrame.Params[0], StackFrame.Params[1], StackFrame.Params[2], + StackFrame.Params[3]); +#elif defined(_M_IX86) || defined(_M_ARM) + OS << format(" (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", + static_cast<DWORD>(StackFrame.Params[0]), + static_cast<DWORD>(StackFrame.Params[1]), + static_cast<DWORD>(StackFrame.Params[2]), + static_cast<DWORD>(StackFrame.Params[3])); +#endif + // Verify the PC belongs to a module in this process. + if (!fSymGetModuleBase64(hProcess, PC)) { + OS << " <unknown module>\n"; + continue; + } + + // Print the symbol name. 
+ char buffer[512]; + IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer); + memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64)); + symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); + symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64); + + DWORD64 dwDisp; + if (!fSymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) { + OS << '\n'; + continue; + } + + buffer[511] = 0; + if (dwDisp > 0) + OS << format(", %s() + 0x%llX bytes(s)", (const char*)symbol->Name, + dwDisp); + else + OS << format(", %s", (const char*)symbol->Name); + + // Print the source file and line number information. + IMAGEHLP_LINE64 line = {}; + DWORD dwLineDisp; + line.SizeOfStruct = sizeof(line); + if (fSymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) { + OS << format(", %s, line %lu", line.FileName, line.LineNumber); + if (dwLineDisp > 0) + OS << format(" + 0x%lX byte(s)", dwLineDisp); + } + + OS << '\n'; + } +} + +namespace llvm { + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +#ifdef _MSC_VER +/// Emulates hitting "retry" from an "abort, retry, ignore" CRT debug report +/// dialog. "retry" raises an exception which ultimately triggers our stack +/// dumper. +static LLVM_ATTRIBUTE_UNUSED int +AvoidMessageBoxHook(int ReportType, char *Message, int *Return) { + // Set *Return to the retry code for the return value of _CrtDbgReport: + // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx + // This may also trigger just-in-time debugging via DebugBreak(). + if (Return) + *Return = 1; + // Don't call _CrtDbgReport. 
+ return TRUE; +} + +#endif + +extern "C" void HandleAbort(int Sig) { + if (Sig == SIGABRT) { + LLVM_BUILTIN_TRAP; + } +} + +static void InitializeThreading() { + if (CriticalSectionInitialized) + return; + + // Now's the time to create the critical section. This is the first time + // through here, and there's only one thread. + InitializeCriticalSection(&CriticalSection); + CriticalSectionInitialized = true; +} + +static void RegisterHandler() { + // If we cannot load up the APIs (which would be unexpected as they should + // exist on every version of Windows we support), we will bail out since + // there would be nothing to report. + if (!load64BitDebugHelp()) { + assert(false && "These APIs should always be available"); + return; + } + + if (RegisteredUnhandledExceptionFilter) { + EnterCriticalSection(&CriticalSection); + return; + } + + InitializeThreading(); + + // Enter it immediately. Now if someone hits CTRL/C, the console handler + // can't proceed until the globals are updated. + EnterCriticalSection(&CriticalSection); + + RegisteredUnhandledExceptionFilter = true; + OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter); + SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE); + + // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or + // else multi-threading problems will ensue. 
+} + +// The public API +bool sys::RemoveFileOnSignal(StringRef Filename, std::string* ErrMsg) { + RegisterHandler(); + + if (CleanupExecuted) { + if (ErrMsg) + *ErrMsg = "Process terminating -- cannot register for removal"; + return true; + } + + if (FilesToRemove == NULL) + FilesToRemove = new std::vector<std::string>; + + FilesToRemove->push_back(Filename); + + LeaveCriticalSection(&CriticalSection); + return false; +} + +// The public API +void sys::DontRemoveFileOnSignal(StringRef Filename) { + if (FilesToRemove == NULL) + return; + + RegisterHandler(); + + std::vector<std::string>::reverse_iterator I = + find(reverse(*FilesToRemove), Filename); + if (I != FilesToRemove->rend()) + FilesToRemove->erase(I.base()-1); + + LeaveCriticalSection(&CriticalSection); +} + +void sys::DisableSystemDialogsOnCrash() { + // Crash to stack trace handler on abort. + signal(SIGABRT, HandleAbort); + + // The following functions are not reliably accessible on MinGW. +#ifdef _MSC_VER + // We're already handling writing a "something went wrong" message. + _set_abort_behavior(0, _WRITE_ABORT_MSG); + // Disable Dr. Watson. + _set_abort_behavior(0, _CALL_REPORTFAULT); + _CrtSetReportHook(AvoidMessageBoxHook); +#endif + + // Disable standard error dialog box. + SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX | + SEM_NOOPENFILEERRORBOX); + _set_error_mode(_OUT_TO_STDERR); +} + +/// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the +/// process, print a stack trace and then exit. 
+void sys::PrintStackTraceOnErrorSignal(StringRef Argv0, + bool DisableCrashReporting) { + ::Argv0 = Argv0; + + if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) + Process::PreventCoreFiles(); + + DisableSystemDialogsOnCrash(); + RegisterHandler(); + LeaveCriticalSection(&CriticalSection); +} +} + +#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR) +// Provide a prototype for RtlCaptureContext, mingw32 from mingw.org is +// missing it but mingw-w64 has it. +extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); +#endif + +void llvm::sys::PrintStackTrace(raw_ostream &OS) { + STACKFRAME64 StackFrame = {}; + CONTEXT Context = {}; + ::RtlCaptureContext(&Context); +#if defined(_M_X64) + StackFrame.AddrPC.Offset = Context.Rip; + StackFrame.AddrStack.Offset = Context.Rsp; + StackFrame.AddrFrame.Offset = Context.Rbp; +#elif defined(_M_IX86) + StackFrame.AddrPC.Offset = Context.Eip; + StackFrame.AddrStack.Offset = Context.Esp; + StackFrame.AddrFrame.Offset = Context.Ebp; +#elif defined(_M_ARM64) + StackFrame.AddrPC.Offset = Context.Pc; + StackFrame.AddrStack.Offset = Context.Sp; + StackFrame.AddrFrame.Offset = Context.Fp; +#elif defined(_M_ARM) + StackFrame.AddrPC.Offset = Context.Pc; + StackFrame.AddrStack.Offset = Context.Sp; + StackFrame.AddrFrame.Offset = Context.R11; +#endif + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Mode = AddrModeFlat; + PrintStackTraceForThread(OS, GetCurrentProcess(), GetCurrentThread(), + StackFrame, &Context); +} + + +void llvm::sys::SetInterruptFunction(void (*IF)()) { + RegisterHandler(); + InterruptFunction = IF; + LeaveCriticalSection(&CriticalSection); +} + +void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { + // Unimplemented. +} + +void llvm::sys::SetPipeSignalFunction(void (*Handler)()) { + // Unimplemented. +} + +/// Add a function to be called when a signal is delivered to the process. 
The +/// handler can have a cookie passed to it to identify what instance of the +/// handler it is. +void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr, + void *Cookie) { + insertSignalHandler(FnPtr, Cookie); + RegisterHandler(); + LeaveCriticalSection(&CriticalSection); +} + +static void Cleanup() { + if (CleanupExecuted) + return; + + EnterCriticalSection(&CriticalSection); + + // Prevent other thread from registering new files and directories for + // removal, should we be executing because of the console handler callback. + CleanupExecuted = true; + + // FIXME: open files cannot be deleted. + if (FilesToRemove != NULL) + while (!FilesToRemove->empty()) { + llvm::sys::fs::remove(FilesToRemove->back()); + FilesToRemove->pop_back(); + } + llvm::sys::RunSignalHandlers(); + LeaveCriticalSection(&CriticalSection); +} + +void llvm::sys::RunInterruptHandlers() { + // The interrupt handler may be called from an interrupt, but it may also be + // called manually (such as the case of report_fatal_error with no registered + // error handler). We must ensure that the critical section is properly + // initialized. + InitializeThreading(); + Cleanup(); +} + +/// Find the Windows Registry Key for a given location. +/// +/// \returns a valid HKEY if the location exists, else NULL. +static HKEY FindWERKey(const llvm::Twine &RegistryLocation) { + HKEY Key; + if (ERROR_SUCCESS != ::RegOpenKeyExA(HKEY_LOCAL_MACHINE, + RegistryLocation.str().c_str(), 0, + KEY_QUERY_VALUE | KEY_READ, &Key)) + return NULL; + + return Key; +} + +/// Populate ResultDirectory with the value for "DumpFolder" for a given +/// Windows Registry key. +/// +/// \returns true if a valid value for DumpFolder exists, false otherwise. 
+static bool GetDumpFolder(HKEY Key, + llvm::SmallVectorImpl<char> &ResultDirectory) { + using llvm::sys::windows::UTF16ToUTF8; + + if (!Key) + return false; + + DWORD BufferLengthBytes = 0; + + if (ERROR_SUCCESS != ::RegGetValueW(Key, 0, L"DumpFolder", REG_EXPAND_SZ, + NULL, NULL, &BufferLengthBytes)) + return false; + + SmallVector<wchar_t, MAX_PATH> Buffer(BufferLengthBytes); + + if (ERROR_SUCCESS != ::RegGetValueW(Key, 0, L"DumpFolder", REG_EXPAND_SZ, + NULL, Buffer.data(), &BufferLengthBytes)) + return false; + + DWORD ExpandBufferSize = ::ExpandEnvironmentStringsW(Buffer.data(), NULL, 0); + + if (!ExpandBufferSize) + return false; + + SmallVector<wchar_t, MAX_PATH> ExpandBuffer(ExpandBufferSize); + + if (ExpandBufferSize != ::ExpandEnvironmentStringsW(Buffer.data(), + ExpandBuffer.data(), + ExpandBufferSize)) + return false; + + if (UTF16ToUTF8(ExpandBuffer.data(), ExpandBufferSize - 1, ResultDirectory)) + return false; + + return true; +} + +/// Populate ResultType with a valid MINIDUMP_TYPE based on the value of +/// "DumpType" for a given Windows Registry key. +/// +/// According to +/// https://msdn.microsoft.com/en-us/library/windows/desktop/bb787181(v=vs.85).aspx +/// valid values for DumpType are: +/// * 0: Custom dump +/// * 1: Mini dump +/// * 2: Full dump +/// If "Custom dump" is specified then the "CustomDumpFlags" field is read +/// containing a bitwise combination of MINIDUMP_TYPE values. +/// +/// \returns true if a valid value for ResultType can be set, false otherwise. 
+static bool GetDumpType(HKEY Key, MINIDUMP_TYPE &ResultType) { + if (!Key) + return false; + + DWORD DumpType; + DWORD TypeSize = sizeof(DumpType); + if (ERROR_SUCCESS != ::RegGetValueW(Key, NULL, L"DumpType", RRF_RT_REG_DWORD, + NULL, &DumpType, + &TypeSize)) + return false; + + switch (DumpType) { + case 0: { + DWORD Flags = 0; + if (ERROR_SUCCESS != ::RegGetValueW(Key, NULL, L"CustomDumpFlags", + RRF_RT_REG_DWORD, NULL, &Flags, + &TypeSize)) + return false; + + ResultType = static_cast<MINIDUMP_TYPE>(Flags); + break; + } + case 1: + ResultType = MiniDumpNormal; + break; + case 2: + ResultType = MiniDumpWithFullMemory; + break; + default: + return false; + } + return true; +} + +/// Write a Windows dump file containing process information that can be +/// used for post-mortem debugging. +/// +/// \returns zero error code if a mini dump created, actual error code +/// otherwise. +static std::error_code WINAPI +WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) { + using namespace llvm; + using namespace llvm::sys; + + std::string MainExecutableName = fs::getMainExecutable(nullptr, nullptr); + StringRef ProgramName; + + if (MainExecutableName.empty()) { + // If we can't get the executable filename, + // things are in worse shape than we realize + // and we should just bail out. + return mapWindowsError(::GetLastError()); + } + + ProgramName = path::filename(MainExecutableName.c_str()); + + // The Windows Registry location as specified at + // https://msdn.microsoft.com/en-us/library/windows/desktop/bb787181%28v=vs.85%29.aspx + // "Collecting User-Mode Dumps" that may optionally be set to collect crash + // dumps in a specified location. + StringRef LocalDumpsRegistryLocation = + "SOFTWARE\\Microsoft\\Windows\\Windows Error Reporting\\LocalDumps"; + + // The key pointing to the Registry location that may contain global crash + // dump settings. This will be NULL if the location can not be found. 
+ ScopedRegHandle DefaultLocalDumpsKey(FindWERKey(LocalDumpsRegistryLocation)); + + // The key pointing to the Registry location that may contain + // application-specific crash dump settings. This will be NULL if the + // location can not be found. + ScopedRegHandle AppSpecificKey( + FindWERKey(Twine(LocalDumpsRegistryLocation) + "\\" + ProgramName)); + + // Look to see if a dump type is specified in the registry; first with the + // app-specific key and failing that with the global key. If none are found + // default to a normal dump (GetDumpType will return false either if the key + // is NULL or if there is no valid DumpType value at its location). + MINIDUMP_TYPE DumpType; + if (!GetDumpType(AppSpecificKey, DumpType)) + if (!GetDumpType(DefaultLocalDumpsKey, DumpType)) + DumpType = MiniDumpNormal; + + // Look to see if a dump location is specified in the registry; first with the + // app-specific key and failing that with the global key. If none are found + // we'll just create the dump file in the default temporary file location + // (GetDumpFolder will return false either if the key is NULL or if there is + // no valid DumpFolder value at its location). + bool ExplicitDumpDirectorySet = true; + SmallString<MAX_PATH> DumpDirectory; + if (!GetDumpFolder(AppSpecificKey, DumpDirectory)) + if (!GetDumpFolder(DefaultLocalDumpsKey, DumpDirectory)) + ExplicitDumpDirectorySet = false; + + int FD; + SmallString<MAX_PATH> DumpPath; + + if (ExplicitDumpDirectorySet) { + if (std::error_code EC = fs::create_directories(DumpDirectory)) + return EC; + if (std::error_code EC = fs::createUniqueFile( + Twine(DumpDirectory) + "\\" + ProgramName + ".%%%%%%.dmp", FD, + DumpPath)) + return EC; + } else if (std::error_code EC = + fs::createTemporaryFile(ProgramName, "dmp", FD, DumpPath)) + return EC; + + // Our support functions return a file descriptor but Windows wants a handle. 
+ ScopedCommonHandle FileHandle(reinterpret_cast<HANDLE>(_get_osfhandle(FD))); + + if (!fMiniDumpWriteDump(::GetCurrentProcess(), ::GetCurrentProcessId(), + FileHandle, DumpType, ExceptionInfo, NULL, NULL)) + return mapWindowsError(::GetLastError()); + + llvm::errs() << "Wrote crash dump file \"" << DumpPath << "\"\n"; + return std::error_code(); +} + +static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { + Cleanup(); + + // We'll automatically write a Minidump file here to help diagnose + // the nasty sorts of crashes that aren't 100% reproducible from a set of + // inputs (or in the event that the user is unable or unwilling to provide a + // reproducible case). + if (!llvm::sys::Process::AreCoreFilesPrevented()) { + MINIDUMP_EXCEPTION_INFORMATION ExceptionInfo; + ExceptionInfo.ThreadId = ::GetCurrentThreadId(); + ExceptionInfo.ExceptionPointers = ep; + ExceptionInfo.ClientPointers = FALSE; + + if (std::error_code EC = WriteWindowsDumpFile(&ExceptionInfo)) + llvm::errs() << "Could not write crash dump file: " << EC.message() + << "\n"; + } + + // Initialize the STACKFRAME structure. 
+ STACKFRAME64 StackFrame = {}; + +#if defined(_M_X64) + StackFrame.AddrPC.Offset = ep->ContextRecord->Rip; + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp; + StackFrame.AddrFrame.Mode = AddrModeFlat; +#elif defined(_M_IX86) + StackFrame.AddrPC.Offset = ep->ContextRecord->Eip; + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Offset = ep->ContextRecord->Esp; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp; + StackFrame.AddrFrame.Mode = AddrModeFlat; +#elif defined(_M_ARM64) || defined(_M_ARM) + StackFrame.AddrPC.Offset = ep->ContextRecord->Pc; + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Offset = ep->ContextRecord->Sp; + StackFrame.AddrStack.Mode = AddrModeFlat; +#if defined(_M_ARM64) + StackFrame.AddrFrame.Offset = ep->ContextRecord->Fp; +#else + StackFrame.AddrFrame.Offset = ep->ContextRecord->R11; +#endif + StackFrame.AddrFrame.Mode = AddrModeFlat; +#endif + + HANDLE hProcess = GetCurrentProcess(); + HANDLE hThread = GetCurrentThread(); + PrintStackTraceForThread(llvm::errs(), hProcess, hThread, StackFrame, + ep->ContextRecord); + + _exit(ep->ExceptionRecord->ExceptionCode); +} + +static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) { + // We are running in our very own thread, courtesy of Windows. + EnterCriticalSection(&CriticalSection); + Cleanup(); + + // If an interrupt function has been set, go and run one it; otherwise, + // the process dies. + void (*IF)() = InterruptFunction; + InterruptFunction = 0; // Don't run it on another CTRL-C. + + if (IF) { + // Note: if the interrupt function throws an exception, there is nothing + // to catch it in this thread so it will kill the process. + IF(); // Run it now. + LeaveCriticalSection(&CriticalSection); + return TRUE; // Don't kill the process. 
+ } + + // Allow normal processing to take place; i.e., the process dies. + LeaveCriticalSection(&CriticalSection); + return FALSE; +} + +#if __MINGW32__ + // We turned these warnings off for this file so that MinGW-g++ doesn't + // complain about the ll format specifiers used. Now we are turning the + // warnings back on. If MinGW starts to support diagnostic stacks, we can + // replace this with a pop. + #pragma GCC diagnostic warning "-Wformat" + #pragma GCC diagnostic warning "-Wformat-extra-args" +#endif diff --git a/llvm/lib/Support/Windows/ThreadLocal.inc b/llvm/lib/Support/Windows/ThreadLocal.inc new file mode 100644 index 0000000000000..1e0ed955e9abe --- /dev/null +++ b/llvm/lib/Support/Windows/ThreadLocal.inc @@ -0,0 +1,51 @@ +//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 specific (non-pthread) ThreadLocal class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. 
+//===----------------------------------------------------------------------===// + +#include "WindowsSupport.h" +#include "llvm/Support/ThreadLocal.h" + +namespace llvm { + +sys::ThreadLocalImpl::ThreadLocalImpl() : data() { + static_assert(sizeof(DWORD) <= sizeof(data), "size too big"); + DWORD* tls = reinterpret_cast<DWORD*>(&data); + *tls = TlsAlloc(); + assert(*tls != TLS_OUT_OF_INDEXES); +} + +sys::ThreadLocalImpl::~ThreadLocalImpl() { + DWORD* tls = reinterpret_cast<DWORD*>(&data); + TlsFree(*tls); +} + +void *sys::ThreadLocalImpl::getInstance() { + DWORD* tls = reinterpret_cast<DWORD*>(&data); + return TlsGetValue(*tls); +} + +void sys::ThreadLocalImpl::setInstance(const void* d){ + DWORD* tls = reinterpret_cast<DWORD*>(&data); + int errorcode = TlsSetValue(*tls, const_cast<void*>(d)); + assert(errorcode != 0); + (void)errorcode; +} + +void sys::ThreadLocalImpl::removeInstance() { + setInstance(0); +} + +} diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc new file mode 100644 index 0000000000000..96649472cc90b --- /dev/null +++ b/llvm/lib/Support/Windows/Threading.inc @@ -0,0 +1,124 @@ +//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of Threading functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" + +#include "WindowsSupport.h" +#include <process.h> + +// Windows will at times define MemoryFence. 
+#ifdef MemoryFence +#undef MemoryFence +#endif + +namespace { + struct ThreadInfo { + void(*func)(void*); + void *param; + }; +} + +static unsigned __stdcall ThreadCallback(void *param) { + struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param); + info->func(info->param); + + return 0; +} + +void llvm::llvm_execute_on_thread(void(*Fn)(void*), void *UserData, + unsigned RequestedStackSize) { + struct ThreadInfo param = { Fn, UserData }; + + HANDLE hThread = (HANDLE)::_beginthreadex(NULL, + RequestedStackSize, ThreadCallback, + ¶m, 0, NULL); + + if (hThread) { + // We actually don't care whether the wait succeeds or fails, in + // the same way we don't care whether the pthread_join call succeeds + // or fails. There's not much we could do if this were to fail. But + // on success, this call will wait until the thread finishes executing + // before returning. + (void)::WaitForSingleObject(hThread, INFINITE); + ::CloseHandle(hThread); + } +} + +uint64_t llvm::get_threadid() { + return uint64_t(::GetCurrentThreadId()); +} + +uint32_t llvm::get_max_thread_name_length() { return 0; } + +#if defined(_MSC_VER) +static void SetThreadName(DWORD Id, LPCSTR Name) { + constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; + +#pragma pack(push, 8) + struct THREADNAME_INFO { + DWORD dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to thread name + DWORD dwThreadId; // Thread ID (-1 == current thread) + DWORD dwFlags; // Reserved. Do not use. + }; +#pragma pack(pop) + + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = Name; + info.dwThreadId = Id; + info.dwFlags = 0; + + __try { + ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), + (ULONG_PTR *)&info); + } + __except (EXCEPTION_EXECUTE_HANDLER) { + } +} +#endif + +void llvm::set_thread_name(const Twine &Name) { +#if defined(_MSC_VER) + // Make sure the input is null terminated. 
+ SmallString<64> Storage; + StringRef NameStr = Name.toNullTerminatedStringRef(Storage); + SetThreadName(::GetCurrentThreadId(), NameStr.data()); +#endif +} + +void llvm::get_thread_name(SmallVectorImpl<char> &Name) { + // "Name" is not an inherent property of a thread on Windows. In fact, when + // you "set" the name, you are only firing a one-time message to a debugger + // which it interprets as a program setting its threads' name. We may be + // able to get fancy by creating a TLS entry when someone calls + // set_thread_name so that subsequent calls to get_thread_name return this + // value. + Name.clear(); +} + +SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { + // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority + // Begin background processing mode. The system lowers the resource scheduling + // priorities of the thread so that it can perform background work without + // significantly affecting activity in the foreground. + // End background processing mode. The system restores the resource scheduling + // priorities of the thread as they were before the thread entered background + // processing mode. + return SetThreadPriority(GetCurrentThread(), + Priority == ThreadPriority::Background + ? THREAD_MODE_BACKGROUND_BEGIN + : THREAD_MODE_BACKGROUND_END) + ? SetThreadPriorityResult::SUCCESS + : SetThreadPriorityResult::FAILURE; +} diff --git a/llvm/lib/Support/Windows/Watchdog.inc b/llvm/lib/Support/Windows/Watchdog.inc new file mode 100644 index 0000000000000..a362c999de769 --- /dev/null +++ b/llvm/lib/Support/Windows/Watchdog.inc @@ -0,0 +1,23 @@ +//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the generic Windows implementation of the Watchdog class. +// +//===----------------------------------------------------------------------===// + +// TODO: implement. +// Currently this is only used by PrettyStackTrace which is also unimplemented +// on Windows. Roughly, a Windows implementation would use CreateWaitableTimer +// and a second thread to run the TimerAPCProc. + +namespace llvm { + namespace sys { + Watchdog::Watchdog(unsigned int seconds) {} + Watchdog::~Watchdog() {} + } +} diff --git a/llvm/lib/Support/Windows/WindowsSupport.h b/llvm/lib/Support/Windows/WindowsSupport.h new file mode 100644 index 0000000000000..2e2e97430b76e --- /dev/null +++ b/llvm/lib/Support/Windows/WindowsSupport.h @@ -0,0 +1,235 @@ +//===- WindowsSupport.h - Common Windows Include File -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines things specific to Windows implementations. In addition to +// providing some helpers for working with win32 APIs, this header wraps +// <windows.h> with some portability macros. Always include WindowsSupport.h +// instead of including <windows.h> directly. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_WINDOWSSUPPORT_H +#define LLVM_SUPPORT_WINDOWSSUPPORT_H + +// mingw-w64 tends to define it as 0x0502 in its headers. +#undef _WIN32_WINNT +#undef _WIN32_IE + +// Require at least Windows 7 API. +#define _WIN32_WINNT 0x0601 +#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed. +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Chrono.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/VersionTuple.h" +#include <cassert> +#include <string> +#include <system_error> +#include <windows.h> + +// Must be included after windows.h +#include <wincrypt.h> + +namespace llvm { + +/// Determines if the program is running on Windows 8 or newer. This +/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended +/// to supercede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't +/// yet have VersionHelpers.h, so we have our own helper. +bool RunningWindows8OrGreater(); + +/// Returns the Windows version as Major.Minor.0.BuildNumber. Uses +/// RtlGetVersion or GetVersionEx under the hood depending on what is available. +/// GetVersionEx is deprecated, but this API exposes the build number which can +/// be useful for working around certain kernel bugs. 
+llvm::VersionTuple GetWindowsOSVersion(); + +bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix); + +template <typename HandleTraits> +class ScopedHandle { + typedef typename HandleTraits::handle_type handle_type; + handle_type Handle; + + ScopedHandle(const ScopedHandle &other) = delete; + void operator=(const ScopedHandle &other) = delete; +public: + ScopedHandle() + : Handle(HandleTraits::GetInvalid()) {} + + explicit ScopedHandle(handle_type h) + : Handle(h) {} + + ~ScopedHandle() { + if (HandleTraits::IsValid(Handle)) + HandleTraits::Close(Handle); + } + + handle_type take() { + handle_type t = Handle; + Handle = HandleTraits::GetInvalid(); + return t; + } + + ScopedHandle &operator=(handle_type h) { + if (HandleTraits::IsValid(Handle)) + HandleTraits::Close(Handle); + Handle = h; + return *this; + } + + // True if Handle is valid. + explicit operator bool() const { + return HandleTraits::IsValid(Handle) ? true : false; + } + + operator handle_type() const { + return Handle; + } +}; + +struct CommonHandleTraits { + typedef HANDLE handle_type; + + static handle_type GetInvalid() { + return INVALID_HANDLE_VALUE; + } + + static void Close(handle_type h) { + ::CloseHandle(h); + } + + static bool IsValid(handle_type h) { + return h != GetInvalid(); + } +}; + +struct JobHandleTraits : CommonHandleTraits { + static handle_type GetInvalid() { + return NULL; + } +}; + +struct CryptContextTraits : CommonHandleTraits { + typedef HCRYPTPROV handle_type; + + static handle_type GetInvalid() { + return 0; + } + + static void Close(handle_type h) { + ::CryptReleaseContext(h, 0); + } + + static bool IsValid(handle_type h) { + return h != GetInvalid(); + } +}; + +struct RegTraits : CommonHandleTraits { + typedef HKEY handle_type; + + static handle_type GetInvalid() { + return NULL; + } + + static void Close(handle_type h) { + ::RegCloseKey(h); + } + + static bool IsValid(handle_type h) { + return h != GetInvalid(); + } +}; + +struct FindHandleTraits : 
CommonHandleTraits { + static void Close(handle_type h) { + ::FindClose(h); + } +}; + +struct FileHandleTraits : CommonHandleTraits {}; + +typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle; +typedef ScopedHandle<FileHandleTraits> ScopedFileHandle; +typedef ScopedHandle<CryptContextTraits> ScopedCryptContext; +typedef ScopedHandle<RegTraits> ScopedRegHandle; +typedef ScopedHandle<FindHandleTraits> ScopedFindHandle; +typedef ScopedHandle<JobHandleTraits> ScopedJobHandle; + +template <class T> +class SmallVectorImpl; + +template <class T> +typename SmallVectorImpl<T>::const_pointer +c_str(SmallVectorImpl<T> &str) { + str.push_back(0); + str.pop_back(); + return str.data(); +} + +namespace sys { + +inline std::chrono::nanoseconds toDuration(FILETIME Time) { + ULARGE_INTEGER TimeInteger; + TimeInteger.LowPart = Time.dwLowDateTime; + TimeInteger.HighPart = Time.dwHighDateTime; + + // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond) + return std::chrono::nanoseconds(100 * TimeInteger.QuadPart); +} + +inline TimePoint<> toTimePoint(FILETIME Time) { + ULARGE_INTEGER TimeInteger; + TimeInteger.LowPart = Time.dwLowDateTime; + TimeInteger.HighPart = Time.dwHighDateTime; + + // Adjust for different epoch + TimeInteger.QuadPart -= 11644473600ll * 10000000; + + // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond) + return TimePoint<>(std::chrono::nanoseconds(100 * TimeInteger.QuadPart)); +} + +inline FILETIME toFILETIME(TimePoint<> TP) { + ULARGE_INTEGER TimeInteger; + TimeInteger.QuadPart = TP.time_since_epoch().count() / 100; + TimeInteger.QuadPart += 11644473600ll * 10000000; + + FILETIME Time; + Time.dwLowDateTime = TimeInteger.LowPart; + Time.dwHighDateTime = TimeInteger.HighPart; + return Time; +} + +namespace windows { +// Returns command line arguments. Unlike arguments given to main(), +// this function guarantees that the returned arguments are encoded in +// UTF-8 regardless of the current code page setting. 
+std::error_code GetCommandLineArguments(SmallVectorImpl<const char *> &Args, + BumpPtrAllocator &Alloc); +} // end namespace windows +} // end namespace sys +} // end namespace llvm. + +#endif diff --git a/llvm/lib/Support/Windows/explicit_symbols.inc b/llvm/lib/Support/Windows/explicit_symbols.inc new file mode 100644 index 0000000000000..0a4fda1d4e8c8 --- /dev/null +++ b/llvm/lib/Support/Windows/explicit_symbols.inc @@ -0,0 +1,96 @@ +/* in libgcc.a */ + +#ifdef HAVE__ALLOCA + EXPLICIT_SYMBOL(_alloca) + EXPLICIT_SYMBOL2(alloca, _alloca) +#endif +#ifdef HAVE___ALLOCA + EXPLICIT_SYMBOL(__alloca) +#endif +#ifdef HAVE___CHKSTK + EXPLICIT_SYMBOL(__chkstk) +#endif +#ifdef HAVE___CHKSTK_MS + EXPLICIT_SYMBOL(__chkstk_ms) +#endif +#ifdef HAVE____CHKSTK + EXPLICIT_SYMBOL(___chkstk) +#endif +#ifdef HAVE____CHKSTK_MS + EXPLICIT_SYMBOL(___chkstk_ms) +#endif +#ifdef HAVE___MAIN + EXPLICIT_SYMBOL(__main) // FIXME: Don't call it. +#endif + +#ifdef HAVE___ASHLDI3 + EXPLICIT_SYMBOL(__ashldi3) +#endif +#ifdef HAVE___ASHRDI3 + EXPLICIT_SYMBOL(__ashrdi3) +#endif +#ifdef HAVE___CMPDI2 // FIXME: unused + EXPLICIT_SYMBOL(__cmpdi2) +#endif +#ifdef HAVE___DIVDI3 + EXPLICIT_SYMBOL(__divdi3) +#endif +#ifdef HAVE___FIXDFDI + EXPLICIT_SYMBOL(__fixdfdi) +#endif +#ifdef HAVE___FIXSFDI + EXPLICIT_SYMBOL(__fixsfdi) +#endif +#ifdef HAVE___FIXUNSDFDI + EXPLICIT_SYMBOL(__fixunsdfdi) +#endif +#ifdef HAVE___FIXUNSSFDI + EXPLICIT_SYMBOL(__fixunssfdi) +#endif +#ifdef HAVE___FLOATDIDF + EXPLICIT_SYMBOL(__floatdidf) +#endif +#ifdef HAVE___FLOATDISF + EXPLICIT_SYMBOL(__floatdisf) +#endif +#ifdef HAVE___LSHRDI3 + EXPLICIT_SYMBOL(__lshrdi3) +#endif +#ifdef HAVE___MODDI3 + EXPLICIT_SYMBOL(__moddi3) +#endif +#ifdef HAVE___UDIVDI3 + EXPLICIT_SYMBOL(__udivdi3) +#endif +#ifdef HAVE___UMODDI3 + EXPLICIT_SYMBOL(__umoddi3) +#endif + +/* msvcrt */ +#if defined(_MSC_VER) + EXPLICIT_SYMBOL2(alloca, _alloca_probe) + +#ifdef _M_IX86 +#define INLINE_DEF_FLOAT_SYMBOL(SYM, ARGC) INLINE_DEF_SYMBOL##ARGC(float, SYM) + 
INLINE_DEF_FLOAT_SYMBOL(acosf, 1) + INLINE_DEF_FLOAT_SYMBOL(asinf, 1) + INLINE_DEF_FLOAT_SYMBOL(atanf, 1) + INLINE_DEF_FLOAT_SYMBOL(atan2f, 2) + INLINE_DEF_FLOAT_SYMBOL(ceilf, 1) + INLINE_DEF_FLOAT_SYMBOL(cosf, 1) + INLINE_DEF_FLOAT_SYMBOL(coshf, 1) + INLINE_DEF_FLOAT_SYMBOL(expf, 1) + INLINE_DEF_FLOAT_SYMBOL(floorf, 1) + INLINE_DEF_FLOAT_SYMBOL(fmodf, 2) + INLINE_DEF_FLOAT_SYMBOL(logf, 1) + INLINE_DEF_FLOAT_SYMBOL(powf, 2) + INLINE_DEF_FLOAT_SYMBOL(sinf, 1) + INLINE_DEF_FLOAT_SYMBOL(sinhf, 1) + INLINE_DEF_FLOAT_SYMBOL(sqrtf, 1) + INLINE_DEF_FLOAT_SYMBOL(tanf, 1) + INLINE_DEF_FLOAT_SYMBOL(tanhf, 1) + +#undef INLINE_DEF_FLOAT_SYMBOL +#endif + +#endif diff --git a/llvm/lib/Support/WithColor.cpp b/llvm/lib/Support/WithColor.cpp new file mode 100644 index 0000000000000..345dd9cf39492 --- /dev/null +++ b/llvm/lib/Support/WithColor.cpp @@ -0,0 +1,120 @@ +//===- WithColor.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +cl::OptionCategory llvm::ColorCategory("Color Options"); + +static cl::opt<cl::boolOrDefault> + UseColor("color", cl::cat(ColorCategory), + cl::desc("Use colors in output (default=autodetect)"), + cl::init(cl::BOU_UNSET)); + +WithColor::WithColor(raw_ostream &OS, HighlightColor Color, bool DisableColors) + : OS(OS), DisableColors(DisableColors) { + // Detect color from terminal type unless the user passed the --color option. 
+ if (colorsEnabled()) { + switch (Color) { + case HighlightColor::Address: + OS.changeColor(raw_ostream::YELLOW); + break; + case HighlightColor::String: + OS.changeColor(raw_ostream::GREEN); + break; + case HighlightColor::Tag: + OS.changeColor(raw_ostream::BLUE); + break; + case HighlightColor::Attribute: + OS.changeColor(raw_ostream::CYAN); + break; + case HighlightColor::Enumerator: + OS.changeColor(raw_ostream::MAGENTA); + break; + case HighlightColor::Macro: + OS.changeColor(raw_ostream::RED); + break; + case HighlightColor::Error: + OS.changeColor(raw_ostream::RED, true); + break; + case HighlightColor::Warning: + OS.changeColor(raw_ostream::MAGENTA, true); + break; + case HighlightColor::Note: + OS.changeColor(raw_ostream::BLACK, true); + break; + case HighlightColor::Remark: + OS.changeColor(raw_ostream::BLUE, true); + break; + } + } +} + +raw_ostream &WithColor::error() { return error(errs()); } + +raw_ostream &WithColor::warning() { return warning(errs()); } + +raw_ostream &WithColor::note() { return note(errs()); } + +raw_ostream &WithColor::remark() { return remark(errs()); } + +raw_ostream &WithColor::error(raw_ostream &OS, StringRef Prefix, + bool DisableColors) { + if (!Prefix.empty()) + OS << Prefix << ": "; + return WithColor(OS, HighlightColor::Error, DisableColors).get() + << "error: "; +} + +raw_ostream &WithColor::warning(raw_ostream &OS, StringRef Prefix, + bool DisableColors) { + if (!Prefix.empty()) + OS << Prefix << ": "; + return WithColor(OS, HighlightColor::Warning, DisableColors).get() + << "warning: "; +} + +raw_ostream &WithColor::note(raw_ostream &OS, StringRef Prefix, + bool DisableColors) { + if (!Prefix.empty()) + OS << Prefix << ": "; + return WithColor(OS, HighlightColor::Note, DisableColors).get() << "note: "; +} + +raw_ostream &WithColor::remark(raw_ostream &OS, StringRef Prefix, + bool DisableColors) { + if (!Prefix.empty()) + OS << Prefix << ": "; + return WithColor(OS, HighlightColor::Remark, DisableColors).get() + << 
"remark: "; +} + +bool WithColor::colorsEnabled() { + if (DisableColors) + return false; + if (UseColor == cl::BOU_UNSET) + return OS.has_colors(); + return UseColor == cl::BOU_TRUE; +} + +WithColor &WithColor::changeColor(raw_ostream::Colors Color, bool Bold, + bool BG) { + if (colorsEnabled()) + OS.changeColor(Color, Bold, BG); + return *this; +} + +WithColor &WithColor::resetColor() { + if (colorsEnabled()) + OS.resetColor(); + return *this; +} + +WithColor::~WithColor() { resetColor(); } diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp new file mode 100644 index 0000000000000..9b2fe9c4418a1 --- /dev/null +++ b/llvm/lib/Support/YAMLParser.cpp @@ -0,0 +1,2442 @@ +//===- YAMLParser.cpp - Simple YAML parser --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a YAML parser. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YAMLParser.h" +#include "llvm/ADT/AllocatorList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/Unicode.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <map> +#include <memory> +#include <string> +#include <system_error> +#include <utility> + +using namespace llvm; +using namespace yaml; + +enum UnicodeEncodingForm { + UEF_UTF32_LE, ///< UTF-32 Little Endian + UEF_UTF32_BE, ///< UTF-32 Big Endian + UEF_UTF16_LE, ///< UTF-16 Little Endian + UEF_UTF16_BE, ///< UTF-16 Big Endian + UEF_UTF8, ///< UTF-8 or ascii. + UEF_Unknown ///< Not a valid Unicode encoding. +}; + +/// EncodingInfo - Holds the encoding type and length of the byte order mark if +/// it exists. Length is in {0, 2, 3, 4}. +using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; + +/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode +/// encoding form of \a Input. +/// +/// @param Input A string of length 0 or more. +/// @returns An EncodingInfo indicating the Unicode encoding form of the input +/// and how long the byte order mark is if one exists. 
+static EncodingInfo getUnicodeEncoding(StringRef Input) { + if (Input.empty()) + return std::make_pair(UEF_Unknown, 0); + + switch (uint8_t(Input[0])) { + case 0x00: + if (Input.size() >= 4) { + if ( Input[1] == 0 + && uint8_t(Input[2]) == 0xFE + && uint8_t(Input[3]) == 0xFF) + return std::make_pair(UEF_UTF32_BE, 4); + if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) + return std::make_pair(UEF_UTF32_BE, 0); + } + + if (Input.size() >= 2 && Input[1] != 0) + return std::make_pair(UEF_UTF16_BE, 0); + return std::make_pair(UEF_Unknown, 0); + case 0xFF: + if ( Input.size() >= 4 + && uint8_t(Input[1]) == 0xFE + && Input[2] == 0 + && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 4); + + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) + return std::make_pair(UEF_UTF16_LE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xFE: + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) + return std::make_pair(UEF_UTF16_BE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xEF: + if ( Input.size() >= 3 + && uint8_t(Input[1]) == 0xBB + && uint8_t(Input[2]) == 0xBF) + return std::make_pair(UEF_UTF8, 3); + return std::make_pair(UEF_Unknown, 0); + } + + // It could still be utf-32 or utf-16. + if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 0); + + if (Input.size() >= 2 && Input[1] == 0) + return std::make_pair(UEF_UTF16_LE, 0); + + return std::make_pair(UEF_UTF8, 0); +} + +/// Pin the vtables to this file. +void Node::anchor() {} +void NullNode::anchor() {} +void ScalarNode::anchor() {} +void BlockScalarNode::anchor() {} +void KeyValueNode::anchor() {} +void MappingNode::anchor() {} +void SequenceNode::anchor() {} +void AliasNode::anchor() {} + +namespace llvm { +namespace yaml { + +/// Token - A single YAML token. +struct Token { + enum TokenKind { + TK_Error, // Uninitialized token. 
+ TK_StreamStart, + TK_StreamEnd, + TK_VersionDirective, + TK_TagDirective, + TK_DocumentStart, + TK_DocumentEnd, + TK_BlockEntry, + TK_BlockEnd, + TK_BlockSequenceStart, + TK_BlockMappingStart, + TK_FlowEntry, + TK_FlowSequenceStart, + TK_FlowSequenceEnd, + TK_FlowMappingStart, + TK_FlowMappingEnd, + TK_Key, + TK_Value, + TK_Scalar, + TK_BlockScalar, + TK_Alias, + TK_Anchor, + TK_Tag + } Kind = TK_Error; + + /// A string of length 0 or more whose begin() points to the logical location + /// of the token in the input. + StringRef Range; + + /// The value of a block scalar node. + std::string Value; + + Token() = default; +}; + +} // end namespace yaml +} // end namespace llvm + +using TokenQueueT = BumpPtrList<Token>; + +namespace { + +/// This struct is used to track simple keys. +/// +/// Simple keys are handled by creating an entry in SimpleKeys for each Token +/// which could legally be the start of a simple key. When peekNext is called, +/// if the Token To be returned is referenced by a SimpleKey, we continue +/// tokenizing until that potential simple key has either been found to not be +/// a simple key (we moved on to the next line or went further than 1024 chars). +/// Or when we run into a Value, and then insert a Key token (and possibly +/// others) before the SimpleKey's Tok. +struct SimpleKey { + TokenQueueT::iterator Tok; + unsigned Column; + unsigned Line; + unsigned FlowLevel; + bool IsRequired; + + bool operator ==(const SimpleKey &Other) { + return Tok == Other.Tok; + } +}; + +} // end anonymous namespace + +/// The Unicode scalar value of a UTF-8 minimal well-formed code unit +/// subsequence and the subsequence's length in code units (uint8_t). +/// A length of 0 represents an error. 
+using UTF8Decoded = std::pair<uint32_t, unsigned>; + +static UTF8Decoded decodeUTF8(StringRef Range) { + StringRef::iterator Position= Range.begin(); + StringRef::iterator End = Range.end(); + // 1 byte: [0x00, 0x7f] + // Bit pattern: 0xxxxxxx + if ((*Position & 0x80) == 0) { + return std::make_pair(*Position, 1); + } + // 2 bytes: [0x80, 0x7ff] + // Bit pattern: 110xxxxx 10xxxxxx + if (Position + 1 != End && + ((*Position & 0xE0) == 0xC0) && + ((*(Position + 1) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x1F) << 6) | + (*(Position + 1) & 0x3F); + if (codepoint >= 0x80) + return std::make_pair(codepoint, 2); + } + // 3 bytes: [0x8000, 0xffff] + // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx + if (Position + 2 != End && + ((*Position & 0xF0) == 0xE0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x0F) << 12) | + ((*(Position + 1) & 0x3F) << 6) | + (*(Position + 2) & 0x3F); + // Codepoints between 0xD800 and 0xDFFF are invalid, as + // they are high / low surrogate halves used by UTF-16. + if (codepoint >= 0x800 && + (codepoint < 0xD800 || codepoint > 0xDFFF)) + return std::make_pair(codepoint, 3); + } + // 4 bytes: [0x10000, 0x10FFFF] + // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + if (Position + 3 != End && + ((*Position & 0xF8) == 0xF0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80) && + ((*(Position + 3) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x07) << 18) | + ((*(Position + 1) & 0x3F) << 12) | + ((*(Position + 2) & 0x3F) << 6) | + (*(Position + 3) & 0x3F); + if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) + return std::make_pair(codepoint, 4); + } + return std::make_pair(0, 0); +} + +namespace llvm { +namespace yaml { + +/// Scans YAML tokens from a MemoryBuffer. 
+class Scanner { +public: + Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, + std::error_code *EC = nullptr); + Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, + std::error_code *EC = nullptr); + + /// Parse the next token and return it without popping it. + Token &peekNext(); + + /// Parse the next token and pop it from the queue. + Token getNext(); + + void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, + ArrayRef<SMRange> Ranges = None) { + SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); + } + + void setError(const Twine &Message, StringRef::iterator Position) { + if (Current >= End) + Current = End - 1; + + // propagate the error if possible + if (EC) + *EC = make_error_code(std::errc::invalid_argument); + + // Don't print out more errors after the first one we encounter. The rest + // are just the result of the first, and have no meaning. + if (!Failed) + printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); + Failed = true; + } + + void setError(const Twine &Message) { + setError(Message, Current); + } + + /// Returns true if an error occurred while parsing. + bool failed() { + return Failed; + } + +private: + void init(MemoryBufferRef Buffer); + + StringRef currentInput() { + return StringRef(Current, End - Current); + } + + /// Decode a UTF-8 minimal well-formed code unit subsequence starting + /// at \a Position. + /// + /// If the UTF-8 code units starting at Position do not form a well-formed + /// code unit subsequence, then the Unicode scalar value is 0, and the length + /// is 0. + UTF8Decoded decodeUTF8(StringRef::iterator Position) { + return ::decodeUTF8(StringRef(Position, End - Position)); + } + + // The following functions are based on the gramar rules in the YAML spec. The + // style of the function names it meant to closely match how they are written + // in the spec. 
The number within the [] is the number of the grammar rule in + // the spec. + // + // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. + // + // c- + // A production starting and ending with a special character. + // b- + // A production matching a single line break. + // nb- + // A production starting and ending with a non-break character. + // s- + // A production starting and ending with a white space character. + // ns- + // A production starting and ending with a non-space character. + // l- + // A production matching complete line(s). + + /// Skip a single nb-char[27] starting at Position. + /// + /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] + /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] + /// + /// @returns The code unit after the nb-char, or Position if it's not an + /// nb-char. + StringRef::iterator skip_nb_char(StringRef::iterator Position); + + /// Skip a single b-break[28] starting at Position. + /// + /// A b-break is 0xD 0xA | 0xD | 0xA + /// + /// @returns The code unit after the b-break, or Position if it's not a + /// b-break. + StringRef::iterator skip_b_break(StringRef::iterator Position); + + /// Skip a single s-space[31] starting at Position. + /// + /// An s-space is 0x20 + /// + /// @returns The code unit after the s-space, or Position if it's not a + /// s-space. + StringRef::iterator skip_s_space(StringRef::iterator Position); + + /// Skip a single s-white[33] starting at Position. + /// + /// A s-white is 0x20 | 0x9 + /// + /// @returns The code unit after the s-white, or Position if it's not a + /// s-white. + StringRef::iterator skip_s_white(StringRef::iterator Position); + + /// Skip a single ns-char[34] starting at Position. + /// + /// A ns-char is nb-char - s-white + /// + /// @returns The code unit after the ns-char, or Position if it's not a + /// ns-char. 
+ StringRef::iterator skip_ns_char(StringRef::iterator Position); + + using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); + + /// Skip minimal well-formed code unit subsequences until Func + /// returns its input. + /// + /// @returns The code unit after the last minimal well-formed code unit + /// subsequence that Func accepted. + StringRef::iterator skip_while( SkipWhileFunc Func + , StringRef::iterator Position); + + /// Skip minimal well-formed code unit subsequences until Func returns its + /// input. + void advanceWhile(SkipWhileFunc Func); + + /// Scan ns-uri-char[39]s starting at Cur. + /// + /// This updates Cur and Column while scanning. + void scan_ns_uri_char(); + + /// Consume a minimal well-formed code unit subsequence starting at + /// \a Cur. Return false if it is not the same Unicode scalar value as + /// \a Expected. This updates \a Column. + bool consume(uint32_t Expected); + + /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. + void skip(uint32_t Distance); + + /// Return true if the minimal well-formed code unit subsequence at + /// Pos is whitespace or a new line + bool isBlankOrBreak(StringRef::iterator Position); + + /// Consume a single b-break[28] if it's present at the current position. + /// + /// Return false if the code unit at the current position isn't a line break. + bool consumeLineBreakIfPresent(); + + /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. + void saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired); + + /// Remove simple keys that can no longer be valid simple keys. + /// + /// Invalid simple keys are not on the current line or are further than 1024 + /// columns back. + void removeStaleSimpleKeyCandidates(); + + /// Remove all simple keys on FlowLevel \a Level. + void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); + + /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd + /// tokens if needed. 
+ bool unrollIndent(int ToColumn); + + /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint + /// if needed. + bool rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint); + + /// Skip a single-line comment when the comment starts at the current + /// position of the scanner. + void skipComment(); + + /// Skip whitespace and comments until the start of the next token. + void scanToNextToken(); + + /// Must be the first token generated. + bool scanStreamStart(); + + /// Generate tokens needed to close out the stream. + bool scanStreamEnd(); + + /// Scan a %BLAH directive. + bool scanDirective(); + + /// Scan a ... or ---. + bool scanDocumentIndicator(bool IsStart); + + /// Scan a [ or { and generate the proper flow collection start token. + bool scanFlowCollectionStart(bool IsSequence); + + /// Scan a ] or } and generate the proper flow collection end token. + bool scanFlowCollectionEnd(bool IsSequence); + + /// Scan the , that separates entries in a flow collection. + bool scanFlowEntry(); + + /// Scan the - that starts block sequence entries. + bool scanBlockEntry(); + + /// Scan an explicit ? indicating a key. + bool scanKey(); + + /// Scan an explicit : indicating a value. + bool scanValue(); + + /// Scan a quoted scalar. + bool scanFlowScalar(bool IsDoubleQuoted); + + /// Scan an unquoted scalar. + bool scanPlainScalar(); + + /// Scan an Alias or Anchor starting with * or &. + bool scanAliasOrAnchor(bool IsAlias); + + /// Scan a block scalar starting with | or >. + bool scanBlockScalar(bool IsLiteral); + + /// Scan a chomping indicator in a block scalar header. + char scanBlockChompingIndicator(); + + /// Scan an indentation indicator in a block scalar header. + unsigned scanBlockIndentationIndicator(); + + /// Scan a block scalar header. + /// + /// Return false if an error occurred. 
+ bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, + bool &IsDone); + + /// Look for the indentation level of a block scalar. + /// + /// Return false if an error occurred. + bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, + unsigned &LineBreaks, bool &IsDone); + + /// Scan the indentation of a text line in a block scalar. + /// + /// Return false if an error occurred. + bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, + bool &IsDone); + + /// Scan a tag of the form !stuff. + bool scanTag(); + + /// Dispatch to the next scanning function based on \a *Cur. + bool fetchMoreTokens(); + + /// The SourceMgr used for diagnostics and buffer management. + SourceMgr &SM; + + /// The original input. + MemoryBufferRef InputBuffer; + + /// The current position of the scanner. + StringRef::iterator Current; + + /// The end of the input (one past the last character). + StringRef::iterator End; + + /// Current YAML indentation level in spaces. + int Indent; + + /// Current column number in Unicode code points. + unsigned Column; + + /// Current line number. + unsigned Line; + + /// How deep we are in flow style containers. 0 Means at block level. + unsigned FlowLevel; + + /// Are we at the start of the stream? + bool IsStartOfStream; + + /// Can the next token be the start of a simple key? + bool IsSimpleKeyAllowed; + + /// True if an error has occurred. + bool Failed; + + /// Should colors be used when printing out the diagnostic messages? + bool ShowColors; + + /// Queue of tokens. This is required to queue up tokens while looking + /// for the end of a simple key. And for cases where a single character + /// can produce multiple tokens (e.g. BlockEnd). + TokenQueueT TokenQueue; + + /// Indentation levels. + SmallVector<int, 4> Indents; + + /// Potential simple keys. 
+ SmallVector<SimpleKey, 4> SimpleKeys; + + std::error_code *EC; +}; + +} // end namespace yaml +} // end namespace llvm + +/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. +static void encodeUTF8( uint32_t UnicodeScalarValue + , SmallVectorImpl<char> &Result) { + if (UnicodeScalarValue <= 0x7F) { + Result.push_back(UnicodeScalarValue & 0x7F); + } else if (UnicodeScalarValue <= 0x7FF) { + uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + } else if (UnicodeScalarValue <= 0xFFFF) { + uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + } else if (UnicodeScalarValue <= 0x10FFFF) { + uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + Result.push_back(FourthByte); + } +} + +bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); + switch (T.Kind) { + case Token::TK_StreamStart: + OS << "Stream-Start: "; + break; + case Token::TK_StreamEnd: + OS << "Stream-End: "; + break; + case Token::TK_VersionDirective: + OS << "Version-Directive: "; + break; + case Token::TK_TagDirective: + OS << "Tag-Directive: "; + break; + case Token::TK_DocumentStart: + OS << "Document-Start: "; + break; + case Token::TK_DocumentEnd: + OS << "Document-End: "; + break; + case Token::TK_BlockEntry: + OS << 
"Block-Entry: "; + break; + case Token::TK_BlockEnd: + OS << "Block-End: "; + break; + case Token::TK_BlockSequenceStart: + OS << "Block-Sequence-Start: "; + break; + case Token::TK_BlockMappingStart: + OS << "Block-Mapping-Start: "; + break; + case Token::TK_FlowEntry: + OS << "Flow-Entry: "; + break; + case Token::TK_FlowSequenceStart: + OS << "Flow-Sequence-Start: "; + break; + case Token::TK_FlowSequenceEnd: + OS << "Flow-Sequence-End: "; + break; + case Token::TK_FlowMappingStart: + OS << "Flow-Mapping-Start: "; + break; + case Token::TK_FlowMappingEnd: + OS << "Flow-Mapping-End: "; + break; + case Token::TK_Key: + OS << "Key: "; + break; + case Token::TK_Value: + OS << "Value: "; + break; + case Token::TK_Scalar: + OS << "Scalar: "; + break; + case Token::TK_BlockScalar: + OS << "Block Scalar: "; + break; + case Token::TK_Alias: + OS << "Alias: "; + break; + case Token::TK_Anchor: + OS << "Anchor: "; + break; + case Token::TK_Tag: + OS << "Tag: "; + break; + case Token::TK_Error: + break; + } + OS << T.Range << "\n"; + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +bool yaml::scanTokens(StringRef Input) { + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +std::string yaml::escape(StringRef Input, bool EscapePrintable) { + std::string EscapedInput; + for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { + if (*i == '\\') + EscapedInput += "\\\\"; + else if (*i == '"') + EscapedInput += "\\\""; + else if (*i == 0) + EscapedInput += "\\0"; + else if (*i == 0x07) + EscapedInput += "\\a"; + else if (*i == 0x08) + EscapedInput += "\\b"; + else if (*i == 0x09) + EscapedInput += "\\t"; + else if (*i == 0x0A) + EscapedInput += "\\n"; + else if (*i == 0x0B) + EscapedInput += "\\v"; + else if (*i == 0x0C) + 
EscapedInput += "\\f"; + else if (*i == 0x0D) + EscapedInput += "\\r"; + else if (*i == 0x1B) + EscapedInput += "\\e"; + else if ((unsigned char)*i < 0x20) { // Control characters not handled above. + std::string HexStr = utohexstr(*i); + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. + UTF8Decoded UnicodeScalarValue + = decodeUTF8(StringRef(i, Input.end() - i)); + if (UnicodeScalarValue.second == 0) { + // Found invalid char. + SmallString<4> Val; + encodeUTF8(0xFFFD, Val); + EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); + // FIXME: Error reporting. + return EscapedInput; + } + if (UnicodeScalarValue.first == 0x85) + EscapedInput += "\\N"; + else if (UnicodeScalarValue.first == 0xA0) + EscapedInput += "\\_"; + else if (UnicodeScalarValue.first == 0x2028) + EscapedInput += "\\L"; + else if (UnicodeScalarValue.first == 0x2029) + EscapedInput += "\\P"; + else if (!EscapePrintable && + sys::unicode::isPrintable(UnicodeScalarValue.first)) + EscapedInput += StringRef(i, UnicodeScalarValue.second); + else { + std::string HexStr = utohexstr(UnicodeScalarValue.first); + if (HexStr.size() <= 2) + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 4) + EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 8) + EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; + } + i += UnicodeScalarValue.second - 1; + } else + EscapedInput.push_back(*i); + } + return EscapedInput; +} + +Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, + std::error_code *EC) + : SM(sm), ShowColors(ShowColors), EC(EC) { + init(MemoryBufferRef(Input, "YAML")); +} + +Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, + std::error_code *EC) + : SM(SM_), ShowColors(ShowColors), EC(EC) { + init(Buffer); +} + +void Scanner::init(MemoryBufferRef Buffer) { + 
InputBuffer = Buffer; + Current = InputBuffer.getBufferStart(); + End = InputBuffer.getBufferEnd(); + Indent = -1; + Column = 0; + Line = 0; + FlowLevel = 0; + IsStartOfStream = true; + IsSimpleKeyAllowed = true; + Failed = false; + std::unique_ptr<MemoryBuffer> InputBufferOwner = + MemoryBuffer::getMemBuffer(Buffer); + SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); +} + +Token &Scanner::peekNext() { + // If the current token is a possible simple key, keep parsing until we + // can confirm. + bool NeedMore = false; + while (true) { + if (TokenQueue.empty() || NeedMore) { + if (!fetchMoreTokens()) { + TokenQueue.clear(); + TokenQueue.push_back(Token()); + return TokenQueue.front(); + } + } + assert(!TokenQueue.empty() && + "fetchMoreTokens lied about getting tokens!"); + + removeStaleSimpleKeyCandidates(); + SimpleKey SK; + SK.Tok = TokenQueue.begin(); + if (!is_contained(SimpleKeys, SK)) + break; + else + NeedMore = true; + } + return TokenQueue.front(); +} + +Token Scanner::getNext() { + Token Ret = peekNext(); + // TokenQueue can be empty if there was an error getting the next token. + if (!TokenQueue.empty()) + TokenQueue.pop_front(); + + // There cannot be any referenced Token's if the TokenQueue is empty. So do a + // quick deallocation of them all. + if (TokenQueue.empty()) + TokenQueue.resetAlloc(); + + return Ret; +} + +StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { + if (Position == End) + return Position; + // Check 7 bit c-printable - b-char. + if ( *Position == 0x09 + || (*Position >= 0x20 && *Position <= 0x7E)) + return Position + 1; + + // Check for valid UTF-8. 
+ if (uint8_t(*Position) & 0x80) { + UTF8Decoded u8d = decodeUTF8(Position); + if ( u8d.second != 0 + && u8d.first != 0xFEFF + && ( u8d.first == 0x85 + || ( u8d.first >= 0xA0 + && u8d.first <= 0xD7FF) + || ( u8d.first >= 0xE000 + && u8d.first <= 0xFFFD) + || ( u8d.first >= 0x10000 + && u8d.first <= 0x10FFFF))) + return Position + u8d.second; + } + return Position; +} + +StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == 0x0D) { + if (Position + 1 != End && *(Position + 1) == 0x0A) + return Position + 2; + return Position + 1; + } + + if (*Position == 0x0A) + return Position + 1; + return Position; +} + +StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ') + return Position + 1; + return Position; +} + +StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ' || *Position == '\t') + return Position + 1; + return Position; +} + +StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ' || *Position == '\t') + return Position; + return skip_nb_char(Position); +} + +StringRef::iterator Scanner::skip_while( SkipWhileFunc Func + , StringRef::iterator Position) { + while (true) { + StringRef::iterator i = (this->*Func)(Position); + if (i == Position) + break; + Position = i; + } + return Position; +} + +void Scanner::advanceWhile(SkipWhileFunc Func) { + auto Final = skip_while(Func, Current); + Column += Final - Current; + Current = Final; +} + +static bool is_ns_hex_digit(const char C) { + return (C >= '0' && C <= '9') + || (C >= 'a' && C <= 'z') + || (C >= 'A' && C <= 'Z'); +} + +static bool is_ns_word_char(const char C) { + return C == '-' + || (C >= 'a' && C <= 'z') + || (C >= 'A' && C <= 'Z'); +} + +void Scanner::scan_ns_uri_char() { + 
while (true) { + if (Current == End) + break; + if (( *Current == '%' + && Current + 2 < End + && is_ns_hex_digit(*(Current + 1)) + && is_ns_hex_digit(*(Current + 2))) + || is_ns_word_char(*Current) + || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") + != StringRef::npos) { + ++Current; + ++Column; + } else + break; + } +} + +bool Scanner::consume(uint32_t Expected) { + if (Expected >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (Current == End) + return false; + if (uint8_t(*Current) >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (uint8_t(*Current) == Expected) { + ++Current; + ++Column; + return true; + } + return false; +} + +void Scanner::skip(uint32_t Distance) { + Current += Distance; + Column += Distance; + assert(Current <= End && "Skipped past the end"); +} + +bool Scanner::isBlankOrBreak(StringRef::iterator Position) { + if (Position == End) + return false; + return *Position == ' ' || *Position == '\t' || *Position == '\r' || + *Position == '\n'; +} + +bool Scanner::consumeLineBreakIfPresent() { + auto Next = skip_b_break(Current); + if (Next == Current) + return false; + Column = 0; + ++Line; + Current = Next; + return true; +} + +void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired) { + if (IsSimpleKeyAllowed) { + SimpleKey SK; + SK.Tok = Tok; + SK.Line = Line; + SK.Column = AtColumn; + SK.IsRequired = IsRequired; + SK.FlowLevel = FlowLevel; + SimpleKeys.push_back(SK); + } +} + +void Scanner::removeStaleSimpleKeyCandidates() { + for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); + i != SimpleKeys.end();) { + if (i->Line != Line || i->Column + 1024 < Column) { + if (i->IsRequired) + setError( "Could not find expected : for simple key" + , i->Tok->Range.begin()); + i = SimpleKeys.erase(i); + } else + ++i; + } +} + +void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { + if (!SimpleKeys.empty() && (SimpleKeys.end() - 
1)->FlowLevel == Level) + SimpleKeys.pop_back(); +} + +bool Scanner::unrollIndent(int ToColumn) { + Token T; + // Indentation is ignored in flow. + if (FlowLevel != 0) + return true; + + while (Indent > ToColumn) { + T.Kind = Token::TK_BlockEnd; + T.Range = StringRef(Current, 1); + TokenQueue.push_back(T); + Indent = Indents.pop_back_val(); + } + + return true; +} + +bool Scanner::rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint) { + if (FlowLevel) + return true; + if (Indent < ToColumn) { + Indents.push_back(Indent); + Indent = ToColumn; + + Token T; + T.Kind = Kind; + T.Range = StringRef(Current, 0); + TokenQueue.insert(InsertPoint, T); + } + return true; +} + +void Scanner::skipComment() { + if (*Current != '#') + return; + while (true) { + // This may skip more than one byte, thus Column is only incremented + // for code points. + StringRef::iterator I = skip_nb_char(Current); + if (I == Current) + break; + Current = I; + ++Column; + } +} + +void Scanner::scanToNextToken() { + while (true) { + while (*Current == ' ' || *Current == '\t') { + skip(1); + } + + skipComment(); + + // Skip EOL. + StringRef::iterator i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + ++Line; + Column = 0; + // New lines may start a simple key. + if (!FlowLevel) + IsSimpleKeyAllowed = true; + } +} + +bool Scanner::scanStreamStart() { + IsStartOfStream = false; + + EncodingInfo EI = getUnicodeEncoding(currentInput()); + + Token T; + T.Kind = Token::TK_StreamStart; + T.Range = StringRef(Current, EI.second); + TokenQueue.push_back(T); + Current += EI.second; + return true; +} + +bool Scanner::scanStreamEnd() { + // Force an ending new line if one isn't present. 
+ if (Column != 0) { + Column = 0; + ++Line; + } + + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = Token::TK_StreamEnd; + T.Range = StringRef(Current, 0); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanDirective() { + // Reset the indentation level. + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + StringRef::iterator Start = Current; + consume('%'); + StringRef::iterator NameStart = Current; + Current = skip_while(&Scanner::skip_ns_char, Current); + StringRef Name(NameStart, Current - NameStart); + Current = skip_while(&Scanner::skip_s_white, Current); + + Token T; + if (Name == "YAML") { + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_VersionDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; + } else if(Name == "TAG") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Current = skip_while(&Scanner::skip_s_white, Current); + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_TagDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; + } + return false; +} + +bool Scanner::scanDocumentIndicator(bool IsStart) { + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; + T.Range = StringRef(Current, 3); + skip(3); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanFlowCollectionStart(bool IsSequence) { + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceStart + : Token::TK_FlowMappingStart; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + + // [ and { may begin a simple key. + saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); + + // And may also be followed by a simple key. 
+ IsSimpleKeyAllowed = true; + ++FlowLevel; + return true; +} + +bool Scanner::scanFlowCollectionEnd(bool IsSequence) { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = false; + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceEnd + : Token::TK_FlowMappingEnd; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + if (FlowLevel) + --FlowLevel; + return true; +} + +bool Scanner::scanFlowEntry() { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_FlowEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanBlockEntry() { + rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_BlockEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanKey() { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = !FlowLevel; + + Token T; + T.Kind = Token::TK_Key; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanValue() { + // If the previous token could have been a simple key, insert the key token + // into the token queue. + if (!SimpleKeys.empty()) { + SimpleKey SK = SimpleKeys.pop_back_val(); + Token T; + T.Kind = Token::TK_Key; + T.Range = SK.Tok->Range; + TokenQueueT::iterator i, e; + for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { + if (i == SK.Tok) + break; + } + assert(i != e && "SimpleKey not in token queue!"); + i = TokenQueue.insert(i, T); + + // We may also need to add a Block-Mapping-Start token. 
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i); + + IsSimpleKeyAllowed = false; + } else { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + IsSimpleKeyAllowed = !FlowLevel; + } + + Token T; + T.Kind = Token::TK_Value; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +// Forbidding inlining improves performance by roughly 20%. +// FIXME: Remove once llvm optimizes this to the faster version without hints. +LLVM_ATTRIBUTE_NOINLINE static bool +wasEscaped(StringRef::iterator First, StringRef::iterator Position); + +// Returns whether a character at 'Position' was escaped with a leading '\'. +// 'First' specifies the position of the first character in the string. +static bool wasEscaped(StringRef::iterator First, + StringRef::iterator Position) { + assert(Position - 1 >= First); + StringRef::iterator I = Position - 1; + // We calculate the number of consecutive '\'s before the current position + // by iterating backwards through our string. + while (I >= First && *I == '\\') --I; + // (Position - 1 - I) now contains the number of '\'s before the current + // position. If it is odd, the character at 'Position' was escaped. + return (Position - 1 - I) % 2 == 1; +} + +bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + if (IsDoubleQuoted) { + do { + ++Current; + while (Current != End && *Current != '"') + ++Current; + // Repeat until the previous character was not a '\' or was an escaped + // backslash. + } while ( Current != End + && *(Current - 1) == '\\' + && wasEscaped(Start + 1, Current)); + } else { + skip(1); + while (true) { + // Skip a ' followed by another '. 
+ if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { + skip(2); + continue; + } else if (*Current == '\'') + break; + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + Column = 0; + ++Line; + } else { + if (i == End) + break; + Current = i; + ++Column; + } + } + } + + if (Current == End) { + setError("Expected quote at end of scalar", Current); + return false; + } + + skip(1); // Skip ending quote. + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanPlainScalar() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + unsigned LeadingBlanks = 0; + assert(Indent >= -1 && "Indent must be >= -1 !"); + unsigned indent = static_cast<unsigned>(Indent + 1); + while (true) { + if (*Current == '#') + break; + + while (!isBlankOrBreak(Current)) { + if ( FlowLevel && *Current == ':' + && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { + setError("Found unexpected ':' while scanning a plain scalar", Current); + return false; + } + + // Check for the end of the plain scalar. + if ( (*Current == ':' && isBlankOrBreak(Current + 1)) + || ( FlowLevel + && (StringRef(Current, 1).find_first_of(",:?[]{}") + != StringRef::npos))) + break; + + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + // Are we at the end? + if (!isBlankOrBreak(Current)) + break; + + // Eat blanks. 
+ StringRef::iterator Tmp = Current; + while (isBlankOrBreak(Tmp)) { + StringRef::iterator i = skip_s_white(Tmp); + if (i != Tmp) { + if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { + setError("Found invalid tab character in indentation", Tmp); + return false; + } + Tmp = i; + ++Column; + } else { + i = skip_b_break(Tmp); + if (!LeadingBlanks) + LeadingBlanks = 1; + Tmp = i; + Column = 0; + ++Line; + } + } + + if (!FlowLevel && Column < indent) + break; + + Current = Tmp; + } + if (Start == Current) { + setError("Got empty plain scalar", Start); + return false; + } + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Plain scalars can be simple keys. + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanAliasOrAnchor(bool IsAlias) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); + while(true) { + if ( *Current == '[' || *Current == ']' + || *Current == '{' || *Current == '}' + || *Current == ',' + || *Current == ':') + break; + StringRef::iterator i = skip_ns_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty alias or anchor", Start); + return false; + } + + Token T; + T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Alias and anchors can be simple keys. + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +char Scanner::scanBlockChompingIndicator() { + char Indicator = ' '; + if (Current != End && (*Current == '+' || *Current == '-')) { + Indicator = *Current; + skip(1); + } + return Indicator; +} + +/// Get the number of line breaks after chomping. +/// +/// Return the number of trailing line breaks to emit, depending on +/// \p ChompingIndicator. 
+static unsigned getChompedLineBreaks(char ChompingIndicator, + unsigned LineBreaks, StringRef Str) { + if (ChompingIndicator == '-') // Strip all line breaks. + return 0; + if (ChompingIndicator == '+') // Keep all line breaks. + return LineBreaks; + // Clip trailing lines. + return Str.empty() ? 0 : 1; +} + +unsigned Scanner::scanBlockIndentationIndicator() { + unsigned Indent = 0; + if (Current != End && (*Current >= '1' && *Current <= '9')) { + Indent = unsigned(*Current - '0'); + skip(1); + } + return Indent; +} + +bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, + unsigned &IndentIndicator, bool &IsDone) { + auto Start = Current; + + ChompingIndicator = scanBlockChompingIndicator(); + IndentIndicator = scanBlockIndentationIndicator(); + // Check for the chomping indicator once again. + if (ChompingIndicator == ' ') + ChompingIndicator = scanBlockChompingIndicator(); + Current = skip_while(&Scanner::skip_s_white, Current); + skipComment(); + + if (Current == End) { // EOF, we have an empty scalar. + Token T; + T.Kind = Token::TK_BlockScalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + IsDone = true; + return true; + } + + if (!consumeLineBreakIfPresent()) { + setError("Expected a line break after block scalar header", Current); + return false; + } + return true; +} + +bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, + unsigned BlockExitIndent, + unsigned &LineBreaks, bool &IsDone) { + unsigned MaxAllSpaceLineCharacters = 0; + StringRef::iterator LongestAllSpaceLine; + + while (true) { + advanceWhile(&Scanner::skip_s_space); + if (skip_nb_char(Current) != Current) { + // This line isn't empty, so try and find the indentation. + if (Column <= BlockExitIndent) { // End of the block literal. + IsDone = true; + return true; + } + // We found the block's indentation. 
+ BlockIndent = Column; + if (MaxAllSpaceLineCharacters > BlockIndent) { + setError( + "Leading all-spaces line must be smaller than the block indent", + LongestAllSpaceLine); + return false; + } + return true; + } + if (skip_b_break(Current) != Current && + Column > MaxAllSpaceLineCharacters) { + // Record the longest all-space line in case it's longer than the + // discovered block indent. + MaxAllSpaceLineCharacters = Column; + LongestAllSpaceLine = Current; + } + + // Check for EOF. + if (Current == End) { + IsDone = true; + return true; + } + + if (!consumeLineBreakIfPresent()) { + IsDone = true; + return true; + } + ++LineBreaks; + } + return true; +} + +bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, + unsigned BlockExitIndent, bool &IsDone) { + // Skip the indentation. + while (Column < BlockIndent) { + auto I = skip_s_space(Current); + if (I == Current) + break; + Current = I; + ++Column; + } + + if (skip_nb_char(Current) == Current) + return true; + + if (Column <= BlockExitIndent) { // End of the block literal. + IsDone = true; + return true; + } + + if (Column < BlockIndent) { + if (Current != End && *Current == '#') { // Trailing comment. + IsDone = true; + return true; + } + setError("A text line is less indented than the block scalar", Current); + return false; + } + return true; // A normal text line. +} + +bool Scanner::scanBlockScalar(bool IsLiteral) { + // Eat '|' or '>' + assert(*Current == '|' || *Current == '>'); + skip(1); + + char ChompingIndicator; + unsigned BlockIndent; + bool IsDone = false; + if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) + return false; + if (IsDone) + return true; + + auto Start = Current; + unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; + unsigned LineBreaks = 0; + if (BlockIndent == 0) { + if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, + IsDone)) + return false; + } + + // Scan the block's scalars body. 
+ SmallString<256> Str; + while (!IsDone) { + if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) + return false; + if (IsDone) + break; + + // Parse the current line. + auto LineStart = Current; + advanceWhile(&Scanner::skip_nb_char); + if (LineStart != Current) { + Str.append(LineBreaks, '\n'); + Str.append(StringRef(LineStart, Current - LineStart)); + LineBreaks = 0; + } + + // Check for EOF. + if (Current == End) + break; + + if (!consumeLineBreakIfPresent()) + break; + ++LineBreaks; + } + + if (Current == End && !LineBreaks) + // Ensure that there is at least one line break before the end of file. + LineBreaks = 1; + Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); + + // New lines may start a simple key. + if (!FlowLevel) + IsSimpleKeyAllowed = true; + + Token T; + T.Kind = Token::TK_BlockScalar; + T.Range = StringRef(Start, Current - Start); + T.Value = Str.str().str(); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanTag() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); // Eat !. + if (Current == End || isBlankOrBreak(Current)); // An empty tag. + else if (*Current == '<') { + skip(1); + scan_ns_uri_char(); + if (!consume('>')) + return false; + } else { + // FIXME: Actually parse the c-ns-shorthand-tag rule. + Current = skip_while(&Scanner::skip_ns_char, Current); + } + + Token T; + T.Kind = Token::TK_Tag; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Tags can be simple keys. 
+ saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::fetchMoreTokens() { + if (IsStartOfStream) + return scanStreamStart(); + + scanToNextToken(); + + if (Current == End) + return scanStreamEnd(); + + removeStaleSimpleKeyCandidates(); + + unrollIndent(Column); + + if (Column == 0 && *Current == '%') + return scanDirective(); + + if (Column == 0 && Current + 4 <= End + && *Current == '-' + && *(Current + 1) == '-' + && *(Current + 2) == '-' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(true); + + if (Column == 0 && Current + 4 <= End + && *Current == '.' + && *(Current + 1) == '.' + && *(Current + 2) == '.' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(false); + + if (*Current == '[') + return scanFlowCollectionStart(true); + + if (*Current == '{') + return scanFlowCollectionStart(false); + + if (*Current == ']') + return scanFlowCollectionEnd(true); + + if (*Current == '}') + return scanFlowCollectionEnd(false); + + if (*Current == ',') + return scanFlowEntry(); + + if (*Current == '-' && isBlankOrBreak(Current + 1)) + return scanBlockEntry(); + + if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanKey(); + + if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanValue(); + + if (*Current == '*') + return scanAliasOrAnchor(true); + + if (*Current == '&') + return scanAliasOrAnchor(false); + + if (*Current == '!') + return scanTag(); + + if (*Current == '|' && !FlowLevel) + return scanBlockScalar(true); + + if (*Current == '>' && !FlowLevel) + return scanBlockScalar(false); + + if (*Current == '\'') + return scanFlowScalar(false); + + if (*Current == '"') + return scanFlowScalar(true); + + // Get a plain scalar. 
+ StringRef FirstChar(Current, 1); + if (!(isBlankOrBreak(Current) + || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) + || (*Current == '-' && !isBlankOrBreak(Current + 1)) + || (!FlowLevel && (*Current == '?' || *Current == ':') + && isBlankOrBreak(Current + 1)) + || (!FlowLevel && *Current == ':' + && Current + 2 < End + && *(Current + 1) == ':' + && !isBlankOrBreak(Current + 2))) + return scanPlainScalar(); + + setError("Unrecognized character while tokenizing."); + return false; +} + +Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, + std::error_code *EC) + : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} + +Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, + std::error_code *EC) + : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} + +Stream::~Stream() = default; + +bool Stream::failed() { return scanner->failed(); } + +void Stream::printError(Node *N, const Twine &Msg) { + scanner->printError( N->getSourceRange().Start + , SourceMgr::DK_Error + , Msg + , N->getSourceRange()); +} + +document_iterator Stream::begin() { + if (CurrentDoc) + report_fatal_error("Can only iterate over the stream once"); + + // Skip Stream-Start. 
+ scanner->getNext(); + + CurrentDoc.reset(new Document(*this)); + return document_iterator(CurrentDoc); +} + +document_iterator Stream::end() { + return document_iterator(); +} + +void Stream::skip() { + for (document_iterator i = begin(), e = end(); i != e; ++i) + i->skip(); +} + +Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, + StringRef T) + : Doc(D), TypeID(Type), Anchor(A), Tag(T) { + SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); + SourceRange = SMRange(Start, Start); +} + +std::string Node::getVerbatimTag() const { + StringRef Raw = getRawTag(); + if (!Raw.empty() && Raw != "!") { + std::string Ret; + if (Raw.find_last_of('!') == 0) { + Ret = Doc->getTagMap().find("!")->second; + Ret += Raw.substr(1); + return Ret; + } else if (Raw.startswith("!!")) { + Ret = Doc->getTagMap().find("!!")->second; + Ret += Raw.substr(2); + return Ret; + } else { + StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); + std::map<StringRef, StringRef>::const_iterator It = + Doc->getTagMap().find(TagHandle); + if (It != Doc->getTagMap().end()) + Ret = It->second; + else { + Token T; + T.Kind = Token::TK_Tag; + T.Range = TagHandle; + setError(Twine("Unknown tag handle ") + TagHandle, T); + } + Ret += Raw.substr(Raw.find_last_of('!') + 1); + return Ret; + } + } + + switch (getType()) { + case NK_Null: + return "tag:yaml.org,2002:null"; + case NK_Scalar: + case NK_BlockScalar: + // TODO: Tag resolution. 
+ return "tag:yaml.org,2002:str"; + case NK_Mapping: + return "tag:yaml.org,2002:map"; + case NK_Sequence: + return "tag:yaml.org,2002:seq"; + } + + return ""; +} + +Token &Node::peekNext() { + return Doc->peekNext(); +} + +Token Node::getNext() { + return Doc->getNext(); +} + +Node *Node::parseBlockNode() { + return Doc->parseBlockNode(); +} + +BumpPtrAllocator &Node::getAllocator() { + return Doc->NodeAllocator; +} + +void Node::setError(const Twine &Msg, Token &Tok) const { + Doc->setError(Msg, Tok); +} + +bool Node::failed() const { + return Doc->failed(); +} + +StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { + // TODO: Handle newlines properly. We need to remove leading whitespace. + if (Value[0] == '"') { // Double quoted. + // Pull off the leading and trailing "s. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + // Search for characters that would require unescaping the value. + StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); + if (i != StringRef::npos) + return unescapeDoubleQuoted(UnquotedValue, i, Storage); + return UnquotedValue; + } else if (Value[0] == '\'') { // Single quoted. + // Pull off the leading and trailing 's. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + StringRef::size_type i = UnquotedValue.find('\''); + if (i != StringRef::npos) { + // We're going to need Storage. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + Storage.push_back('\''); + UnquotedValue = UnquotedValue.substr(i + 2); + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); + } + return UnquotedValue; + } + // Plain or block. 
+ return Value.rtrim(' '); +} + +StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue + , StringRef::size_type i + , SmallVectorImpl<char> &Storage) + const { + // Use Storage to build proper value. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { + // Insert all previous chars into Storage. + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + // Chop off inserted chars. + UnquotedValue = UnquotedValue.substr(i); + + assert(!UnquotedValue.empty() && "Can't be empty!"); + + // Parse escape or line break. + switch (UnquotedValue[0]) { + case '\r': + case '\n': + Storage.push_back('\n'); + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + UnquotedValue = UnquotedValue.substr(1); + break; + default: + if (UnquotedValue.size() == 1) + // TODO: Report error. + break; + UnquotedValue = UnquotedValue.substr(1); + switch (UnquotedValue[0]) { + default: { + Token T; + T.Range = StringRef(UnquotedValue.begin(), 1); + setError("Unrecognized escape code!", T); + return ""; + } + case '\r': + case '\n': + // Remove the new line. + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + // If this was just a single byte newline, it will get skipped + // below. 
+ break; + case '0': + Storage.push_back(0x00); + break; + case 'a': + Storage.push_back(0x07); + break; + case 'b': + Storage.push_back(0x08); + break; + case 't': + case 0x09: + Storage.push_back(0x09); + break; + case 'n': + Storage.push_back(0x0A); + break; + case 'v': + Storage.push_back(0x0B); + break; + case 'f': + Storage.push_back(0x0C); + break; + case 'r': + Storage.push_back(0x0D); + break; + case 'e': + Storage.push_back(0x1B); + break; + case ' ': + Storage.push_back(0x20); + break; + case '"': + Storage.push_back(0x22); + break; + case '/': + Storage.push_back(0x2F); + break; + case '\\': + Storage.push_back(0x5C); + break; + case 'N': + encodeUTF8(0x85, Storage); + break; + case '_': + encodeUTF8(0xA0, Storage); + break; + case 'L': + encodeUTF8(0x2028, Storage); + break; + case 'P': + encodeUTF8(0x2029, Storage); + break; + case 'x': { + if (UnquotedValue.size() < 3) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(2); + break; + } + case 'u': { + if (UnquotedValue.size() < 5) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(4); + break; + } + case 'U': { + if (UnquotedValue.size() < 9) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. 
+ UnicodeScalarValue = 0xFFFD; + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(8); + break; + } + } + UnquotedValue = UnquotedValue.substr(1); + } + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); +} + +Node *KeyValueNode::getKey() { + if (Key) + return Key; + // Handle implicit null keys. + { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_Value + || t.Kind == Token::TK_Error) { + return Key = new (getAllocator()) NullNode(Doc); + } + if (t.Kind == Token::TK_Key) + getNext(); // skip TK_Key. + } + + // Handle explicit null keys. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { + return Key = new (getAllocator()) NullNode(Doc); + } + + // We've got a normal key. + return Key = parseBlockNode(); +} + +Node *KeyValueNode::getValue() { + if (Value) + return Value; + getKey()->skip(); + if (failed()) + return Value = new (getAllocator()) NullNode(Doc); + + // Handle implicit null values. + { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_FlowMappingEnd + || t.Kind == Token::TK_Key + || t.Kind == Token::TK_FlowEntry + || t.Kind == Token::TK_Error) { + return Value = new (getAllocator()) NullNode(Doc); + } + + if (t.Kind != Token::TK_Value) { + setError("Unexpected token in Key Value.", t); + return Value = new (getAllocator()) NullNode(Doc); + } + getNext(); // skip TK_Value. + } + + // Handle explicit null values. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { + return Value = new (getAllocator()) NullNode(Doc); + } + + // We got a normal value. 
+ return Value = parseBlockNode(); +} + +void MappingNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = nullptr; + return; + } + if (CurrentEntry) { + CurrentEntry->skip(); + if (Type == MT_Inline) { + IsAtEnd = true; + CurrentEntry = nullptr; + return; + } + } + Token T = peekNext(); + if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { + // KeyValueNode eats the TK_Key. That way it can detect null keys. + CurrentEntry = new (getAllocator()) KeyValueNode(Doc); + } else if (Type == MT_Block) { + switch (T.Kind) { + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = nullptr; + break; + default: + setError("Unexpected token. Expected Key or Block End", T); + LLVM_FALLTHROUGH; + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = nullptr; + } + } else { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + return increment(); + case Token::TK_FlowMappingEnd: + getNext(); + LLVM_FALLTHROUGH; + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = nullptr; + break; + default: + setError( "Unexpected token. Expected Key, Flow Entry, or Flow " + "Mapping End." + , T); + IsAtEnd = true; + CurrentEntry = nullptr; + } + } +} + +void SequenceNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = nullptr; + return; + } + if (CurrentEntry) + CurrentEntry->skip(); + Token T = peekNext(); + if (SeqType == ST_Block) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (!CurrentEntry) { // An error occurred. + IsAtEnd = true; + CurrentEntry = nullptr; + } + break; + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = nullptr; + break; + default: + setError( "Unexpected token. Expected Block Entry or Block End." 
+ , T); + LLVM_FALLTHROUGH; + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = nullptr; + } + } else if (SeqType == ST_Indentless) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (!CurrentEntry) { // An error occurred. + IsAtEnd = true; + CurrentEntry = nullptr; + } + break; + default: + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = nullptr; + } + } else if (SeqType == ST_Flow) { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + WasPreviousTokenFlowEntry = true; + return increment(); + case Token::TK_FlowSequenceEnd: + getNext(); + LLVM_FALLTHROUGH; + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = nullptr; + break; + case Token::TK_StreamEnd: + case Token::TK_DocumentEnd: + case Token::TK_DocumentStart: + setError("Could not find closing ]!", T); + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = nullptr; + break; + default: + if (!WasPreviousTokenFlowEntry) { + setError("Expected , between entries!", T); + IsAtEnd = true; + CurrentEntry = nullptr; + break; + } + // Otherwise it must be a flow entry. + CurrentEntry = parseBlockNode(); + if (!CurrentEntry) { + IsAtEnd = true; + } + WasPreviousTokenFlowEntry = false; + break; + } + } +} + +Document::Document(Stream &S) : stream(S), Root(nullptr) { + // Tag maps starts with two default mappings. 
+ TagMap["!"] = "!"; + TagMap["!!"] = "tag:yaml.org,2002:"; + + if (parseDirectives()) + expectToken(Token::TK_DocumentStart); + Token &T = peekNext(); + if (T.Kind == Token::TK_DocumentStart) + getNext(); +} + +bool Document::skip() { + if (stream.scanner->failed()) + return false; + if (!Root) + getRoot(); + Root->skip(); + Token &T = peekNext(); + if (T.Kind == Token::TK_StreamEnd) + return false; + if (T.Kind == Token::TK_DocumentEnd) { + getNext(); + return skip(); + } + return true; +} + +Token &Document::peekNext() { + return stream.scanner->peekNext(); +} + +Token Document::getNext() { + return stream.scanner->getNext(); +} + +void Document::setError(const Twine &Message, Token &Location) const { + stream.scanner->setError(Message, Location.Range.begin()); +} + +bool Document::failed() const { + return stream.scanner->failed(); +} + +Node *Document::parseBlockNode() { + Token T = peekNext(); + // Handle properties. + Token AnchorInfo; + Token TagInfo; +parse_property: + switch (T.Kind) { + case Token::TK_Alias: + getNext(); + return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); + case Token::TK_Anchor: + if (AnchorInfo.Kind == Token::TK_Anchor) { + setError("Already encountered an anchor for this node!", T); + return nullptr; + } + AnchorInfo = getNext(); // Consume TK_Anchor. + T = peekNext(); + goto parse_property; + case Token::TK_Tag: + if (TagInfo.Kind == Token::TK_Tag) { + setError("Already encountered a tag for this node!", T); + return nullptr; + } + TagInfo = getNext(); // Consume TK_Tag. + T = peekNext(); + goto parse_property; + default: + break; + } + + switch (T.Kind) { + case Token::TK_BlockEntry: + // We got an unindented BlockEntry sequence. This is not terminated with + // a BlockEnd. + // Don't eat the TK_BlockEntry, SequenceNode needs it. 
+ return new (NodeAllocator) SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , SequenceNode::ST_Indentless); + case Token::TK_BlockSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , SequenceNode::ST_Block); + case Token::TK_BlockMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , MappingNode::MT_Block); + case Token::TK_FlowSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , SequenceNode::ST_Flow); + case Token::TK_FlowMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , MappingNode::MT_Flow); + case Token::TK_Scalar: + getNext(); + return new (NodeAllocator) + ScalarNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , T.Range); + case Token::TK_BlockScalar: { + getNext(); + StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); + StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); + return new (NodeAllocator) + BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), + TagInfo.Range, StrCopy, T.Range); + } + case Token::TK_Key: + // Don't eat the TK_Key, KeyValueNode expects it. + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , TagInfo.Range + , MappingNode::MT_Inline); + case Token::TK_DocumentStart: + case Token::TK_DocumentEnd: + case Token::TK_StreamEnd: + default: + // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not + // !!null null. 
+ return new (NodeAllocator) NullNode(stream.CurrentDoc); + case Token::TK_Error: + return nullptr; + } + llvm_unreachable("Control flow shouldn't reach here."); + return nullptr; +} + +bool Document::parseDirectives() { + bool isDirective = false; + while (true) { + Token T = peekNext(); + if (T.Kind == Token::TK_TagDirective) { + parseTAGDirective(); + isDirective = true; + } else if (T.Kind == Token::TK_VersionDirective) { + parseYAMLDirective(); + isDirective = true; + } else + break; + } + return isDirective; +} + +void Document::parseYAMLDirective() { + getNext(); // Eat %YAML <version> +} + +void Document::parseTAGDirective() { + Token Tag = getNext(); // %TAG <handle> <prefix> + StringRef T = Tag.Range; + // Strip %TAG + T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); + std::size_t HandleEnd = T.find_first_of(" \t"); + StringRef TagHandle = T.substr(0, HandleEnd); + StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); + TagMap[TagHandle] = TagPrefix; +} + +bool Document::expectToken(int TK) { + Token T = getNext(); + if (T.Kind != TK) { + setError("Unexpected token", T); + return false; + } + return true; +} diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp new file mode 100644 index 0000000000000..eba22fd14725a --- /dev/null +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -0,0 +1,1088 @@ +//===- lib/Support/YAMLTraits.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YAMLTraits.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Unicode.h" +#include "llvm/Support/YAMLParser.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> + +using namespace llvm; +using namespace yaml; + +//===----------------------------------------------------------------------===// +// IO +//===----------------------------------------------------------------------===// + +IO::IO(void *Context) : Ctxt(Context) {} + +IO::~IO() = default; + +void *IO::getContext() const { + return Ctxt; +} + +void IO::setContext(void *Context) { + Ctxt = Context; +} + +//===----------------------------------------------------------------------===// +// Input +//===----------------------------------------------------------------------===// + +Input::Input(StringRef InputContent, void *Ctxt, + SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt) + : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)) { + if (DiagHandler) + SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); + DocIterator = Strm->begin(); +} + +Input::Input(MemoryBufferRef Input, void *Ctxt, + SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt) + : IO(Ctxt), Strm(new Stream(Input, SrcMgr, false, &EC)) { + if (DiagHandler) + SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); + DocIterator = Strm->begin(); +} + +Input::~Input() = 
default; + +std::error_code Input::error() { return EC; } + +// Pin the vtables to this file. +void Input::HNode::anchor() {} +void Input::EmptyHNode::anchor() {} +void Input::ScalarHNode::anchor() {} +void Input::MapHNode::anchor() {} +void Input::SequenceHNode::anchor() {} + +bool Input::outputting() const { + return false; +} + +bool Input::setCurrentDocument() { + if (DocIterator != Strm->end()) { + Node *N = DocIterator->getRoot(); + if (!N) { + assert(Strm->failed() && "Root is NULL iff parsing failed"); + EC = make_error_code(errc::invalid_argument); + return false; + } + + if (isa<NullNode>(N)) { + // Empty files are allowed and ignored + ++DocIterator; + return setCurrentDocument(); + } + TopNode = createHNodes(N); + CurrentNode = TopNode.get(); + return true; + } + return false; +} + +bool Input::nextDocument() { + return ++DocIterator != Strm->end(); +} + +const Node *Input::getCurrentNode() const { + return CurrentNode ? CurrentNode->_node : nullptr; +} + +bool Input::mapTag(StringRef Tag, bool Default) { + // CurrentNode can be null if setCurrentDocument() was unable to + // parse the document because it was invalid or empty. + if (!CurrentNode) + return false; + + std::string foundTag = CurrentNode->_node->getVerbatimTag(); + if (foundTag.empty()) { + // If no tag found and 'Tag' is the default, say it was found. + return Default; + } + // Return true iff found tag matches supplied tag. + return Tag.equals(foundTag); +} + +void Input::beginMapping() { + if (EC) + return; + // CurrentNode can be null if the document is empty. 
+ MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode); + if (MN) { + MN->ValidKeys.clear(); + } +} + +std::vector<StringRef> Input::keys() { + MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); + std::vector<StringRef> Ret; + if (!MN) { + setError(CurrentNode, "not a mapping"); + return Ret; + } + for (auto &P : MN->Mapping) + Ret.push_back(P.first()); + return Ret; +} + +bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, + void *&SaveInfo) { + UseDefault = false; + if (EC) + return false; + + // CurrentNode is null for empty documents, which is an error in case required + // nodes are present. + if (!CurrentNode) { + if (Required) + EC = make_error_code(errc::invalid_argument); + return false; + } + + MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); + if (!MN) { + if (Required || !isa<EmptyHNode>(CurrentNode)) + setError(CurrentNode, "not a mapping"); + return false; + } + MN->ValidKeys.push_back(Key); + HNode *Value = MN->Mapping[Key].get(); + if (!Value) { + if (Required) + setError(CurrentNode, Twine("missing required key '") + Key + "'"); + else + UseDefault = true; + return false; + } + SaveInfo = CurrentNode; + CurrentNode = Value; + return true; +} + +void Input::postflightKey(void *saveInfo) { + CurrentNode = reinterpret_cast<HNode *>(saveInfo); +} + +void Input::endMapping() { + if (EC) + return; + // CurrentNode can be null if the document is empty. 
+ MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode); + if (!MN) + return; + for (const auto &NN : MN->Mapping) { + if (!is_contained(MN->ValidKeys, NN.first())) { + setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'"); + break; + } + } +} + +void Input::beginFlowMapping() { beginMapping(); } + +void Input::endFlowMapping() { endMapping(); } + +unsigned Input::beginSequence() { + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) + return SQ->Entries.size(); + if (isa<EmptyHNode>(CurrentNode)) + return 0; + // Treat case where there's a scalar "null" value as an empty sequence. + if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) { + if (isNull(SN->value())) + return 0; + } + // Any other type of HNode is an error. + setError(CurrentNode, "not a sequence"); + return 0; +} + +void Input::endSequence() { +} + +bool Input::preflightElement(unsigned Index, void *&SaveInfo) { + if (EC) + return false; + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { + SaveInfo = CurrentNode; + CurrentNode = SQ->Entries[Index].get(); + return true; + } + return false; +} + +void Input::postflightElement(void *SaveInfo) { + CurrentNode = reinterpret_cast<HNode *>(SaveInfo); +} + +unsigned Input::beginFlowSequence() { return beginSequence(); } + +bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) { + if (EC) + return false; + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { + SaveInfo = CurrentNode; + CurrentNode = SQ->Entries[index].get(); + return true; + } + return false; +} + +void Input::postflightFlowElement(void *SaveInfo) { + CurrentNode = reinterpret_cast<HNode *>(SaveInfo); +} + +void Input::endFlowSequence() { +} + +void Input::beginEnumScalar() { + ScalarMatchFound = false; +} + +bool Input::matchEnumScalar(const char *Str, bool) { + if (ScalarMatchFound) + return false; + if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) { + if (SN->value().equals(Str)) { + ScalarMatchFound = true; + 
return true; + } + } + return false; +} + +bool Input::matchEnumFallback() { + if (ScalarMatchFound) + return false; + ScalarMatchFound = true; + return true; +} + +void Input::endEnumScalar() { + if (!ScalarMatchFound) { + setError(CurrentNode, "unknown enumerated scalar"); + } +} + +bool Input::beginBitSetScalar(bool &DoClear) { + BitValuesUsed.clear(); + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { + BitValuesUsed.insert(BitValuesUsed.begin(), SQ->Entries.size(), false); + } else { + setError(CurrentNode, "expected sequence of bit values"); + } + DoClear = true; + return true; +} + +bool Input::bitSetMatch(const char *Str, bool) { + if (EC) + return false; + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { + unsigned Index = 0; + for (auto &N : SQ->Entries) { + if (ScalarHNode *SN = dyn_cast<ScalarHNode>(N.get())) { + if (SN->value().equals(Str)) { + BitValuesUsed[Index] = true; + return true; + } + } else { + setError(CurrentNode, "unexpected scalar in sequence of bit values"); + } + ++Index; + } + } else { + setError(CurrentNode, "expected sequence of bit values"); + } + return false; +} + +void Input::endBitSetScalar() { + if (EC) + return; + if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) { + assert(BitValuesUsed.size() == SQ->Entries.size()); + for (unsigned i = 0; i < SQ->Entries.size(); ++i) { + if (!BitValuesUsed[i]) { + setError(SQ->Entries[i].get(), "unknown bit value"); + return; + } + } + } +} + +void Input::scalarString(StringRef &S, QuotingType) { + if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) { + S = SN->value(); + } else { + setError(CurrentNode, "unexpected scalar"); + } +} + +void Input::blockScalarString(StringRef &S) { scalarString(S, QuotingType::None); } + +void Input::scalarTag(std::string &Tag) { + Tag = CurrentNode->_node->getVerbatimTag(); +} + +void Input::setError(HNode *hnode, const Twine &message) { + assert(hnode && "HNode must not be NULL"); + setError(hnode->_node, 
message); +} + +NodeKind Input::getNodeKind() { + if (isa<ScalarHNode>(CurrentNode)) + return NodeKind::Scalar; + else if (isa<MapHNode>(CurrentNode)) + return NodeKind::Map; + else if (isa<SequenceHNode>(CurrentNode)) + return NodeKind::Sequence; + llvm_unreachable("Unsupported node kind"); +} + +void Input::setError(Node *node, const Twine &message) { + Strm->printError(node, message); + EC = make_error_code(errc::invalid_argument); +} + +std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { + SmallString<128> StringStorage; + if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) { + StringRef KeyStr = SN->getValue(StringStorage); + if (!StringStorage.empty()) { + // Copy string to permanent storage + KeyStr = StringStorage.str().copy(StringAllocator); + } + return std::make_unique<ScalarHNode>(N, KeyStr); + } else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) { + StringRef ValueCopy = BSN->getValue().copy(StringAllocator); + return std::make_unique<ScalarHNode>(N, ValueCopy); + } else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) { + auto SQHNode = std::make_unique<SequenceHNode>(N); + for (Node &SN : *SQ) { + auto Entry = createHNodes(&SN); + if (EC) + break; + SQHNode->Entries.push_back(std::move(Entry)); + } + return std::move(SQHNode); + } else if (MappingNode *Map = dyn_cast<MappingNode>(N)) { + auto mapHNode = std::make_unique<MapHNode>(N); + for (KeyValueNode &KVN : *Map) { + Node *KeyNode = KVN.getKey(); + ScalarNode *Key = dyn_cast<ScalarNode>(KeyNode); + Node *Value = KVN.getValue(); + if (!Key || !Value) { + if (!Key) + setError(KeyNode, "Map key must be a scalar"); + if (!Value) + setError(KeyNode, "Map value must not be empty"); + break; + } + StringStorage.clear(); + StringRef KeyStr = Key->getValue(StringStorage); + if (!StringStorage.empty()) { + // Copy string to permanent storage + KeyStr = StringStorage.str().copy(StringAllocator); + } + auto ValueHNode = createHNodes(Value); + if (EC) + break; + mapHNode->Mapping[KeyStr] = 
std::move(ValueHNode); + } + return std::move(mapHNode); + } else if (isa<NullNode>(N)) { + return std::make_unique<EmptyHNode>(N); + } else { + setError(N, "unknown node kind"); + return nullptr; + } +} + +void Input::setError(const Twine &Message) { + setError(CurrentNode, Message); +} + +bool Input::canElideEmptySequence() { + return false; +} + +//===----------------------------------------------------------------------===// +// Output +//===----------------------------------------------------------------------===// + +Output::Output(raw_ostream &yout, void *context, int WrapColumn) + : IO(context), Out(yout), WrapColumn(WrapColumn) {} + +Output::~Output() = default; + +bool Output::outputting() const { + return true; +} + +void Output::beginMapping() { + StateStack.push_back(inMapFirstKey); + PaddingBeforeContainer = Padding; + Padding = "\n"; +} + +bool Output::mapTag(StringRef Tag, bool Use) { + if (Use) { + // If this tag is being written inside a sequence we should write the start + // of the sequence before writing the tag, otherwise the tag won't be + // attached to the element in the sequence, but rather the sequence itself. + bool SequenceElement = false; + if (StateStack.size() > 1) { + auto &E = StateStack[StateStack.size() - 2]; + SequenceElement = inSeqAnyElement(E) || inFlowSeqAnyElement(E); + } + if (SequenceElement && StateStack.back() == inMapFirstKey) { + newLineCheck(); + } else { + output(" "); + } + output(Tag); + if (SequenceElement) { + // If we're writing the tag during the first element of a map, the tag + // takes the place of the first element in the sequence. + if (StateStack.back() == inMapFirstKey) { + StateStack.pop_back(); + StateStack.push_back(inMapOtherKey); + } + // Tags inside maps in sequences should act as keys in the map from a + // formatting perspective, so we always want a newline in a sequence. 
+ Padding = "\n"; + } + } + return Use; +} + +void Output::endMapping() { + // If we did not map anything, we should explicitly emit an empty map + if (StateStack.back() == inMapFirstKey) { + Padding = PaddingBeforeContainer; + newLineCheck(); + output("{}"); + Padding = "\n"; + } + StateStack.pop_back(); +} + +std::vector<StringRef> Output::keys() { + report_fatal_error("invalid call"); +} + +bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault, + bool &UseDefault, void *&) { + UseDefault = false; + if (Required || !SameAsDefault || WriteDefaultValues) { + auto State = StateStack.back(); + if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) { + flowKey(Key); + } else { + newLineCheck(); + paddedKey(Key); + } + return true; + } + return false; +} + +void Output::postflightKey(void *) { + if (StateStack.back() == inMapFirstKey) { + StateStack.pop_back(); + StateStack.push_back(inMapOtherKey); + } else if (StateStack.back() == inFlowMapFirstKey) { + StateStack.pop_back(); + StateStack.push_back(inFlowMapOtherKey); + } +} + +void Output::beginFlowMapping() { + StateStack.push_back(inFlowMapFirstKey); + newLineCheck(); + ColumnAtMapFlowStart = Column; + output("{ "); +} + +void Output::endFlowMapping() { + StateStack.pop_back(); + outputUpToEndOfLine(" }"); +} + +void Output::beginDocuments() { + outputUpToEndOfLine("---"); +} + +bool Output::preflightDocument(unsigned index) { + if (index > 0) + outputUpToEndOfLine("\n---"); + return true; +} + +void Output::postflightDocument() { +} + +void Output::endDocuments() { + output("\n...\n"); +} + +unsigned Output::beginSequence() { + StateStack.push_back(inSeqFirstElement); + PaddingBeforeContainer = Padding; + Padding = "\n"; + return 0; +} + +void Output::endSequence() { + // If we did not emit anything, we should explicitly emit an empty sequence + if (StateStack.back() == inSeqFirstElement) { + Padding = PaddingBeforeContainer; + newLineCheck(); + output("[]"); + Padding = "\n"; + } + 
StateStack.pop_back(); +} + +bool Output::preflightElement(unsigned, void *&) { + return true; +} + +void Output::postflightElement(void *) { + if (StateStack.back() == inSeqFirstElement) { + StateStack.pop_back(); + StateStack.push_back(inSeqOtherElement); + } else if (StateStack.back() == inFlowSeqFirstElement) { + StateStack.pop_back(); + StateStack.push_back(inFlowSeqOtherElement); + } +} + +unsigned Output::beginFlowSequence() { + StateStack.push_back(inFlowSeqFirstElement); + newLineCheck(); + ColumnAtFlowStart = Column; + output("[ "); + NeedFlowSequenceComma = false; + return 0; +} + +void Output::endFlowSequence() { + StateStack.pop_back(); + outputUpToEndOfLine(" ]"); +} + +bool Output::preflightFlowElement(unsigned, void *&) { + if (NeedFlowSequenceComma) + output(", "); + if (WrapColumn && Column > WrapColumn) { + output("\n"); + for (int i = 0; i < ColumnAtFlowStart; ++i) + output(" "); + Column = ColumnAtFlowStart; + output(" "); + } + return true; +} + +void Output::postflightFlowElement(void *) { + NeedFlowSequenceComma = true; +} + +void Output::beginEnumScalar() { + EnumerationMatchFound = false; +} + +bool Output::matchEnumScalar(const char *Str, bool Match) { + if (Match && !EnumerationMatchFound) { + newLineCheck(); + outputUpToEndOfLine(Str); + EnumerationMatchFound = true; + } + return false; +} + +bool Output::matchEnumFallback() { + if (EnumerationMatchFound) + return false; + EnumerationMatchFound = true; + return true; +} + +void Output::endEnumScalar() { + if (!EnumerationMatchFound) + llvm_unreachable("bad runtime enum value"); +} + +bool Output::beginBitSetScalar(bool &DoClear) { + newLineCheck(); + output("[ "); + NeedBitValueComma = false; + DoClear = false; + return true; +} + +bool Output::bitSetMatch(const char *Str, bool Matches) { + if (Matches) { + if (NeedBitValueComma) + output(", "); + output(Str); + NeedBitValueComma = true; + } + return false; +} + +void Output::endBitSetScalar() { + outputUpToEndOfLine(" ]"); +} + +void 
Output::scalarString(StringRef &S, QuotingType MustQuote) { + newLineCheck(); + if (S.empty()) { + // Print '' for the empty string because leaving the field empty is not + // allowed. + outputUpToEndOfLine("''"); + return; + } + if (MustQuote == QuotingType::None) { + // Only quote if we must. + outputUpToEndOfLine(S); + return; + } + + const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\""; + output(Quote); // Starting quote. + + // When using double-quoted strings (and only in that case), non-printable characters may be + // present, and will be escaped using a variety of unicode-scalar and special short-form + // escapes. This is handled in yaml::escape. + if (MustQuote == QuotingType::Double) { + output(yaml::escape(S, /* EscapePrintable= */ false)); + outputUpToEndOfLine(Quote); + return; + } + + unsigned i = 0; + unsigned j = 0; + unsigned End = S.size(); + const char *Base = S.data(); + + // When using single-quoted strings, any single quote ' must be doubled to be escaped. + while (j < End) { + if (S[j] == '\'') { // Escape quotes. + output(StringRef(&Base[i], j - i)); // "flush". + output(StringLiteral("''")); // Print it as '' + i = j + 1; + } + ++j; + } + output(StringRef(&Base[i], j - i)); + outputUpToEndOfLine(Quote); // Ending quote. +} + +void Output::blockScalarString(StringRef &S) { + if (!StateStack.empty()) + newLineCheck(); + output(" |"); + outputNewLine(); + + unsigned Indent = StateStack.empty() ? 
1 : StateStack.size(); + + auto Buffer = MemoryBuffer::getMemBuffer(S, "", false); + for (line_iterator Lines(*Buffer, false); !Lines.is_at_end(); ++Lines) { + for (unsigned I = 0; I < Indent; ++I) { + output(" "); + } + output(*Lines); + outputNewLine(); + } +} + +void Output::scalarTag(std::string &Tag) { + if (Tag.empty()) + return; + newLineCheck(); + output(Tag); + output(" "); +} + +void Output::setError(const Twine &message) { +} + +bool Output::canElideEmptySequence() { + // Normally, with an optional key/value where the value is an empty sequence, + // the whole key/value can be not written. But, that produces wrong yaml + // if the key/value is the only thing in the map and the map is used in + // a sequence. This detects if the this sequence is the first key/value + // in map that itself is embedded in a sequnce. + if (StateStack.size() < 2) + return true; + if (StateStack.back() != inMapFirstKey) + return true; + return !inSeqAnyElement(StateStack[StateStack.size() - 2]); +} + +void Output::output(StringRef s) { + Column += s.size(); + Out << s; +} + +void Output::outputUpToEndOfLine(StringRef s) { + output(s); + if (StateStack.empty() || (!inFlowSeqAnyElement(StateStack.back()) && + !inFlowMapAnyKey(StateStack.back()))) + Padding = "\n"; +} + +void Output::outputNewLine() { + Out << "\n"; + Column = 0; +} + +// if seq at top, indent as if map, then add "- " +// if seq in middle, use "- " if firstKey, else use " " +// + +void Output::newLineCheck() { + if (Padding != "\n") { + output(Padding); + Padding = {}; + return; + } + outputNewLine(); + Padding = {}; + + if (StateStack.size() == 0) + return; + + unsigned Indent = StateStack.size() - 1; + bool OutputDash = false; + + if (StateStack.back() == inSeqFirstElement || + StateStack.back() == inSeqOtherElement) { + OutputDash = true; + } else if ((StateStack.size() > 1) && + ((StateStack.back() == inMapFirstKey) || + inFlowSeqAnyElement(StateStack.back()) || + (StateStack.back() == inFlowMapFirstKey)) && 
+ inSeqAnyElement(StateStack[StateStack.size() - 2])) { + --Indent; + OutputDash = true; + } + + for (unsigned i = 0; i < Indent; ++i) { + output(" "); + } + if (OutputDash) { + output("- "); + } + +} + +void Output::paddedKey(StringRef key) { + output(key); + output(":"); + const char *spaces = " "; + if (key.size() < strlen(spaces)) + Padding = &spaces[key.size()]; + else + Padding = " "; +} + +void Output::flowKey(StringRef Key) { + if (StateStack.back() == inFlowMapOtherKey) + output(", "); + if (WrapColumn && Column > WrapColumn) { + output("\n"); + for (int I = 0; I < ColumnAtMapFlowStart; ++I) + output(" "); + Column = ColumnAtMapFlowStart; + output(" "); + } + output(Key); + output(": "); +} + +NodeKind Output::getNodeKind() { report_fatal_error("invalid call"); } + +bool Output::inSeqAnyElement(InState State) { + return State == inSeqFirstElement || State == inSeqOtherElement; +} + +bool Output::inFlowSeqAnyElement(InState State) { + return State == inFlowSeqFirstElement || State == inFlowSeqOtherElement; +} + +bool Output::inMapAnyKey(InState State) { + return State == inMapFirstKey || State == inMapOtherKey; +} + +bool Output::inFlowMapAnyKey(InState State) { + return State == inFlowMapFirstKey || State == inFlowMapOtherKey; +} + +//===----------------------------------------------------------------------===// +// traits for built-in types +//===----------------------------------------------------------------------===// + +void ScalarTraits<bool>::output(const bool &Val, void *, raw_ostream &Out) { + Out << (Val ? 
"true" : "false"); +} + +StringRef ScalarTraits<bool>::input(StringRef Scalar, void *, bool &Val) { + if (Scalar.equals("true")) { + Val = true; + return StringRef(); + } else if (Scalar.equals("false")) { + Val = false; + return StringRef(); + } + return "invalid boolean"; +} + +void ScalarTraits<StringRef>::output(const StringRef &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<StringRef>::input(StringRef Scalar, void *, + StringRef &Val) { + Val = Scalar; + return StringRef(); +} + +void ScalarTraits<std::string>::output(const std::string &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<std::string>::input(StringRef Scalar, void *, + std::string &Val) { + Val = Scalar.str(); + return StringRef(); +} + +void ScalarTraits<uint8_t>::output(const uint8_t &Val, void *, + raw_ostream &Out) { + // use temp uin32_t because ostream thinks uint8_t is a character + uint32_t Num = Val; + Out << Num; +} + +StringRef ScalarTraits<uint8_t>::input(StringRef Scalar, void *, uint8_t &Val) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 0, n)) + return "invalid number"; + if (n > 0xFF) + return "out of range number"; + Val = n; + return StringRef(); +} + +void ScalarTraits<uint16_t>::output(const uint16_t &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<uint16_t>::input(StringRef Scalar, void *, + uint16_t &Val) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 0, n)) + return "invalid number"; + if (n > 0xFFFF) + return "out of range number"; + Val = n; + return StringRef(); +} + +void ScalarTraits<uint32_t>::output(const uint32_t &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<uint32_t>::input(StringRef Scalar, void *, + uint32_t &Val) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 0, n)) + return "invalid number"; + if (n > 0xFFFFFFFFUL) + return "out of range number"; + Val = n; + return StringRef(); +} + +void 
ScalarTraits<uint64_t>::output(const uint64_t &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<uint64_t>::input(StringRef Scalar, void *, + uint64_t &Val) { + unsigned long long N; + if (getAsUnsignedInteger(Scalar, 0, N)) + return "invalid number"; + Val = N; + return StringRef(); +} + +void ScalarTraits<int8_t>::output(const int8_t &Val, void *, raw_ostream &Out) { + // use temp in32_t because ostream thinks int8_t is a character + int32_t Num = Val; + Out << Num; +} + +StringRef ScalarTraits<int8_t>::input(StringRef Scalar, void *, int8_t &Val) { + long long N; + if (getAsSignedInteger(Scalar, 0, N)) + return "invalid number"; + if ((N > 127) || (N < -128)) + return "out of range number"; + Val = N; + return StringRef(); +} + +void ScalarTraits<int16_t>::output(const int16_t &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<int16_t>::input(StringRef Scalar, void *, int16_t &Val) { + long long N; + if (getAsSignedInteger(Scalar, 0, N)) + return "invalid number"; + if ((N > INT16_MAX) || (N < INT16_MIN)) + return "out of range number"; + Val = N; + return StringRef(); +} + +void ScalarTraits<int32_t>::output(const int32_t &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<int32_t>::input(StringRef Scalar, void *, int32_t &Val) { + long long N; + if (getAsSignedInteger(Scalar, 0, N)) + return "invalid number"; + if ((N > INT32_MAX) || (N < INT32_MIN)) + return "out of range number"; + Val = N; + return StringRef(); +} + +void ScalarTraits<int64_t>::output(const int64_t &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits<int64_t>::input(StringRef Scalar, void *, int64_t &Val) { + long long N; + if (getAsSignedInteger(Scalar, 0, N)) + return "invalid number"; + Val = N; + return StringRef(); +} + +void ScalarTraits<double>::output(const double &Val, void *, raw_ostream &Out) { + Out << format("%g", Val); +} + +StringRef 
ScalarTraits<double>::input(StringRef Scalar, void *, double &Val) { + if (to_float(Scalar, Val)) + return StringRef(); + return "invalid floating point number"; +} + +void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) { + Out << format("%g", Val); +} + +StringRef ScalarTraits<float>::input(StringRef Scalar, void *, float &Val) { + if (to_float(Scalar, Val)) + return StringRef(); + return "invalid floating point number"; +} + +void ScalarTraits<Hex8>::output(const Hex8 &Val, void *, raw_ostream &Out) { + uint8_t Num = Val; + Out << format("0x%02X", Num); +} + +StringRef ScalarTraits<Hex8>::input(StringRef Scalar, void *, Hex8 &Val) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 0, n)) + return "invalid hex8 number"; + if (n > 0xFF) + return "out of range hex8 number"; + Val = n; + return StringRef(); +} + +void ScalarTraits<Hex16>::output(const Hex16 &Val, void *, raw_ostream &Out) { + uint16_t Num = Val; + Out << format("0x%04X", Num); +} + +StringRef ScalarTraits<Hex16>::input(StringRef Scalar, void *, Hex16 &Val) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 0, n)) + return "invalid hex16 number"; + if (n > 0xFFFF) + return "out of range hex16 number"; + Val = n; + return StringRef(); +} + +void ScalarTraits<Hex32>::output(const Hex32 &Val, void *, raw_ostream &Out) { + uint32_t Num = Val; + Out << format("0x%08X", Num); +} + +StringRef ScalarTraits<Hex32>::input(StringRef Scalar, void *, Hex32 &Val) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 0, n)) + return "invalid hex32 number"; + if (n > 0xFFFFFFFFUL) + return "out of range hex32 number"; + Val = n; + return StringRef(); +} + +void ScalarTraits<Hex64>::output(const Hex64 &Val, void *, raw_ostream &Out) { + uint64_t Num = Val; + Out << format("0x%016llX", Num); +} + +StringRef ScalarTraits<Hex64>::input(StringRef Scalar, void *, Hex64 &Val) { + unsigned long long Num; + if (getAsUnsignedInteger(Scalar, 0, Num)) + return "invalid hex64 
number"; + Val = Num; + return StringRef(); +} diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp new file mode 100644 index 0000000000000..a83d0f441a4bd --- /dev/null +++ b/llvm/lib/Support/Z3Solver.cpp @@ -0,0 +1,900 @@ +//== Z3Solver.cpp -----------------------------------------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" +#include "llvm/Support/SMTAPI.h" +#include <set> + +using namespace llvm; + +#if LLVM_WITH_Z3 + +#include <z3.h> + +namespace { + +/// Configuration class for Z3 +class Z3Config { + friend class Z3Context; + + Z3_config Config; + +public: + Z3Config() : Config(Z3_mk_config()) { + // Enable model finding + Z3_set_param_value(Config, "model", "true"); + // Disable proof generation + Z3_set_param_value(Config, "proof", "false"); + // Set timeout to 15000ms = 15s + Z3_set_param_value(Config, "timeout", "15000"); + } + + ~Z3Config() { Z3_del_config(Config); } +}; // end class Z3Config + +// Function used to report errors +void Z3ErrorHandler(Z3_context Context, Z3_error_code Error) { + llvm::report_fatal_error("Z3 error: " + + llvm::Twine(Z3_get_error_msg(Context, Error))); +} + +/// Wrapper for Z3 context +class Z3Context { +public: + Z3_context Context; + + Z3Context() { + Context = Z3_mk_context_rc(Z3Config().Config); + // The error function is set here because the context is the first object + // created by the backend + Z3_set_error_handler(Context, Z3ErrorHandler); + } + + virtual ~Z3Context() { + Z3_del_context(Context); + Context = nullptr; + } +}; // end class Z3Context + +/// Wrapper for Z3 Sort +class Z3Sort : public SMTSort { + friend class Z3Solver; + + Z3Context &Context; + + 
Z3_sort Sort; + +public: + /// Default constructor, mainly used by make_shared + Z3Sort(Z3Context &C, Z3_sort ZS) : Context(C), Sort(ZS) { + Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort)); + } + + /// Override implicit copy constructor for correct reference counting. + Z3Sort(const Z3Sort &Other) : Context(Other.Context), Sort(Other.Sort) { + Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort)); + } + + /// Override implicit copy assignment constructor for correct reference + /// counting. + Z3Sort &operator=(const Z3Sort &Other) { + Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Other.Sort)); + Z3_dec_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort)); + Sort = Other.Sort; + return *this; + } + + Z3Sort(Z3Sort &&Other) = delete; + Z3Sort &operator=(Z3Sort &&Other) = delete; + + ~Z3Sort() { + if (Sort) + Z3_dec_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort)); + } + + void Profile(llvm::FoldingSetNodeID &ID) const override { + ID.AddInteger( + Z3_get_ast_id(Context.Context, reinterpret_cast<Z3_ast>(Sort))); + } + + bool isBitvectorSortImpl() const override { + return (Z3_get_sort_kind(Context.Context, Sort) == Z3_BV_SORT); + } + + bool isFloatSortImpl() const override { + return (Z3_get_sort_kind(Context.Context, Sort) == Z3_FLOATING_POINT_SORT); + } + + bool isBooleanSortImpl() const override { + return (Z3_get_sort_kind(Context.Context, Sort) == Z3_BOOL_SORT); + } + + unsigned getBitvectorSortSizeImpl() const override { + return Z3_get_bv_sort_size(Context.Context, Sort); + } + + unsigned getFloatSortSizeImpl() const override { + return Z3_fpa_get_ebits(Context.Context, Sort) + + Z3_fpa_get_sbits(Context.Context, Sort); + } + + bool equal_to(SMTSort const &Other) const override { + return Z3_is_eq_sort(Context.Context, Sort, + static_cast<const Z3Sort &>(Other).Sort); + } + + void print(raw_ostream &OS) const override { + OS << Z3_sort_to_string(Context.Context, Sort); + } +}; // end class Z3Sort + +static const Z3Sort 
&toZ3Sort(const SMTSort &S) { + return static_cast<const Z3Sort &>(S); +} + +class Z3Expr : public SMTExpr { + friend class Z3Solver; + + Z3Context &Context; + + Z3_ast AST; + +public: + Z3Expr(Z3Context &C, Z3_ast ZA) : SMTExpr(), Context(C), AST(ZA) { + Z3_inc_ref(Context.Context, AST); + } + + /// Override implicit copy constructor for correct reference counting. + Z3Expr(const Z3Expr &Copy) : SMTExpr(), Context(Copy.Context), AST(Copy.AST) { + Z3_inc_ref(Context.Context, AST); + } + + /// Override implicit copy assignment constructor for correct reference + /// counting. + Z3Expr &operator=(const Z3Expr &Other) { + Z3_inc_ref(Context.Context, Other.AST); + Z3_dec_ref(Context.Context, AST); + AST = Other.AST; + return *this; + } + + Z3Expr(Z3Expr &&Other) = delete; + Z3Expr &operator=(Z3Expr &&Other) = delete; + + ~Z3Expr() { + if (AST) + Z3_dec_ref(Context.Context, AST); + } + + void Profile(llvm::FoldingSetNodeID &ID) const override { + ID.AddInteger(Z3_get_ast_id(Context.Context, AST)); + } + + /// Comparison of AST equality, not model equivalence. 
+ bool equal_to(SMTExpr const &Other) const override { + assert(Z3_is_eq_sort(Context.Context, Z3_get_sort(Context.Context, AST), + Z3_get_sort(Context.Context, + static_cast<const Z3Expr &>(Other).AST)) && + "AST's must have the same sort"); + return Z3_is_eq_ast(Context.Context, AST, + static_cast<const Z3Expr &>(Other).AST); + } + + void print(raw_ostream &OS) const override { + OS << Z3_ast_to_string(Context.Context, AST); + } +}; // end class Z3Expr + +static const Z3Expr &toZ3Expr(const SMTExpr &E) { + return static_cast<const Z3Expr &>(E); +} + +class Z3Model { + friend class Z3Solver; + + Z3Context &Context; + + Z3_model Model; + +public: + Z3Model(Z3Context &C, Z3_model ZM) : Context(C), Model(ZM) { + Z3_model_inc_ref(Context.Context, Model); + } + + Z3Model(const Z3Model &Other) = delete; + Z3Model(Z3Model &&Other) = delete; + Z3Model &operator=(Z3Model &Other) = delete; + Z3Model &operator=(Z3Model &&Other) = delete; + + ~Z3Model() { + if (Model) + Z3_model_dec_ref(Context.Context, Model); + } + + void print(raw_ostream &OS) const { + OS << Z3_model_to_string(Context.Context, Model); + } + + LLVM_DUMP_METHOD void dump() const { print(llvm::errs()); } +}; // end class Z3Model + +/// Get the corresponding IEEE floating-point type for a given bitwidth. 
+static const llvm::fltSemantics &getFloatSemantics(unsigned BitWidth) { + switch (BitWidth) { + default: + llvm_unreachable("Unsupported floating-point semantics!"); + break; + case 16: + return llvm::APFloat::IEEEhalf(); + case 32: + return llvm::APFloat::IEEEsingle(); + case 64: + return llvm::APFloat::IEEEdouble(); + case 128: + return llvm::APFloat::IEEEquad(); + } +} + +// Determine whether two float semantics are equivalent +static bool areEquivalent(const llvm::fltSemantics &LHS, + const llvm::fltSemantics &RHS) { + return (llvm::APFloat::semanticsPrecision(LHS) == + llvm::APFloat::semanticsPrecision(RHS)) && + (llvm::APFloat::semanticsMinExponent(LHS) == + llvm::APFloat::semanticsMinExponent(RHS)) && + (llvm::APFloat::semanticsMaxExponent(LHS) == + llvm::APFloat::semanticsMaxExponent(RHS)) && + (llvm::APFloat::semanticsSizeInBits(LHS) == + llvm::APFloat::semanticsSizeInBits(RHS)); +} + +class Z3Solver : public SMTSolver { + friend class Z3ConstraintManager; + + Z3Context Context; + + Z3_solver Solver; + + // Cache Sorts + std::set<Z3Sort> CachedSorts; + + // Cache Exprs + std::set<Z3Expr> CachedExprs; + +public: + Z3Solver() : Solver(Z3_mk_simple_solver(Context.Context)) { + Z3_solver_inc_ref(Context.Context, Solver); + } + + Z3Solver(const Z3Solver &Other) = delete; + Z3Solver(Z3Solver &&Other) = delete; + Z3Solver &operator=(Z3Solver &Other) = delete; + Z3Solver &operator=(Z3Solver &&Other) = delete; + + ~Z3Solver() { + if (Solver) + Z3_solver_dec_ref(Context.Context, Solver); + } + + void addConstraint(const SMTExprRef &Exp) const override { + Z3_solver_assert(Context.Context, Solver, toZ3Expr(*Exp).AST); + } + + // Given an SMTSort, adds/retrives it from the cache and returns + // an SMTSortRef to the SMTSort in the cache + SMTSortRef newSortRef(const SMTSort &Sort) { + auto It = CachedSorts.insert(toZ3Sort(Sort)); + return &(*It.first); + } + + // Given an SMTExpr, adds/retrives it from the cache and returns + // an SMTExprRef to the SMTExpr in the 
cache + SMTExprRef newExprRef(const SMTExpr &Exp) { + auto It = CachedExprs.insert(toZ3Expr(Exp)); + return &(*It.first); + } + + SMTSortRef getBoolSort() override { + return newSortRef(Z3Sort(Context, Z3_mk_bool_sort(Context.Context))); + } + + SMTSortRef getBitvectorSort(unsigned BitWidth) override { + return newSortRef( + Z3Sort(Context, Z3_mk_bv_sort(Context.Context, BitWidth))); + } + + SMTSortRef getSort(const SMTExprRef &Exp) override { + return newSortRef( + Z3Sort(Context, Z3_get_sort(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTSortRef getFloat16Sort() override { + return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_16(Context.Context))); + } + + SMTSortRef getFloat32Sort() override { + return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_32(Context.Context))); + } + + SMTSortRef getFloat64Sort() override { + return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_64(Context.Context))); + } + + SMTSortRef getFloat128Sort() override { + return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_128(Context.Context))); + } + + SMTExprRef mkBVNeg(const SMTExprRef &Exp) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvneg(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkBVNot(const SMTExprRef &Exp) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvnot(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkNot(const SMTExprRef &Exp) override { + return newExprRef( + Z3Expr(Context, Z3_mk_not(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkBVAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvadd(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSub(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvsub(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVMul(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, 
Z3_mk_bvmul(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSRem(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvsrem(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVURem(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvurem(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvsdiv(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVUDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvudiv(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVShl(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvshl(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVAshr(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvashr(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVLshr(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvlshr(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVXor(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvxor(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVOr(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvor(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, 
Z3_mk_bvand(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVUlt(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvult(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSlt(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvslt(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVUgt(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvugt(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSgt(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvsgt(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVUle(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvule(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSle(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvsle(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVUge(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvuge(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVSge(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_bvsge(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + Z3_ast Args[2] = {toZ3Expr(*LHS).AST, toZ3Expr(*RHS).AST}; + return newExprRef(Z3Expr(Context, Z3_mk_and(Context.Context, 2, Args))); + } + + SMTExprRef mkOr(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + Z3_ast Args[2] = {toZ3Expr(*LHS).AST, 
toZ3Expr(*RHS).AST}; + return newExprRef(Z3Expr(Context, Z3_mk_or(Context.Context, 2, Args))); + } + + SMTExprRef mkEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_eq(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPNeg(const SMTExprRef &Exp) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_neg(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkFPIsInfinite(const SMTExprRef &Exp) override { + return newExprRef(Z3Expr( + Context, Z3_mk_fpa_is_infinite(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkFPIsNaN(const SMTExprRef &Exp) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_is_nan(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkFPIsNormal(const SMTExprRef &Exp) override { + return newExprRef(Z3Expr( + Context, Z3_mk_fpa_is_normal(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkFPIsZero(const SMTExprRef &Exp) override { + return newExprRef(Z3Expr( + Context, Z3_mk_fpa_is_zero(Context.Context, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkFPMul(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef( + Z3Expr(Context, + Z3_mk_fpa_mul(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST))); + } + + SMTExprRef mkFPDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef( + Z3Expr(Context, + Z3_mk_fpa_div(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST))); + } + + SMTExprRef mkFPRem(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_rem(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + 
return newExprRef( + Z3Expr(Context, + Z3_mk_fpa_add(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST))); + } + + SMTExprRef mkFPSub(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef( + Z3Expr(Context, + Z3_mk_fpa_sub(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST))); + } + + SMTExprRef mkFPLt(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_lt(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPGt(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_gt(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPLe(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_leq(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPGe(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_geq(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_fpa_eq(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkIte(const SMTExprRef &Cond, const SMTExprRef &T, + const SMTExprRef &F) override { + return newExprRef( + Z3Expr(Context, Z3_mk_ite(Context.Context, toZ3Expr(*Cond).AST, + toZ3Expr(*T).AST, toZ3Expr(*F).AST))); + } + + SMTExprRef mkBVSignExt(unsigned i, const SMTExprRef &Exp) override { + return newExprRef(Z3Expr( + Context, Z3_mk_sign_ext(Context.Context, i, toZ3Expr(*Exp).AST))); + } + + SMTExprRef mkBVZeroExt(unsigned i, const SMTExprRef &Exp) override { + return newExprRef(Z3Expr( + Context, Z3_mk_zero_ext(Context.Context, i, toZ3Expr(*Exp).AST))); + } 
+ + SMTExprRef mkBVExtract(unsigned High, unsigned Low, + const SMTExprRef &Exp) override { + return newExprRef(Z3Expr(Context, Z3_mk_extract(Context.Context, High, Low, + toZ3Expr(*Exp).AST))); + } + + /// Creates a predicate that checks for overflow in a bitvector addition + /// operation + SMTExprRef mkBVAddNoOverflow(const SMTExprRef &LHS, const SMTExprRef &RHS, + bool isSigned) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvadd_no_overflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, isSigned))); + } + + /// Creates a predicate that checks for underflow in a signed bitvector + /// addition operation + SMTExprRef mkBVAddNoUnderflow(const SMTExprRef &LHS, + const SMTExprRef &RHS) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvadd_no_underflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + /// Creates a predicate that checks for overflow in a signed bitvector + /// subtraction operation + SMTExprRef mkBVSubNoOverflow(const SMTExprRef &LHS, + const SMTExprRef &RHS) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvsub_no_overflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + /// Creates a predicate that checks for underflow in a bitvector subtraction + /// operation + SMTExprRef mkBVSubNoUnderflow(const SMTExprRef &LHS, const SMTExprRef &RHS, + bool isSigned) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvsub_no_underflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, isSigned))); + } + + /// Creates a predicate that checks for overflow in a signed bitvector + /// division/modulus operation + SMTExprRef mkBVSDivNoOverflow(const SMTExprRef &LHS, + const SMTExprRef &RHS) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvsdiv_no_overflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + /// Creates a predicate that checks for overflow in a bitvector negation + /// operation + SMTExprRef mkBVNegNoOverflow(const SMTExprRef &Exp) 
override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvneg_no_overflow(Context.Context, toZ3Expr(*Exp).AST))); + } + + /// Creates a predicate that checks for overflow in a bitvector multiplication + /// operation + SMTExprRef mkBVMulNoOverflow(const SMTExprRef &LHS, const SMTExprRef &RHS, + bool isSigned) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvmul_no_overflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST, isSigned))); + } + + /// Creates a predicate that checks for underflow in a signed bitvector + /// multiplication operation + SMTExprRef mkBVMulNoUnderflow(const SMTExprRef &LHS, + const SMTExprRef &RHS) override { + return newExprRef(Z3Expr( + Context, Z3_mk_bvmul_no_underflow(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkBVConcat(const SMTExprRef &LHS, const SMTExprRef &RHS) override { + return newExprRef( + Z3Expr(Context, Z3_mk_concat(Context.Context, toZ3Expr(*LHS).AST, + toZ3Expr(*RHS).AST))); + } + + SMTExprRef mkFPtoFP(const SMTExprRef &From, const SMTSortRef &To) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef(Z3Expr( + Context, + Z3_mk_fpa_to_fp_float(Context.Context, toZ3Expr(*RoundingMode).AST, + toZ3Expr(*From).AST, toZ3Sort(*To).Sort))); + } + + SMTExprRef mkSBVtoFP(const SMTExprRef &From, const SMTSortRef &To) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef(Z3Expr( + Context, + Z3_mk_fpa_to_fp_signed(Context.Context, toZ3Expr(*RoundingMode).AST, + toZ3Expr(*From).AST, toZ3Sort(*To).Sort))); + } + + SMTExprRef mkUBVtoFP(const SMTExprRef &From, const SMTSortRef &To) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef(Z3Expr( + Context, + Z3_mk_fpa_to_fp_unsigned(Context.Context, toZ3Expr(*RoundingMode).AST, + toZ3Expr(*From).AST, toZ3Sort(*To).Sort))); + } + + SMTExprRef mkFPtoSBV(const SMTExprRef &From, unsigned ToWidth) override { + SMTExprRef RoundingMode = 
getFloatRoundingMode(); + return newExprRef(Z3Expr( + Context, Z3_mk_fpa_to_sbv(Context.Context, toZ3Expr(*RoundingMode).AST, + toZ3Expr(*From).AST, ToWidth))); + } + + SMTExprRef mkFPtoUBV(const SMTExprRef &From, unsigned ToWidth) override { + SMTExprRef RoundingMode = getFloatRoundingMode(); + return newExprRef(Z3Expr( + Context, Z3_mk_fpa_to_ubv(Context.Context, toZ3Expr(*RoundingMode).AST, + toZ3Expr(*From).AST, ToWidth))); + } + + SMTExprRef mkBoolean(const bool b) override { + return newExprRef(Z3Expr(Context, b ? Z3_mk_true(Context.Context) + : Z3_mk_false(Context.Context))); + } + + SMTExprRef mkBitvector(const llvm::APSInt Int, unsigned BitWidth) override { + const SMTSortRef Sort = getBitvectorSort(BitWidth); + return newExprRef( + Z3Expr(Context, Z3_mk_numeral(Context.Context, Int.toString(10).c_str(), + toZ3Sort(*Sort).Sort))); + } + + SMTExprRef mkFloat(const llvm::APFloat Float) override { + SMTSortRef Sort = + getFloatSort(llvm::APFloat::semanticsSizeInBits(Float.getSemantics())); + + llvm::APSInt Int = llvm::APSInt(Float.bitcastToAPInt(), false); + SMTExprRef Z3Int = mkBitvector(Int, Int.getBitWidth()); + return newExprRef(Z3Expr( + Context, Z3_mk_fpa_to_fp_bv(Context.Context, toZ3Expr(*Z3Int).AST, + toZ3Sort(*Sort).Sort))); + } + + SMTExprRef mkSymbol(const char *Name, SMTSortRef Sort) override { + return newExprRef( + Z3Expr(Context, Z3_mk_const(Context.Context, + Z3_mk_string_symbol(Context.Context, Name), + toZ3Sort(*Sort).Sort))); + } + + llvm::APSInt getBitvector(const SMTExprRef &Exp, unsigned BitWidth, + bool isUnsigned) override { + return llvm::APSInt( + llvm::APInt(BitWidth, + Z3_get_numeral_string(Context.Context, toZ3Expr(*Exp).AST), + 10), + isUnsigned); + } + + bool getBoolean(const SMTExprRef &Exp) override { + return Z3_get_bool_value(Context.Context, toZ3Expr(*Exp).AST) == Z3_L_TRUE; + } + + SMTExprRef getFloatRoundingMode() override { + // TODO: Don't assume nearest ties to even rounding mode + return newExprRef(Z3Expr(Context, 
Z3_mk_fpa_rne(Context.Context))); + } + + bool toAPFloat(const SMTSortRef &Sort, const SMTExprRef &AST, + llvm::APFloat &Float, bool useSemantics) { + assert(Sort->isFloatSort() && "Unsupported sort to floating-point!"); + + llvm::APSInt Int(Sort->getFloatSortSize(), true); + const llvm::fltSemantics &Semantics = + getFloatSemantics(Sort->getFloatSortSize()); + SMTSortRef BVSort = getBitvectorSort(Sort->getFloatSortSize()); + if (!toAPSInt(BVSort, AST, Int, true)) { + return false; + } + + if (useSemantics && !areEquivalent(Float.getSemantics(), Semantics)) { + assert(false && "Floating-point types don't match!"); + return false; + } + + Float = llvm::APFloat(Semantics, Int); + return true; + } + + bool toAPSInt(const SMTSortRef &Sort, const SMTExprRef &AST, + llvm::APSInt &Int, bool useSemantics) { + if (Sort->isBitvectorSort()) { + if (useSemantics && Int.getBitWidth() != Sort->getBitvectorSortSize()) { + assert(false && "Bitvector types don't match!"); + return false; + } + + // FIXME: This function is also used to retrieve floating-point values, + // which can be 16, 32, 64 or 128 bits long. Bitvectors can be anything + // between 1 and 64 bits long, which is the reason we have this weird + // guard. In the future, we need proper calls in the backend to retrieve + // floating-points and its special values (NaN, +/-infinity, +/-zero), + // then we can drop this weird condition. 
+ if (Sort->getBitvectorSortSize() <= 64 || + Sort->getBitvectorSortSize() == 128) { + Int = getBitvector(AST, Int.getBitWidth(), Int.isUnsigned()); + return true; + } + + assert(false && "Bitwidth not supported!"); + return false; + } + + if (Sort->isBooleanSort()) { + if (useSemantics && Int.getBitWidth() < 1) { + assert(false && "Boolean type doesn't match!"); + return false; + } + + Int = llvm::APSInt(llvm::APInt(Int.getBitWidth(), getBoolean(AST)), + Int.isUnsigned()); + return true; + } + + llvm_unreachable("Unsupported sort to integer!"); + } + + bool getInterpretation(const SMTExprRef &Exp, llvm::APSInt &Int) override { + Z3Model Model(Context, Z3_solver_get_model(Context.Context, Solver)); + Z3_func_decl Func = Z3_get_app_decl( + Context.Context, Z3_to_app(Context.Context, toZ3Expr(*Exp).AST)); + if (Z3_model_has_interp(Context.Context, Model.Model, Func) != Z3_L_TRUE) + return false; + + SMTExprRef Assign = newExprRef( + Z3Expr(Context, + Z3_model_get_const_interp(Context.Context, Model.Model, Func))); + SMTSortRef Sort = getSort(Assign); + return toAPSInt(Sort, Assign, Int, true); + } + + bool getInterpretation(const SMTExprRef &Exp, llvm::APFloat &Float) override { + Z3Model Model(Context, Z3_solver_get_model(Context.Context, Solver)); + Z3_func_decl Func = Z3_get_app_decl( + Context.Context, Z3_to_app(Context.Context, toZ3Expr(*Exp).AST)); + if (Z3_model_has_interp(Context.Context, Model.Model, Func) != Z3_L_TRUE) + return false; + + SMTExprRef Assign = newExprRef( + Z3Expr(Context, + Z3_model_get_const_interp(Context.Context, Model.Model, Func))); + SMTSortRef Sort = getSort(Assign); + return toAPFloat(Sort, Assign, Float, true); + } + + Optional<bool> check() const override { + Z3_lbool res = Z3_solver_check(Context.Context, Solver); + if (res == Z3_L_TRUE) + return true; + + if (res == Z3_L_FALSE) + return false; + + return Optional<bool>(); + } + + void push() override { return Z3_solver_push(Context.Context, Solver); } + + void pop(unsigned 
NumStates = 1) override { + assert(Z3_solver_get_num_scopes(Context.Context, Solver) >= NumStates); + return Z3_solver_pop(Context.Context, Solver, NumStates); + } + + bool isFPSupported() override { return true; } + + /// Reset the solver and remove all constraints. + void reset() override { Z3_solver_reset(Context.Context, Solver); } + + void print(raw_ostream &OS) const override { + OS << Z3_solver_to_string(Context.Context, Solver); + } +}; // end class Z3Solver + +} // end anonymous namespace + +#endif + +llvm::SMTSolverRef llvm::CreateZ3Solver() { +#if LLVM_WITH_Z3 + return std::make_unique<Z3Solver>(); +#else + llvm::report_fatal_error("LLVM was not compiled with Z3 support, rebuild " + "with -DLLVM_ENABLE_Z3_SOLVER=ON", + false); + return nullptr; +#endif +} + +LLVM_DUMP_METHOD void SMTSort::dump() const { print(llvm::errs()); } +LLVM_DUMP_METHOD void SMTExpr::dump() const { print(llvm::errs()); } +LLVM_DUMP_METHOD void SMTSolver::dump() const { print(llvm::errs()); } diff --git a/llvm/lib/Support/circular_raw_ostream.cpp b/llvm/lib/Support/circular_raw_ostream.cpp new file mode 100644 index 0000000000000..acd230704ff8d --- /dev/null +++ b/llvm/lib/Support/circular_raw_ostream.cpp @@ -0,0 +1,44 @@ +//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements support for circular buffered streams. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/circular_raw_ostream.h" +#include <algorithm> +using namespace llvm; + +void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { + if (BufferSize == 0) { + TheStream->write(Ptr, Size); + return; + } + + // Write into the buffer, wrapping if necessary. + while (Size != 0) { + unsigned Bytes = + std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray))); + memcpy(Cur, Ptr, Bytes); + Size -= Bytes; + Cur += Bytes; + if (Cur == BufferArray + BufferSize) { + // Reset the output pointer to the start of the buffer. + Cur = BufferArray; + Filled = true; + } + } +} + +void circular_raw_ostream::flushBufferWithBanner() { + if (BufferSize != 0) { + // Write out the buffer + TheStream->write(Banner, std::strlen(Banner)); + flushBuffer(); + } +} diff --git a/llvm/lib/Support/raw_os_ostream.cpp b/llvm/lib/Support/raw_os_ostream.cpp new file mode 100644 index 0000000000000..81f0d739696ef --- /dev/null +++ b/llvm/lib/Support/raw_os_ostream.cpp @@ -0,0 +1,29 @@ +//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements support adapting raw_ostream to std::ostream. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_os_ostream.h" +#include <ostream> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// raw_os_ostream +//===----------------------------------------------------------------------===// + +raw_os_ostream::~raw_os_ostream() { + flush(); +} + +void raw_os_ostream::write_impl(const char *Ptr, size_t Size) { + OS.write(Ptr, Size); +} + +uint64_t raw_os_ostream::current_pos() const { return OS.tellp(); } diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp new file mode 100644 index 0000000000000..b9989371f5eab --- /dev/null +++ b/llvm/lib/Support/raw_ostream.cpp @@ -0,0 +1,946 @@ +//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements support for bulk buffered stream output. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/NativeFormatting.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <cstdio> +#include <iterator> +#include <sys/stat.h> +#include <system_error> + +// <fcntl.h> may provide O_BINARY. +#if defined(HAVE_FCNTL_H) +# include <fcntl.h> +#endif + +#if defined(HAVE_UNISTD_H) +# include <unistd.h> +#endif + +#if defined(__CYGWIN__) +#include <io.h> +#endif + +#if defined(_MSC_VER) +#include <io.h> +#ifndef STDIN_FILENO +# define STDIN_FILENO 0 +#endif +#ifndef STDOUT_FILENO +# define STDOUT_FILENO 1 +#endif +#ifndef STDERR_FILENO +# define STDERR_FILENO 2 +#endif +#endif + +#ifdef _WIN32 +#include "llvm/Support/ConvertUTF.h" +#include "Windows/WindowsSupport.h" +#endif + +using namespace llvm; + +const raw_ostream::Colors raw_ostream::BLACK; +const raw_ostream::Colors raw_ostream::RED; +const raw_ostream::Colors raw_ostream::GREEN; +const raw_ostream::Colors raw_ostream::YELLOW; +const raw_ostream::Colors raw_ostream::BLUE; +const raw_ostream::Colors raw_ostream::MAGENTA; +const raw_ostream::Colors raw_ostream::CYAN; +const raw_ostream::Colors raw_ostream::WHITE; +const raw_ostream::Colors raw_ostream::SAVEDCOLOR; +const raw_ostream::Colors raw_ostream::RESET; + +raw_ostream::~raw_ostream() { + // raw_ostream's subclasses should take care to flush the buffer + // in their destructors. 
+ assert(OutBufCur == OutBufStart && + "raw_ostream destructor called with non-empty buffer!"); + + if (BufferMode == InternalBuffer) + delete [] OutBufStart; +} + +size_t raw_ostream::preferred_buffer_size() const { + // BUFSIZ is intended to be a reasonable default. + return BUFSIZ; +} + +void raw_ostream::SetBuffered() { + // Ask the subclass to determine an appropriate buffer size. + if (size_t Size = preferred_buffer_size()) + SetBufferSize(Size); + else + // It may return 0, meaning this stream should be unbuffered. + SetUnbuffered(); +} + +void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, + BufferKind Mode) { + assert(((Mode == Unbuffered && !BufferStart && Size == 0) || + (Mode != Unbuffered && BufferStart && Size != 0)) && + "stream must be unbuffered or have at least one byte"); + // Make sure the current buffer is free of content (we can't flush here; the + // child buffer management logic will be in write_impl). + assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!"); + + if (BufferMode == InternalBuffer) + delete [] OutBufStart; + OutBufStart = BufferStart; + OutBufEnd = OutBufStart+Size; + OutBufCur = OutBufStart; + BufferMode = Mode; + + assert(OutBufStart <= OutBufEnd && "Invalid size!"); +} + +raw_ostream &raw_ostream::operator<<(unsigned long N) { + write_integer(*this, static_cast<uint64_t>(N), 0, IntegerStyle::Integer); + return *this; +} + +raw_ostream &raw_ostream::operator<<(long N) { + write_integer(*this, static_cast<int64_t>(N), 0, IntegerStyle::Integer); + return *this; +} + +raw_ostream &raw_ostream::operator<<(unsigned long long N) { + write_integer(*this, static_cast<uint64_t>(N), 0, IntegerStyle::Integer); + return *this; +} + +raw_ostream &raw_ostream::operator<<(long long N) { + write_integer(*this, static_cast<int64_t>(N), 0, IntegerStyle::Integer); + return *this; +} + +raw_ostream &raw_ostream::write_hex(unsigned long long N) { + llvm::write_hex(*this, N, HexPrintStyle::Lower); + return *this; +} + 
+raw_ostream &raw_ostream::operator<<(Colors C) { + if (C == Colors::RESET) + resetColor(); + else + changeColor(C); + return *this; +} + +raw_ostream &raw_ostream::write_uuid(const uuid_t UUID) { + for (int Idx = 0; Idx < 16; ++Idx) { + *this << format("%02" PRIX32, UUID[Idx]); + if (Idx == 3 || Idx == 5 || Idx == 7 || Idx == 9) + *this << "-"; + } + return *this; +} + + +raw_ostream &raw_ostream::write_escaped(StringRef Str, + bool UseHexEscapes) { + for (unsigned char c : Str) { + switch (c) { + case '\\': + *this << '\\' << '\\'; + break; + case '\t': + *this << '\\' << 't'; + break; + case '\n': + *this << '\\' << 'n'; + break; + case '"': + *this << '\\' << '"'; + break; + default: + if (isPrint(c)) { + *this << c; + break; + } + + // Write out the escaped representation. + if (UseHexEscapes) { + *this << '\\' << 'x'; + *this << hexdigit((c >> 4 & 0xF)); + *this << hexdigit((c >> 0) & 0xF); + } else { + // Always use a full 3-character octal escape. + *this << '\\'; + *this << char('0' + ((c >> 6) & 7)); + *this << char('0' + ((c >> 3) & 7)); + *this << char('0' + ((c >> 0) & 7)); + } + } + } + + return *this; +} + +raw_ostream &raw_ostream::operator<<(const void *P) { + llvm::write_hex(*this, (uintptr_t)P, HexPrintStyle::PrefixLower); + return *this; +} + +raw_ostream &raw_ostream::operator<<(double N) { + llvm::write_double(*this, N, FloatStyle::Exponent); + return *this; +} + +void raw_ostream::flush_nonempty() { + assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty."); + size_t Length = OutBufCur - OutBufStart; + OutBufCur = OutBufStart; + write_impl(OutBufStart, Length); +} + +raw_ostream &raw_ostream::write(unsigned char C) { + // Group exceptional cases into a single branch. + if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) { + if (LLVM_UNLIKELY(!OutBufStart)) { + if (BufferMode == Unbuffered) { + write_impl(reinterpret_cast<char*>(&C), 1); + return *this; + } + // Set up a buffer and start over. 
+ SetBuffered(); + return write(C); + } + + flush_nonempty(); + } + + *OutBufCur++ = C; + return *this; +} + +raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { + // Group exceptional cases into a single branch. + if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) { + if (LLVM_UNLIKELY(!OutBufStart)) { + if (BufferMode == Unbuffered) { + write_impl(Ptr, Size); + return *this; + } + // Set up a buffer and start over. + SetBuffered(); + return write(Ptr, Size); + } + + size_t NumBytes = OutBufEnd - OutBufCur; + + // If the buffer is empty at this point we have a string that is larger + // than the buffer. Directly write the chunk that is a multiple of the + // preferred buffer size and put the remainder in the buffer. + if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) { + assert(NumBytes != 0 && "undefined behavior"); + size_t BytesToWrite = Size - (Size % NumBytes); + write_impl(Ptr, BytesToWrite); + size_t BytesRemaining = Size - BytesToWrite; + if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) { + // Too much left over to copy into our buffer. + return write(Ptr + BytesToWrite, BytesRemaining); + } + copy_to_buffer(Ptr + BytesToWrite, BytesRemaining); + return *this; + } + + // We don't have enough space in the buffer to fit the string in. Insert as + // much as possible, flush and start over with the remainder. + copy_to_buffer(Ptr, NumBytes); + flush_nonempty(); + return write(Ptr + NumBytes, Size - NumBytes); + } + + copy_to_buffer(Ptr, Size); + + return *this; +} + +void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) { + assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!"); + + // Handle short strings specially, memcpy isn't very good at very short + // strings. 
+ switch (Size) { + case 4: OutBufCur[3] = Ptr[3]; LLVM_FALLTHROUGH; + case 3: OutBufCur[2] = Ptr[2]; LLVM_FALLTHROUGH; + case 2: OutBufCur[1] = Ptr[1]; LLVM_FALLTHROUGH; + case 1: OutBufCur[0] = Ptr[0]; LLVM_FALLTHROUGH; + case 0: break; + default: + memcpy(OutBufCur, Ptr, Size); + break; + } + + OutBufCur += Size; +} + +// Formatted output. +raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { + // If we have more than a few bytes left in our output buffer, try + // formatting directly onto its end. + size_t NextBufferSize = 127; + size_t BufferBytesLeft = OutBufEnd - OutBufCur; + if (BufferBytesLeft > 3) { + size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); + + // Common case is that we have plenty of space. + if (BytesUsed <= BufferBytesLeft) { + OutBufCur += BytesUsed; + return *this; + } + + // Otherwise, we overflowed and the return value tells us the size to try + // again with. + NextBufferSize = BytesUsed; + } + + // If we got here, we didn't have enough space in the output buffer for the + // string. Try printing into a SmallVector that is resized to have enough + // space. Iterate until we win. + SmallVector<char, 128> V; + + while (true) { + V.resize(NextBufferSize); + + // Try formatting into the SmallVector. + size_t BytesUsed = Fmt.print(V.data(), NextBufferSize); + + // If BytesUsed fit into the vector, we win. + if (BytesUsed <= NextBufferSize) + return write(V.data(), BytesUsed); + + // Otherwise, try again with a new size. 
+ assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?"); + NextBufferSize = BytesUsed; + } +} + +raw_ostream &raw_ostream::operator<<(const formatv_object_base &Obj) { + SmallString<128> S; + Obj.format(*this); + return *this; +} + +raw_ostream &raw_ostream::operator<<(const FormattedString &FS) { + if (FS.Str.size() >= FS.Width || FS.Justify == FormattedString::JustifyNone) { + this->operator<<(FS.Str); + return *this; + } + const size_t Difference = FS.Width - FS.Str.size(); + switch (FS.Justify) { + case FormattedString::JustifyLeft: + this->operator<<(FS.Str); + this->indent(Difference); + break; + case FormattedString::JustifyRight: + this->indent(Difference); + this->operator<<(FS.Str); + break; + case FormattedString::JustifyCenter: { + int PadAmount = Difference / 2; + this->indent(PadAmount); + this->operator<<(FS.Str); + this->indent(Difference - PadAmount); + break; + } + default: + llvm_unreachable("Bad Justification"); + } + return *this; +} + +raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) { + if (FN.Hex) { + HexPrintStyle Style; + if (FN.Upper && FN.HexPrefix) + Style = HexPrintStyle::PrefixUpper; + else if (FN.Upper && !FN.HexPrefix) + Style = HexPrintStyle::Upper; + else if (!FN.Upper && FN.HexPrefix) + Style = HexPrintStyle::PrefixLower; + else + Style = HexPrintStyle::Lower; + llvm::write_hex(*this, FN.HexValue, Style, FN.Width); + } else { + llvm::SmallString<16> Buffer; + llvm::raw_svector_ostream Stream(Buffer); + llvm::write_integer(Stream, FN.DecValue, 0, IntegerStyle::Integer); + if (Buffer.size() < FN.Width) + indent(FN.Width - Buffer.size()); + (*this) << Buffer; + } + return *this; +} + +raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) { + if (FB.Bytes.empty()) + return *this; + + size_t LineIndex = 0; + auto Bytes = FB.Bytes; + const size_t Size = Bytes.size(); + HexPrintStyle HPS = FB.Upper ? 
HexPrintStyle::Upper : HexPrintStyle::Lower; + uint64_t OffsetWidth = 0; + if (FB.FirstByteOffset.hasValue()) { + // Figure out how many nibbles are needed to print the largest offset + // represented by this data set, so that we can align the offset field + // to the right width. + size_t Lines = Size / FB.NumPerLine; + uint64_t MaxOffset = *FB.FirstByteOffset + Lines * FB.NumPerLine; + unsigned Power = 0; + if (MaxOffset > 0) + Power = llvm::Log2_64_Ceil(MaxOffset); + OffsetWidth = std::max<uint64_t>(4, llvm::alignTo(Power, 4) / 4); + } + + // The width of a block of data including all spaces for group separators. + unsigned NumByteGroups = + alignTo(FB.NumPerLine, FB.ByteGroupSize) / FB.ByteGroupSize; + unsigned BlockCharWidth = FB.NumPerLine * 2 + NumByteGroups - 1; + + while (!Bytes.empty()) { + indent(FB.IndentLevel); + + if (FB.FirstByteOffset.hasValue()) { + uint64_t Offset = FB.FirstByteOffset.getValue(); + llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth); + *this << ": "; + } + + auto Line = Bytes.take_front(FB.NumPerLine); + + size_t CharsPrinted = 0; + // Print the hex bytes for this line in groups + for (size_t I = 0; I < Line.size(); ++I, CharsPrinted += 2) { + if (I && (I % FB.ByteGroupSize) == 0) { + ++CharsPrinted; + *this << " "; + } + llvm::write_hex(*this, Line[I], HPS, 2); + } + + if (FB.ASCII) { + // Print any spaces needed for any bytes that we didn't print on this + // line so that the ASCII bytes are correctly aligned. 
+ assert(BlockCharWidth >= CharsPrinted); + indent(BlockCharWidth - CharsPrinted + 2); + *this << "|"; + + // Print the ASCII char values for each byte on this line + for (uint8_t Byte : Line) { + if (isPrint(Byte)) + *this << static_cast<char>(Byte); + else + *this << '.'; + } + *this << '|'; + } + + Bytes = Bytes.drop_front(Line.size()); + LineIndex += Line.size(); + if (LineIndex < Size) + *this << '\n'; + } + return *this; +} + +template <char C> +static raw_ostream &write_padding(raw_ostream &OS, unsigned NumChars) { + static const char Chars[] = {C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C}; + + // Usually the indentation is small, handle it with a fastpath. + if (NumChars < array_lengthof(Chars)) + return OS.write(Chars, NumChars); + + while (NumChars) { + unsigned NumToWrite = std::min(NumChars, + (unsigned)array_lengthof(Chars)-1); + OS.write(Chars, NumToWrite); + NumChars -= NumToWrite; + } + return OS; +} + +/// indent - Insert 'NumSpaces' spaces. +raw_ostream &raw_ostream::indent(unsigned NumSpaces) { + return write_padding<' '>(*this, NumSpaces); +} + +/// write_zeros - Insert 'NumZeros' nulls. +raw_ostream &raw_ostream::write_zeros(unsigned NumZeros) { + return write_padding<'\0'>(*this, NumZeros); +} + +void raw_ostream::anchor() {} + +//===----------------------------------------------------------------------===// +// Formatted Output +//===----------------------------------------------------------------------===// + +// Out of line virtual method. 
+void format_object_base::home() { +} + +//===----------------------------------------------------------------------===// +// raw_fd_ostream +//===----------------------------------------------------------------------===// + +static int getFD(StringRef Filename, std::error_code &EC, + sys::fs::CreationDisposition Disp, sys::fs::FileAccess Access, + sys::fs::OpenFlags Flags) { + assert((Access & sys::fs::FA_Write) && + "Cannot make a raw_ostream from a read-only descriptor!"); + + // Handle "-" as stdout. Note that when we do this, we consider ourself + // the owner of stdout and may set the "binary" flag globally based on Flags. + if (Filename == "-") { + EC = std::error_code(); + // If user requested binary then put stdout into binary mode if + // possible. + if (!(Flags & sys::fs::OF_Text)) + sys::ChangeStdoutToBinary(); + return STDOUT_FILENO; + } + + int FD; + if (Access & sys::fs::FA_Read) + EC = sys::fs::openFileForReadWrite(Filename, FD, Disp, Flags); + else + EC = sys::fs::openFileForWrite(Filename, FD, Disp, Flags); + if (EC) + return -1; + + return FD; +} + +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC) + : raw_fd_ostream(Filename, EC, sys::fs::CD_CreateAlways, sys::fs::FA_Write, + sys::fs::OF_None) {} + +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, + sys::fs::CreationDisposition Disp) + : raw_fd_ostream(Filename, EC, Disp, sys::fs::FA_Write, sys::fs::OF_None) {} + +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, + sys::fs::FileAccess Access) + : raw_fd_ostream(Filename, EC, sys::fs::CD_CreateAlways, Access, + sys::fs::OF_None) {} + +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, + sys::fs::OpenFlags Flags) + : raw_fd_ostream(Filename, EC, sys::fs::CD_CreateAlways, sys::fs::FA_Write, + Flags) {} + +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, + sys::fs::CreationDisposition Disp, + sys::fs::FileAccess Access, + sys::fs::OpenFlags 
Flags) + : raw_fd_ostream(getFD(Filename, EC, Disp, Access, Flags), true) {} + +/// FD is the file descriptor that this writes to. If ShouldClose is true, this +/// closes the file when the stream is destroyed. +raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) + : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose) { + if (FD < 0 ) { + ShouldClose = false; + return; + } + + // Do not attempt to close stdout or stderr. We used to try to maintain the + // property that tools that support writing file to stdout should not also + // write informational output to stdout, but in practice we were never able to + // maintain this invariant. Many features have been added to LLVM and clang + // (-fdump-record-layouts, optimization remarks, etc) that print to stdout, so + // users must simply be aware that mixed output and remarks is a possibility. + if (FD <= STDERR_FILENO) + ShouldClose = false; + +#ifdef _WIN32 + // Check if this is a console device. This is not equivalent to isatty. + IsWindowsConsole = + ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; +#endif + + // Get the starting position. + off_t loc = ::lseek(FD, 0, SEEK_CUR); +#ifdef _WIN32 + // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes. + sys::fs::file_status Status; + std::error_code EC = status(FD, Status); + SupportsSeeking = !EC && Status.type() == sys::fs::file_type::regular_file; +#else + SupportsSeeking = loc != (off_t)-1; +#endif + if (!SupportsSeeking) + pos = 0; + else + pos = static_cast<uint64_t>(loc); +} + +raw_fd_ostream::~raw_fd_ostream() { + if (FD >= 0) { + flush(); + if (ShouldClose) { + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + error_detected(EC); + } + } + +#ifdef __MINGW32__ + // On mingw, global dtors should not call exit(). + // report_fatal_error() invokes exit(). We know report_fatal_error() + // might not write messages to stderr when any errors were detected + // on FD == 2. 
+ if (FD == 2) return; +#endif + + // If there are any pending errors, report them now. Clients wishing + // to avoid report_fatal_error calls should check for errors with + // has_error() and clear the error flag with clear_error() before + // destructing raw_ostream objects which may have errors. + if (has_error()) + report_fatal_error("IO failure on output stream: " + error().message(), + /*gen_crash_diag=*/false); +} + +#if defined(_WIN32) +// The most reliable way to print unicode in a Windows console is with +// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This +// assumes that LLVM programs always print valid UTF-8 to the console. The data +// might not be UTF-8 for two major reasons: +// 1. The program is printing binary (-filetype=obj -o -), in which case it +// would have been gibberish anyway. +// 2. The program is printing text in a semi-ascii compatible codepage like +// shift-jis or cp1252. +// +// Most LLVM programs don't produce non-ascii text unless they are quoting +// user source input. A well-behaved LLVM program should either validate that +// the input is UTF-8 or transcode from the local codepage to UTF-8 before +// quoting it. If they don't, this may mess up the encoding, but this is still +// probably the best compromise we can make. +static bool write_console_impl(int FD, StringRef Data) { + SmallVector<wchar_t, 256> WideText; + + // Fall back to ::write if it wasn't valid UTF-8. + if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText)) + return false; + + // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data + // that can be written to the console at a time. 
+ size_t MaxWriteSize = WideText.size(); + if (!RunningWindows8OrGreater()) + MaxWriteSize = 32767; + + size_t WCharsWritten = 0; + do { + size_t WCharsToWrite = + std::min(MaxWriteSize, WideText.size() - WCharsWritten); + DWORD ActuallyWritten; + bool Success = + ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten], + WCharsToWrite, &ActuallyWritten, + /*Reserved=*/nullptr); + + // The most likely reason for WriteConsoleW to fail is that FD no longer + // points to a console. Fall back to ::write. If this isn't the first loop + // iteration, something is truly wrong. + if (!Success) + return false; + + WCharsWritten += ActuallyWritten; + } while (WCharsWritten != WideText.size()); + return true; +} +#endif + +void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { + assert(FD >= 0 && "File already closed."); + pos += Size; + +#if defined(_WIN32) + // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16 + // and using WriteConsoleW. If that fails, fall back to plain write(). + if (IsWindowsConsole) + if (write_console_impl(FD, StringRef(Ptr, Size))) + return; +#endif + + // The maximum write size is limited to INT32_MAX. A write + // greater than SSIZE_MAX is implementation-defined in POSIX, + // and Windows _write requires 32 bit input. + size_t MaxWriteSize = INT32_MAX; + +#if defined(__linux__) + // It is observed that Linux returns EINVAL for a very large write (>2G). + // Make it a reasonably small value. + MaxWriteSize = 1024 * 1024 * 1024; +#endif + + do { + size_t ChunkSize = std::min(Size, MaxWriteSize); + ssize_t ret = ::write(FD, Ptr, ChunkSize); + + if (ret < 0) { + // If it's a recoverable error, swallow it and retry the write. + // + // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since + // raw_ostream isn't designed to do non-blocking I/O. However, some + // programs, such as old versions of bjam, have mistakenly used + // O_NONBLOCK. 
For compatibility, emulate blocking semantics by + // spinning until the write succeeds. If you don't want spinning, + // don't use O_NONBLOCK file descriptors with raw_ostream. + if (errno == EINTR || errno == EAGAIN +#ifdef EWOULDBLOCK + || errno == EWOULDBLOCK +#endif + ) + continue; + + // Otherwise it's a non-recoverable error. Note it and quit. + error_detected(std::error_code(errno, std::generic_category())); + break; + } + + // The write may have written some or all of the data. Update the + // size and buffer pointer to reflect the remainder that needs + // to be written. If there are no bytes left, we're done. + Ptr += ret; + Size -= ret; + } while (Size > 0); +} + +void raw_fd_ostream::close() { + assert(ShouldClose); + ShouldClose = false; + flush(); + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + error_detected(EC); + FD = -1; +} + +uint64_t raw_fd_ostream::seek(uint64_t off) { + assert(SupportsSeeking && "Stream does not support seeking!"); + flush(); +#ifdef _WIN32 + pos = ::_lseeki64(FD, off, SEEK_SET); +#elif defined(HAVE_LSEEK64) + pos = ::lseek64(FD, off, SEEK_SET); +#else + pos = ::lseek(FD, off, SEEK_SET); +#endif + if (pos == (uint64_t)-1) + error_detected(std::error_code(errno, std::generic_category())); + return pos; +} + +void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size, + uint64_t Offset) { + uint64_t Pos = tell(); + seek(Offset); + write(Ptr, Size); + seek(Pos); +} + +size_t raw_fd_ostream::preferred_buffer_size() const { +#if defined(_WIN32) + // Disable buffering for console devices. Console output is re-encoded from + // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the + // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled + // below on most other OSs, so do the same thing on Windows and avoid that + // complexity. + if (IsWindowsConsole) + return 0; + return raw_ostream::preferred_buffer_size(); +#elif !defined(__minix) + // Minix has no st_blksize. 
+ assert(FD >= 0 && "File not yet open!"); + struct stat statbuf; + if (fstat(FD, &statbuf) != 0) + return 0; + + // If this is a terminal, don't use buffering. Line buffering + // would be a more traditional thing to do, but it's not worth + // the complexity. + if (S_ISCHR(statbuf.st_mode) && isatty(FD)) + return 0; + // Return the preferred block size. + return statbuf.st_blksize; +#else + return raw_ostream::preferred_buffer_size(); +#endif +} + +raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, + bool bg) { + if (!ColorEnabled) + return *this; + + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = + (colors == SAVEDCOLOR) + ? sys::Process::OutputBold(bg) + : sys::Process::OutputColor(static_cast<char>(colors), bold, bg); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // don't account colors towards output characters + pos -= len; + } + return *this; +} + +raw_ostream &raw_fd_ostream::resetColor() { + if (!ColorEnabled) + return *this; + + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = sys::Process::ResetColor(); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // don't account colors towards output characters + pos -= len; + } + return *this; +} + +raw_ostream &raw_fd_ostream::reverseColor() { + if (!ColorEnabled) + return *this; + + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = sys::Process::OutputReverse(); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // don't account colors towards output characters + pos -= len; + } + return *this; +} + +bool raw_fd_ostream::is_displayed() const { + return sys::Process::FileDescriptorIsDisplayed(FD); +} + +bool raw_fd_ostream::has_colors() const { + return sys::Process::FileDescriptorHasColors(FD); +} + +void raw_fd_ostream::anchor() {} + +//===----------------------------------------------------------------------===// +// 
outs(), errs(), nulls() +//===----------------------------------------------------------------------===// + +/// outs() - This returns a reference to a raw_ostream for standard output. +/// Use it like: outs() << "foo" << "bar"; +raw_ostream &llvm::outs() { + // Set buffer settings to model stdout behavior. + std::error_code EC; + static raw_fd_ostream S("-", EC, sys::fs::OF_None); + assert(!EC); + return S; +} + +/// errs() - This returns a reference to a raw_ostream for standard error. +/// Use it like: errs() << "foo" << "bar"; +raw_ostream &llvm::errs() { + // Set standard error to be unbuffered by default. + static raw_fd_ostream S(STDERR_FILENO, false, true); + return S; +} + +/// nulls() - This returns a reference to a raw_ostream which discards output. +raw_ostream &llvm::nulls() { + static raw_null_ostream S; + return S; +} + +//===----------------------------------------------------------------------===// +// raw_string_ostream +//===----------------------------------------------------------------------===// + +raw_string_ostream::~raw_string_ostream() { + flush(); +} + +void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { + OS.append(Ptr, Size); +} + +//===----------------------------------------------------------------------===// +// raw_svector_ostream +//===----------------------------------------------------------------------===// + +uint64_t raw_svector_ostream::current_pos() const { return OS.size(); } + +void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { + OS.append(Ptr, Ptr + Size); +} + +void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size, + uint64_t Offset) { + memcpy(OS.data() + Offset, Ptr, Size); +} + +//===----------------------------------------------------------------------===// +// raw_null_ostream +//===----------------------------------------------------------------------===// + +raw_null_ostream::~raw_null_ostream() { +#ifndef NDEBUG + // ~raw_ostream asserts that the buffer is empty. 
This isn't necessary + // with raw_null_ostream, but it's better to have raw_null_ostream follow + // the rules than to change the rules just for raw_null_ostream. + flush(); +#endif +} + +void raw_null_ostream::write_impl(const char *Ptr, size_t Size) { +} + +uint64_t raw_null_ostream::current_pos() const { + return 0; +} + +void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size, + uint64_t Offset) {} + +void raw_pwrite_stream::anchor() {} + +void buffer_ostream::anchor() {} diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c new file mode 100644 index 0000000000000..ee2a1d87a2672 --- /dev/null +++ b/llvm/lib/Support/regcomp.c @@ -0,0 +1,1702 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" + +/* character-class table */ +static struct cclass { + const char *name; + const char *chars; + const char *multis; +} cclasses[] = { + { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", ""} , + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + ""} , + { "blank", " \t", ""} , + { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", ""} , + { "digit", "0123456789", ""} , + { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "lower", "abcdefghijklmnopqrstuvwxyz", + ""} , + { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + ""} , + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "space", "\t\n\v\f\r ", ""} , + { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + ""} , + { "xdigit", "0123456789ABCDEFabcdef", + ""} , + { NULL, 0, "" } +}; + +/* character-name table */ +static struct cname { + const char *name; + char code; +} cnames[] = { + { "NUL", '\0' }, + { "SOH", 
'\001' }, + { "STX", '\002' }, + { "ETX", '\003' }, + { "EOT", '\004' }, + { "ENQ", '\005' }, + { "ACK", '\006' }, + { "BEL", '\007' }, + { "alert", '\007' }, + { "BS", '\010' }, + { "backspace", '\b' }, + { "HT", '\011' }, + { "tab", '\t' }, + { "LF", '\012' }, + { "newline", '\n' }, + { "VT", '\013' }, + { "vertical-tab", '\v' }, + { "FF", '\014' }, + { "form-feed", '\f' }, + { "CR", '\015' }, + { "carriage-return", '\r' }, + { "SO", '\016' }, + { "SI", '\017' }, + { "DLE", '\020' }, + { "DC1", '\021' }, + { "DC2", '\022' }, + { "DC3", '\023' }, + { "DC4", '\024' }, + { "NAK", '\025' }, + { "SYN", '\026' }, + { "ETB", '\027' }, + { "CAN", '\030' }, + { "EM", '\031' }, + { "SUB", '\032' }, + { "ESC", '\033' }, + { "IS4", '\034' }, + { "FS", '\034' }, + { "IS3", '\035' }, + { "GS", '\035' }, + { "IS2", '\036' }, + { "RS", '\036' }, + { "IS1", '\037' }, + { "US", '\037' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ',' }, + { "hyphen", '-' }, + { "hyphen-minus", '-' }, + { "period", '.' }, + { "full-stop", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, + { "less-than-sign", '<' }, + { "equals-sign", '=' }, + { "greater-than-sign", '>' }, + { "question-mark", '?' 
}, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "underscore", '_' }, + { "low-line", '_' }, + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "DEL", '\177' }, + { NULL, 0 } +}; + +/* + * parse structure, passed up and down to avoid global variables and + * other clumsinesses + */ +struct parse { + char *next; /* next character in RE */ + char *end; /* end of string (-> NUL normally) */ + int error; /* has an error been seen? */ + sop *strip; /* malloced strip */ + sopno ssize; /* malloced strip size (allocated) */ + sopno slen; /* malloced strip length (used) */ + int ncsalloc; /* number of csets allocated */ + struct re_guts *g; +# define NPAREN 10 /* we need to remember () 1-9 for back refs */ + sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ + sopno pend[NPAREN]; /* -> ) ([0] unused) */ +}; + +static void p_ere(struct parse *, int); +static void p_ere_exp(struct parse *); +static void p_str(struct parse *); +static void p_bre(struct parse *, int, int); +static int p_simp_re(struct parse *, int); +static int p_count(struct parse *); +static void p_bracket(struct parse *); +static void p_b_term(struct parse *, cset *); +static void p_b_cclass(struct parse *, cset *); +static void p_b_eclass(struct parse *, cset *); +static char p_b_symbol(struct parse *); +static char p_b_coll_elem(struct parse *, int); +static char othercase(int); +static void bothcases(struct parse *, int); +static void ordinary(struct parse *, int); +static void nonnewline(struct parse *); +static void repeat(struct parse *, sopno, int, int); +static int seterr(struct parse *, int); +static cset *allocset(struct parse *); +static void freeset(struct parse *, cset *); +static int 
freezeset(struct parse *, cset *); +static int firstch(struct parse *, cset *); +static int nch(struct parse *, cset *); +static void mcadd(struct parse *, cset *, const char *); +static void mcinvert(struct parse *, cset *); +static void mccase(struct parse *, cset *); +static int isinsets(struct re_guts *, int); +static int samesets(struct re_guts *, int, int); +static void categorize(struct parse *, struct re_guts *); +static sopno dupl(struct parse *, sopno, sopno); +static void doemit(struct parse *, sop, size_t); +static void doinsert(struct parse *, sop, size_t, sopno); +static void dofwd(struct parse *, sopno, sop); +static void enlarge(struct parse *, sopno); +static void stripsnug(struct parse *, struct re_guts *); +static void findmust(struct parse *, struct re_guts *); +static sopno pluscount(struct parse *, struct re_guts *); + +static char nuls[10]; /* place to point scanner in event of error */ + +/* + * macros for use with parse structure + * BEWARE: these know that the parse structure is named `p' !!! + */ +#define PEEK() (*p->next) +#define PEEK2() (*(p->next+1)) +#define MORE() (p->next < p->end) +#define MORE2() (p->next+1 < p->end) +#define SEE(c) (MORE() && PEEK() == (c)) +#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) +#define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) +#define NEXT() (p->next++) +#define NEXT2() (p->next += 2) +#define NEXTn(n) (p->next += (n)) +#define GETNEXT() (*p->next++) +#define SETERROR(e) seterr(p, (e)) +#define REQUIRE(co, e) (void)((co) || SETERROR(e)) +#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) +#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) +#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) +#define HERE() (p->slen) +#define THERE() (p->slen - 1) +#define THERETHERE() (p->slen - 2) +#define DROP(n) (p->slen -= (n)) + +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define INFINITY (DUPMAX + 1) + +#ifndef NDEBUG +static int never = 0; /* for use in asserts; shuts lint up */ +#else +#define never 0 /* some <assert.h>s have bugs too */ +#endif + +/* + - llvm_regcomp - interface for parser and compilation + */ +int /* 0 success, otherwise REG_something */ +llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) +{ + struct parse pa; + struct re_guts *g; + struct parse *p = &pa; + int i; + size_t len; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&~REG_DUMP) +#endif + + cflags = GOODFLAGS(cflags); + if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) + return(REG_INVARG); + + if (cflags®_PEND) { + if (preg->re_endp < pattern) + return(REG_INVARG); + len = preg->re_endp - pattern; + } else + len = strlen((const char *)pattern); + + /* do the mallocs early so failure handling is easy */ + g = (struct re_guts *)malloc(sizeof(struct re_guts) + + (NC-1)*sizeof(cat_t)); + if (g == NULL) + return(REG_ESPACE); + p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + p->strip = (sop *)calloc(p->ssize, sizeof(sop)); + p->slen = 0; + if 
(p->strip == NULL) { + free((char *)g); + return(REG_ESPACE); + } + + /* set things up */ + p->g = g; + p->next = (char *)pattern; /* convenience; we do not modify it */ + p->end = p->next + len; + p->error = 0; + p->ncsalloc = 0; + for (i = 0; i < NPAREN; i++) { + p->pbegin[i] = 0; + p->pend[i] = 0; + } + g->csetsize = NC; + g->sets = NULL; + g->setbits = NULL; + g->ncsets = 0; + g->cflags = cflags; + g->iflags = 0; + g->nbol = 0; + g->neol = 0; + g->must = NULL; + g->mlen = 0; + g->nsub = 0; + g->ncategories = 1; /* category 0 is "everything else" */ + g->categories = &g->catspace[-(CHAR_MIN)]; + (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); + g->backrefs = 0; + + /* do it */ + EMIT(OEND, 0); + g->firststate = THERE(); + if (cflags®_EXTENDED) + p_ere(p, OUT); + else if (cflags®_NOSPEC) + p_str(p); + else + p_bre(p, OUT, OUT); + EMIT(OEND, 0); + g->laststate = THERE(); + + /* tidy up loose ends and fill things in */ + categorize(p, g); + stripsnug(p, g); + findmust(p, g); + g->nplus = pluscount(p, g); + g->magic = MAGIC2; + preg->re_nsub = g->nsub; + preg->re_g = g; + preg->re_magic = MAGIC1; +#ifndef REDEBUG + /* not debugging, so can't rely on the assert() in llvm_regexec() */ + if (g->iflags®EX_BAD) + SETERROR(REG_ASSERT); +#endif + + /* win or lose, we're done */ + if (p->error != 0) /* lose */ + llvm_regfree(preg); + return(p->error); +} + +/* + - p_ere - ERE parser top level, concatenation and alternation + */ +static void +p_ere(struct parse *p, int stop) /* character this ERE should end at */ +{ + char c; + sopno prevback = 0; + sopno prevfwd = 0; + sopno conc; + int first = 1; /* is this the first alternative? 
*/ + + for (;;) { + /* do a bunch of concatenated expressions */ + conc = HERE(); + while (MORE() && (c = PEEK()) != '|' && c != stop) + p_ere_exp(p); + REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ + + if (!EAT('|')) + break; /* NOTE BREAK OUT */ + + if (first) { + INSERT(OCH_, conc); /* offset is wrong */ + prevfwd = conc; + prevback = conc; + first = 0; + } + ASTERN(OOR1, prevback); + prevback = THERE(); + AHEAD(prevfwd); /* fix previous offset */ + prevfwd = HERE(); + EMIT(OOR2, 0); /* offset is very wrong */ + } + + if (!first) { /* tail-end fixups */ + AHEAD(prevfwd); + ASTERN(O_CH, prevback); + } + + assert(!MORE() || SEE(stop)); +} + +/* + - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + */ +static void +p_ere_exp(struct parse *p) +{ + char c; + sopno pos; + int count; + int count2; + int backrefnum; + sopno subno; + int wascaret = 0; + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + + pos = HERE(); + switch (c) { + case '(': + REQUIRE(MORE(), REG_EPAREN); + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + if (!SEE(')')) + p_ere(p, ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + MUSTEAT(')', REG_EPAREN); + break; +#ifndef POSIX_MISTAKE + case ')': /* happens only if no current unmatched ( */ + /* + * You may ask, why the ifndef? Because I didn't notice + * this until slightly too late for 1003.2, and none of the + * other 1003.2 regular-expression reviewers noticed it at + * all. So an unmatched ) is legal POSIX, at least until + * we can get it fixed. 
+ */ + SETERROR(REG_EPAREN); + break; +#endif + case '^': + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + wascaret = 1; + break; + case '$': + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + break; + case '|': + SETERROR(REG_EMPTY); + break; + case '*': + case '+': + case '?': + SETERROR(REG_BADRPT); + break; + case '.': + if (p->g->cflags®_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case '\\': + REQUIRE(MORE(), REG_EESCAPE); + c = GETNEXT(); + if (c >= '1' && c <= '9') { + /* \[0-9] is taken to be a back-reference to a previously specified + * matching group. backrefnum will hold the number. The matching + * group must exist (i.e. if \4 is found there must have been at + * least 4 matching groups specified in the pattern previously). + */ + backrefnum = c - '0'; + if (p->pend[backrefnum] == 0) { + SETERROR(REG_ESUBREG); + break; + } + + /* Make sure everything checks out and emit the sequence + * that marks a back-reference to the parse structure. + */ + assert(backrefnum <= p->g->nsub); + EMIT(OBACK_, backrefnum); + assert(p->pbegin[backrefnum] != 0); + assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN); + assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN); + (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]); + EMIT(O_BACK, backrefnum); + p->g->backrefs = 1; + } else { + /* Other chars are simply themselves when escaped with a backslash. + */ + ordinary(p, c); + } + break; + case '{': /* okay as ordinary except if digit follows */ + REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); + LLVM_FALLTHROUGH; + default: + ordinary(p, c); + break; + } + + if (!MORE()) + return; + c = PEEK(); + /* we call { a repetition if followed by a digit */ + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) + return; /* no repetition, we're done */ + NEXT(); + + REQUIRE(!wascaret, REG_BADRPT); + switch (c) { + case '*': /* implemented as +? 
*/ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + break; + case '+': + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + break; + case '?': + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, pos); /* offset slightly wrong */ + ASTERN(OOR1, pos); /* this one's right */ + AHEAD(pos); /* fix the OCH_ */ + EMIT(OOR2, 0); /* offset very wrong... */ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit((uch)PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) + return; + SETERROR(REG_BADRPT); +} + +/* + - p_str - string (no metacharacters) "parser" + */ +static void +p_str(struct parse *p) +{ + REQUIRE(MORE(), REG_EMPTY); + while (MORE()) + ordinary(p, GETNEXT()); +} + +/* + - p_bre - BRE parser top level, anchoring and concatenation + * Giving end1 as OUT essentially eliminates the end1/end2 check. + * + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. The + * only undesirable side effect is that '$' gets included as a character + * category in such cases. This is fairly harmless; not worth fixing. + * The amount of lookahead needed to avoid this kludge is excessive. 
+ */ +static void +p_bre(struct parse *p, + int end1, /* first terminating character */ + int end2) /* second terminating character */ +{ + sopno start = HERE(); + int first = 1; /* first subexpression? */ + int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ +} + +/* + - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? */ +{ + int c; + int count; + int count2; + sopno pos; + int i; + sopno subno; +# define BACKSL (1<<CHAR_BIT) + + pos = HERE(); /* repetition op, if any, covers from here */ + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + if (c == '\\') { + REQUIRE(MORE(), REG_EESCAPE); + c = BACKSL | GETNEXT(); + } + switch (c) { + case '.': + if (p->g->cflags®_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case BACKSL|'{': + SETERROR(REG_BADRPT); + break; + case BACKSL|'(': + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + /* the MORE here is an error heuristic */ + if (MORE() && !SEETWO('\\', ')')) + p_bre(p, '\\', ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + REQUIRE(EATTWO('\\', ')'), REG_EPAREN); + break; + case BACKSL|')': /* should not get here -- must be user */ + case BACKSL|'}': + SETERROR(REG_EPAREN); + break; + case BACKSL|'1': + case BACKSL|'2': + case BACKSL|'3': + case BACKSL|'4': + case BACKSL|'5': + case BACKSL|'6': + case 
BACKSL|'7': + case BACKSL|'8': + case BACKSL|'9': + i = (c&~BACKSL) - '0'; + assert(i < NPAREN); + if (p->pend[i] != 0) { + assert(i <= p->g->nsub); + EMIT(OBACK_, i); + assert(p->pbegin[i] != 0); + assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); + assert(OP(p->strip[p->pend[i]]) == ORPAREN); + (void) dupl(p, p->pbegin[i]+1, p->pend[i]); + EMIT(O_BACK, i); + } else + SETERROR(REG_ESUBREG); + p->g->backrefs = 1; + break; + case '*': + REQUIRE(starordinary, REG_BADRPT); + LLVM_FALLTHROUGH; + default: + ordinary(p, (char)c); + break; + } + + if (EAT('*')) { /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + } else if (EATTWO('\\', '{')) { + count = p_count(p); + if (EAT(',')) { + if (MORE() && isdigit((uch)PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EATTWO('\\', '}')) { /* error heuristics */ + while (MORE() && !SEETWO('\\', '}')) + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + } else if (c == '$') /* $ (but not \$) ends it */ + return(1); + + return(0); +} + +/* + - p_count - parse a repetition count + */ +static int /* the value */ +p_count(struct parse *p) +{ + int count = 0; + int ndigits = 0; + + while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { + count = count*10 + (GETNEXT() - '0'); + ndigits++; + } + + REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); + return(count); +} + +/* + - p_bracket - parse a bracketed character list + * + * Note a significant property of this code: if the allocset() did SETERROR, + * no set operations are done. 
+ */ +static void +p_bracket(struct parse *p) +{ + cset *cs; + int invert = 0; + + /* Dept of Truly Sickening Special-Case Kludges */ + if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } + + if ((cs = allocset(p)) == NULL) { + /* allocset did set error status in p */ + return; + } + + if (EAT('^')) + invert++; /* make note to invert set at end */ + if (EAT(']')) + CHadd(cs, ']'); + else if (EAT('-')) + CHadd(cs, '-'); + while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) + p_b_term(p, cs); + if (EAT('-')) + CHadd(cs, '-'); + MUSTEAT(']', REG_EBRACK); + + if (p->error != 0) { /* don't mess things up further */ + freeset(p, cs); + return; + } + + if (p->g->cflags®_ICASE) { + int i; + int ci; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i) && isalpha(i)) { + ci = othercase(i); + if (ci != i) + CHadd(cs, ci); + } + if (cs->multis != NULL) + mccase(p, cs); + } + if (invert) { + int i; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i)) + CHsub(cs, i); + else + CHadd(cs, i); + if (p->g->cflags®_NEWLINE) + CHsub(cs, '\n'); + if (cs->multis != NULL) + mcinvert(p, cs); + } + + assert(cs->multis == NULL); /* xxx */ + + if (nch(p, cs) == 1) { /* optimize singleton sets */ + ordinary(p, firstch(p, cs)); + freeset(p, cs); + } else + EMIT(OANYOF, freezeset(p, cs)); +} + +/* + - p_b_term - parse one term of a bracketed character list + */ +static void +p_b_term(struct parse *p, cset *cs) +{ + char c; + char start, finish; + int i; + + /* classify what we've got */ + switch ((MORE()) ? PEEK() : '\0') { + case '[': + c = (MORE2()) ? 
PEEK2() : '\0'; + break; + case '-': + SETERROR(REG_ERANGE); + return; /* NOTE RETURN */ + break; + default: + c = '\0'; + break; + } + + switch (c) { + case ':': /* character class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECTYPE); + p_b_cclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO(':', ']'), REG_ECTYPE); + break; + case '=': /* equivalence class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECOLLATE); + p_b_eclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); + break; + default: /* symbol, ordinary character, or range */ +/* xxx revision needed for multichar stuff */ + start = p_b_symbol(p); + if (SEE('-') && MORE2() && PEEK2() != ']') { + /* range */ + NEXT(); + if (EAT('-')) + finish = '-'; + else + finish = p_b_symbol(p); + } else + finish = start; +/* xxx what about signed chars here... */ + REQUIRE(start <= finish, REG_ERANGE); + for (i = start; i <= finish; i++) + CHadd(cs, i); + break; + } +} + +/* + - p_b_cclass - parse a character-class name and deal with it + */ +static void +p_b_cclass(struct parse *p, cset *cs) +{ + char *sp = p->next; + struct cclass *cp; + size_t len; + const char *u; + char c; + + while (MORE() && isalpha((uch)PEEK())) + NEXT(); + len = p->next - sp; + for (cp = cclasses; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + break; + if (cp->name == NULL) { + /* oops, didn't find it */ + SETERROR(REG_ECTYPE); + return; + } + + u = cp->chars; + while ((c = *u++) != '\0') + CHadd(cs, c); + for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) + MCadd(p, cs, u); +} + +/* + - p_b_eclass - parse an equivalence-class name and deal with it + * + * This implementation is incomplete. 
xxx + */ +static void +p_b_eclass(struct parse *p, cset *cs) +{ + char c; + + c = p_b_coll_elem(p, '='); + CHadd(cs, c); +} + +/* + - p_b_symbol - parse a character or [..]ed multicharacter collating symbol + */ +static char /* value of symbol */ +p_b_symbol(struct parse *p) +{ + char value; + + REQUIRE(MORE(), REG_EBRACK); + if (!EATTWO('[', '.')) + return(GETNEXT()); + + /* collating symbol */ + value = p_b_coll_elem(p, '.'); + REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); + return(value); +} + +/* + - p_b_coll_elem - parse a collating-element name and look it up + */ +static char /* value of collating element */ +p_b_coll_elem(struct parse *p, + int endc) /* name ended by endc,']' */ +{ + char *sp = p->next; + struct cname *cp; + size_t len; + + while (MORE() && !SEETWO(endc, ']')) + NEXT(); + if (!MORE()) { + SETERROR(REG_EBRACK); + return(0); + } + len = p->next - sp; + for (cp = cnames; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len) + return(cp->code); /* known name */ + if (len == 1) + return(*sp); /* single character */ + SETERROR(REG_ECOLLATE); /* neither */ + return(0); +} + +/* + - othercase - return the case counterpart of an alphabetic + */ +static char /* if no counterpart, return ch */ +othercase(int ch) +{ + ch = (uch)ch; + assert(isalpha(ch)); + if (isupper(ch)) + return ((uch)tolower(ch)); + else if (islower(ch)) + return ((uch)toupper(ch)); + else /* peculiar, but could happen */ + return(ch); +} + +/* + - bothcases - emit a dualcase version of a two-case character + * + * Boy, is this implementation ever a kludge... 
+ */ +static void +bothcases(struct parse *p, int ch) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[3]; + + ch = (uch)ch; + assert(othercase(ch) != ch); /* p_bracket() would recurse */ + p->next = bracket; + p->end = bracket+2; + bracket[0] = ch; + bracket[1] = ']'; + bracket[2] = '\0'; + p_bracket(p); + assert(p->next == bracket+2); + p->next = oldnext; + p->end = oldend; +} + +/* + - ordinary - emit an ordinary character + */ +static void +ordinary(struct parse *p, int ch) +{ + cat_t *cap = p->g->categories; + + if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) + bothcases(p, ch); + else { + EMIT(OCHAR, (uch)ch); + if (cap[ch] == 0) + cap[ch] = p->g->ncategories++; + } +} + +/* + - nonnewline - emit REG_NEWLINE version of OANY + * + * Boy, is this implementation ever a kludge... + */ +static void +nonnewline(struct parse *p) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[4]; + + p->next = bracket; + p->end = bracket+3; + bracket[0] = '^'; + bracket[1] = '\n'; + bracket[2] = ']'; + bracket[3] = '\0'; + p_bracket(p); + assert(p->next == bracket+3); + p->next = oldnext; + p->end = oldend; +} + +/* + - repeat - generate code for a bounded repetition, recursively if needed + */ +static void +repeat(struct parse *p, + sopno start, /* operand from here to end of strip */ + int from, /* repeated from this number */ + int to) /* to this number of times (maybe INFINITY) */ +{ + sopno finish = HERE(); +# define N 2 +# define INF 3 +# define REP(f, t) ((f)*8 + (t)) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) + sopno copy; + + if (p->error != 0) /* head off possible runaway recursion */ + return; + + assert(from <= to); + + switch (REP(MAP(from), MAP(to))) { + case REP(0, 0): /* must be user doing this */ + DROP(finish-start); /* drop the operand */ + break; + case REP(0, 1): /* as x{1,1}? */ + case REP(0, N): /* as x{1,n}? */ + case REP(0, INF): /* as x{1,}? */ + /* KLUDGE: emit y? 
as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); /* offset is wrong... */ + repeat(p, start+1, 1, to); + ASTERN(OOR1, start); + AHEAD(start); /* ... fix it */ + EMIT(OOR2, 0); + AHEAD(THERE()); + ASTERN(O_CH, THERETHERE()); + break; + case REP(1, 1): /* trivial case */ + /* done */ + break; + case REP(1, N): /* as x?x{1,n-1} */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); + ASTERN(OOR1, start); + AHEAD(start); + EMIT(OOR2, 0); /* offset very wrong... */ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + copy = dupl(p, start+1, finish+1); + assert(copy == finish+4); + repeat(p, copy, 1, to-1); + break; + case REP(1, INF): /* as x+ */ + INSERT(OPLUS_, start); + ASTERN(O_PLUS, start); + break; + case REP(N, N): /* as xx{m-1,n-1} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to-1); + break; + case REP(N, INF): /* as xx{n-1,INF} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to); + break; + default: /* "can't happen" */ + SETERROR(REG_ASSERT); /* just in case */ + break; + } +} + +/* + - seterr - set an error condition + */ +static int /* useless but makes type checking happy */ +seterr(struct parse *p, int e) +{ + if (p->error == 0) /* keep earliest error condition */ + p->error = e; + p->next = nuls; /* try to bring things to a halt */ + p->end = nuls; + return(0); /* make the return value well-defined */ +} + +/* + - allocset - allocate a set of characters for [] + */ +static cset * +allocset(struct parse *p) +{ + int no = p->g->ncsets++; + size_t nc; + size_t nbytes; + cset *cs; + size_t css = (size_t)p->g->csetsize; + int i; + + if (no >= p->ncsalloc) { /* need another column of space */ + void *ptr; + + p->ncsalloc += CHAR_BIT; + nc = p->ncsalloc; + if (nc > SIZE_MAX / sizeof(cset)) + goto nomem; + assert(nc % CHAR_BIT == 0); + nbytes = nc / CHAR_BIT * css; + + ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); + if (ptr == NULL) + goto nomem; + 
p->g->sets = ptr; + + ptr = (uch *)realloc((char *)p->g->setbits, nbytes); + if (ptr == NULL) + goto nomem; + p->g->setbits = ptr; + + for (i = 0; i < no; i++) + p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); + + (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); + } + /* XXX should not happen */ + if (p->g->sets == NULL || p->g->setbits == NULL) + goto nomem; + + cs = &p->g->sets[no]; + cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); + cs->mask = 1 << ((no) % CHAR_BIT); + cs->hash = 0; + cs->smultis = 0; + cs->multis = NULL; + + return(cs); +nomem: + free(p->g->sets); + p->g->sets = NULL; + free(p->g->setbits); + p->g->setbits = NULL; + + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + return(NULL); +} + +/* + - freeset - free a now-unused set + */ +static void +freeset(struct parse *p, cset *cs) +{ + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, i); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + - freezeset - final processing on a set of characters + * + * The main task here is merging identical sets. This is usually a waste + * of time (although the hash code minimizes the overhead), but can win + * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash + * is done using addition rather than xor -- all ASCII [aA] sets xor to + * the same value! 
+ */ +static int /* set number */ +freezeset(struct parse *p, cset *cs) +{ + uch h = cs->hash; + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + cset *cs2; + size_t css = (size_t)p->g->csetsize; + + /* look for an earlier one which is the same */ + for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) + if (cs2->hash == h && cs2 != cs) { + /* maybe */ + for (i = 0; i < css; i++) + if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + break; /* no */ + if (i == css) + break; /* yes */ + } + + if (cs2 < top) { /* found one */ + freeset(p, cs); + cs = cs2; + } + + return((int)(cs - p->g->sets)); +} + +/* + - firstch - return first character in a set (which must have at least one) + */ +static int /* character; there is no "none" value */ +firstch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + return((char)i); + assert(never); + return(0); /* arbitrary */ +} + +/* + - nch - number of characters in a set + */ +static int +nch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + int n = 0; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + n++; + return(n); +} + +/* + - mcadd - add a collating element to a cset + */ +static void +mcadd( struct parse *p, cset *cs, const char *cp) +{ + size_t oldend = cs->smultis; + void *np; + + cs->smultis += strlen(cp) + 1; + np = realloc(cs->multis, cs->smultis); + if (np == NULL) { + if (cs->multis) + free(cs->multis); + cs->multis = NULL; + SETERROR(REG_ESPACE); + return; + } + cs->multis = np; + + llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); +} + +/* + - mcinvert - invert the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. 
+ */ +/* ARGSUSED */ +static void +mcinvert(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - mccase - add case counterparts of the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +/* ARGSUSED */ +static void +mccase(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - isinsets - is this character in any sets? + */ +static int /* predicate */ +isinsets(struct re_guts *g, int c) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc = (uch)c; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc] != 0) + return(1); + return(0); +} + +/* + - samesets - are these two characters in exactly the same sets? + */ +static int /* predicate */ +samesets(struct re_guts *g, int c1, int c2) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc1 = (uch)c1; + unsigned uc2 = (uch)c2; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc1] != col[uc2]) + return(0); + return(1); +} + +/* + - categorize - sort out character categories + */ +static void +categorize(struct parse *p, struct re_guts *g) +{ + cat_t *cats = g->categories; + int c; + int c2; + cat_t cat; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (cats[c] == 0 && isinsets(g, c)) { + cat = g->ncategories++; + cats[c] = cat; + for (c2 = c+1; c2 <= CHAR_MAX; c2++) + if (cats[c2] == 0 && samesets(g, c, c2)) + cats[c2] = cat; + } +} + +/* + - dupl - emit a duplicate of a bunch of sops + */ +static sopno /* start of duplicate */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ +{ + sopno ret = HERE(); + sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return(ret); + enlarge(p, p->ssize + len); /* this many 
unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memmove((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof(sop)); + p->slen += len; + return(ret); +} + +/* + - doemit - emit a strip operator + * + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. + */ +static void +doemit(struct parse *p, sop op, size_t opnd) +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* deal with oversize operands ("can't happen", more or less) */ + assert(opnd < 1<<OPSHIFT); + + /* deal with undersized strip */ + if (p->slen >= p->ssize) + enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ + assert(p->slen < p->ssize); + + /* finally, it's all reduced to the easy case */ + p->strip[p->slen++] = SOP(op, opnd); +} + +/* + - doinsert - insert a sop into the strip + */ +static void +doinsert(struct parse *p, sop op, size_t opnd, sopno pos) +{ + sopno sn; + sop s; + int i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + sn = HERE(); + EMIT(op, opnd); /* do checks, ensure space */ + assert(HERE() == sn+1); + s = p->strip[sn]; + + /* adjust paren pointers */ + assert(pos > 0); + for (i = 1; i < NPAREN; i++) { + if (p->pbegin[i] >= pos) { + p->pbegin[i]++; + } + if (p->pend[i] >= pos) { + p->pend[i]++; + } + } + + memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], + (HERE()-pos-1)*sizeof(sop)); + p->strip[pos] = s; +} + +/* + - dofwd - complete a forward reference + */ +static void +dofwd(struct parse *p, sopno pos, sop value) +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + assert(value < 1<<OPSHIFT); + p->strip[pos] = OP(p->strip[pos]) | value; +} + +/* + - enlarge - enlarge the strip + */ +static void +enlarge(struct parse *p, sopno size) +{ + sop *sp; + + if (p->ssize >= size) + return; + + if 
((uintptr_t)size > SIZE_MAX / sizeof(sop)) { + SETERROR(REG_ESPACE); + return; + } + + sp = (sop *)realloc(p->strip, size*sizeof(sop)); + if (sp == NULL) { + SETERROR(REG_ESPACE); + return; + } + p->strip = sp; + p->ssize = size; +} + +/* + - stripsnug - compact the strip + */ +static void +stripsnug(struct parse *p, struct re_guts *g) +{ + g->nstates = p->slen; + if ((uintptr_t)p->slen > SIZE_MAX / sizeof(sop)) { + g->strip = p->strip; + SETERROR(REG_ESPACE); + return; + } + + g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); + if (g->strip == NULL) { + SETERROR(REG_ESPACE); + g->strip = p->strip; + } +} + +/* + - findmust - fill in must and mlen with longest mandatory literal string + * + * This algorithm could do fancy things like analyzing the operands of | + * for common subsequences. Someday. This code is simple and finds most + * of the interesting cases. + * + * Note that must and mlen got initialized during setup. + */ +static void +findmust(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop *start = 0; /* start initialized in the default case, after that */ + sop *newstart = 0; /* newstart was initialized in the OCHAR case */ + sopno newlen; + sop s; + char *cp; + sopno i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* find the longest OCHAR sequence in strip */ + newlen = 0; + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OCHAR: /* sequence member */ + if (newlen == 0) /* new sequence */ + newstart = scan - 1; + newlen++; + break; + case OPLUS_: /* things that don't break one */ + case OLPAREN: + case ORPAREN: + break; + case OQUEST_: /* things that must be skipped */ + case OCH_: + scan--; + do { + scan += OPND(s); + s = *scan; + /* assert() interferes w debug printouts */ + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) { + g->iflags |= REGEX_BAD; + return; + } + } while (OP(s) != O_QUEST && OP(s) != O_CH); + LLVM_FALLTHROUGH; + default: /* things that break 
a sequence */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + } + newlen = 0; + break; + } + } while (OP(s) != OEND); + + if (g->mlen == 0) /* there isn't one */ + return; + + /* turn it into a character string */ + g->must = malloc((size_t)g->mlen + 1); + if (g->must == NULL) { /* argh; just forget it */ + g->mlen = 0; + return; + } + cp = g->must; + scan = start; + for (i = g->mlen; i > 0; i--) { + while (OP(s = *scan++) != OCHAR) + continue; + assert(cp < g->must + g->mlen); + *cp++ = (char)OPND(s); + } + assert(cp == g->must + g->mlen); + *cp++ = '\0'; /* just on general principles */ +} + +/* + - pluscount - count + nesting + */ +static sopno /* nesting depth */ +pluscount(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop s; + sopno plusnest = 0; + sopno maxnest = 0; + + if (p->error != 0) + return(0); /* there may not be an OEND */ + + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OPLUS_: + plusnest++; + break; + case O_PLUS: + if (plusnest > maxnest) + maxnest = plusnest; + plusnest--; + break; + } + } while (OP(s) != OEND); + if (plusnest != 0) + g->iflags |= REGEX_BAD; + return(maxnest); +} diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc new file mode 100644 index 0000000000000..41787aff12423 --- /dev/null +++ b/llvm/lib/Support/regengine.inc @@ -0,0 +1,1034 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)engine.c 8.5 (Berkeley) 3/20/94 + */ + +/* + * The matching engine and friends. This file is #included by regexec.c + * after suitable #defines of a variety of macros used herein, so that + * different state representations can be used without duplicating masses + * of code. 
+ */ + +#ifdef SNAMES +#define matcher smatcher +#define fast sfast +#define slow sslow +#define dissect sdissect +#define backref sbackref +#define step sstep +#define print sprint +#define at sat +#define match smat +#define nope snope +#endif +#ifdef LNAMES +#define matcher lmatcher +#define fast lfast +#define slow lslow +#define dissect ldissect +#define backref lbackref +#define step lstep +#define print lprint +#define at lat +#define match lmat +#define nope lnope +#endif + +/* another structure passed up and down to avoid zillions of parameters */ +struct match { + struct re_guts *g; + int eflags; + llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ + const char *offp; /* offsets work from here */ + const char *beginp; /* start of string -- virtual NUL precedes */ + const char *endp; /* end of string -- virtual NUL here */ + const char *coldp; /* can be no match starting before here */ + const char **lastpos; /* [nplus+1] */ + STATEVARS; + states st; /* current states */ + states fresh; /* states for a fresh start */ + states tmp; /* temporary */ + states empty; /* empty set of states */ +}; + +static int matcher(struct re_guts *, const char *, size_t, + llvm_regmatch_t[], int); +static const char *dissect(struct match *, const char *, const char *, sopno, + sopno); +static const char *backref(struct match *, const char *, const char *, sopno, + sopno, sopno, int); +static const char *fast(struct match *, const char *, const char *, sopno, sopno); +static const char *slow(struct match *, const char *, const char *, sopno, sopno); +static states step(struct re_guts *, sopno, sopno, states, int, states); +#define MAX_RECURSION 100 +#define BOL (OUT+1) +#define EOL (BOL+1) +#define BOLEOL (BOL+2) +#define NOTHING (BOL+3) +#define BOW (BOL+4) +#define EOW (BOL+5) +#define CODEMAX (BOL+5) /* highest code used */ +#define NONCHAR(c) ((c) > CHAR_MAX) +#define NNONCHAR (CODEMAX-CHAR_MAX) +#ifdef REDEBUG +static void print(struct match *, char *, states, 
int, FILE *); +#endif +#ifdef REDEBUG +static void at(struct match *, char *, char *, char *, sopno, sopno); +#endif +#ifdef REDEBUG +static char *pchar(int); +#endif + +#ifdef REDEBUG +#define SP(t, s, c) print(m, t, s, c, stdout) +#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) +#define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } +static int nope = 0; +#else +#define SP(t, s, c) /* nothing */ +#define AT(t, p1, p2, s1, s2) /* nothing */ +#define NOTE(s) /* nothing */ +#endif + +/* + - matcher - the actual matching engine + */ +static int /* 0 success, REG_NOMATCH failure */ +matcher(struct re_guts *g, const char *string, size_t nmatch, + llvm_regmatch_t pmatch[], + int eflags) +{ + const char *endp; + size_t i; + struct match mv; + struct match *m = &mv; + const char *dp; + const sopno gf = g->firststate+1; /* +1 for OEND */ + const sopno gl = g->laststate; + const char *start; + const char *stop; + + /* simplify the situation where possible */ + if (g->cflags®_NOSUB) + nmatch = 0; + if (eflags®_STARTEND) { + start = string + pmatch[0].rm_so; + stop = string + pmatch[0].rm_eo; + } else { + start = string; + stop = start + strlen(start); + } + if (stop < start) + return(REG_INVARG); + + /* prescreening; this does wonders for this rather slow code */ + if (g->must != NULL) { + for (dp = start; dp < stop; dp++) + if (*dp == g->must[0] && stop - dp >= g->mlen && + memcmp(dp, g->must, (size_t)g->mlen) == 0) + break; + if (dp == stop) /* we didn't find g->must */ + return(REG_NOMATCH); + } + + /* match struct setup */ + m->g = g; + m->eflags = eflags; + m->pmatch = NULL; + m->lastpos = NULL; + m->offp = string; + m->beginp = start; + m->endp = stop; + STATESETUP(m, 4); + SETUP(m->st); + SETUP(m->fresh); + SETUP(m->tmp); + SETUP(m->empty); + CLEAR(m->empty); + + /* this loop does only one repetition except for backrefs */ + for (;;) { + endp = fast(m, start, stop, gf, gl); + if (endp == NULL) { /* a miss */ + free(m->pmatch); + 
free((void*)m->lastpos); + STATETEARDOWN(m); + return(REG_NOMATCH); + } + if (nmatch == 0 && !g->backrefs) + break; /* no further info needed */ + + /* where? */ + assert(m->coldp != NULL); + for (;;) { + NOTE("finding start"); + endp = slow(m, m->coldp, stop, gf, gl); + if (endp != NULL) + break; + assert(m->coldp < m->endp); + m->coldp++; + } + if (nmatch == 1 && !g->backrefs) + break; /* no further info needed */ + + /* oh my, they want the subexpressions... */ + if (m->pmatch == NULL) + m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * + sizeof(llvm_regmatch_t)); + if (m->pmatch == NULL) { + STATETEARDOWN(m); + return(REG_ESPACE); + } + for (i = 1; i <= m->g->nsub; i++) + m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; + if (!g->backrefs && !(m->eflags®_BACKR)) { + NOTE("dissecting"); + dp = dissect(m, m->coldp, endp, gf, gl); + } else { + if (g->nplus > 0 && m->lastpos == NULL) + m->lastpos = (const char **)malloc((g->nplus+1) * + sizeof(char *)); + if (g->nplus > 0 && m->lastpos == NULL) { + free(m->pmatch); + STATETEARDOWN(m); + return(REG_ESPACE); + } + NOTE("backref dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + if (dp != NULL) + break; + + /* uh-oh... 
we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + if (m->coldp == stop) + break; + start = m->coldp + 1; /* recycle starting later */ + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return(0); +} + +/* + - dissect - figure out what matched what, no back references + */ +static const char * /* == stop (success) always */ +dissect(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + const char *sp; /* start of string matched by it */ + const char *stp; /* string matched by it cannot pass here */ + const char *rest; /* start of rest of string */ + const char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* 
start of string matched by subsubRE */ + const char *sep; /* end of string matched by subsubRE */ + const char *oldssp; /* previous ssp */ + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(nope); + break; + case OCHAR: + sp++; + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp++; + break; + case OBACK_: + case O_BACK: + assert(nope); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? */ + if (slow(m, sp, rest, ssub, esub) != NULL) { + const char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! 
*/ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + ssp = sp; + oldssp = ssp; + for (;;) { /* find last match of innards */ + sep = slow(m, ssp, rest, ssub, esub); + if (sep == NULL || sep == ssp) + break; /* failed or matched null */ + oldssp = ssp; /* on to next try */ + ssp = sep; + } + if (sep == NULL) { + /* last successful match */ + sep = ssp; + ssp = oldssp; + } + assert(sep == rest); /* must exhaust substring */ + assert(slow(m, ssp, sep, ssub, esub) == rest); + { + const char *dp = dissect(m, ssp, sep, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == sep); + } + sp = rest; + break; + case OCH_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = ss + OPND(m->g->strip[ss]) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + if (slow(m, sp, rest, ssub, esub) == rest) + break; /* it matched all of it */ + /* that one missed, try next one */ + assert(OP(m->g->strip[esub]) == OOR1); + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + { + const char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } + sp = rest; + break; + case O_PLUS: + case O_QUEST: + case OOR1: + case OOR2: + case O_CH: + assert(nope); + break; + case OLPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_so = sp - m->offp; + break; + case 
ORPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_eo = sp - m->offp; + break; + default: /* uh oh */ + assert(nope); + break; + } + } + + assert(sp == stop); + return(sp); +} + +/* + - backref - figure out what matched what, figuring in back references + */ +static const char * /* == stop (success) or NULL (failure) */ +backref(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst, sopno lev, int rec) /* PLUS nesting level */ +{ + int i; + sopno ss; /* start sop of current subRE */ + const char *sp; /* start of string matched by it */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* start of string matched by subsubRE */ + const char *dp; + size_t len; + int hard; + sop s; + llvm_regoff_t offsave; + cset *cs; + + AT("back", start, stop, startst, stopst); + sp = start; + + /* get as far as we can with easy stuff */ + hard = 0; + for (ss = startst; !hard && ss < stopst; ss++) + switch (OP(s = m->g->strip[ss])) { + case OCHAR: + if (sp == stop || *sp++ != (char)OPND(s)) + return(NULL); + break; + case OANY: + if (sp == stop) + return(NULL); + sp++; + break; + case OANYOF: + cs = &m->g->sets[OPND(s)]; + if (sp == stop || !CHIN(cs, *sp++)) + return(NULL); + break; + case OBOL: + if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags®_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOL: + if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags®_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OBOW: + if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags®_NEWLINE)) || + (sp > m->beginp && + !ISWORD(*(sp-1))) ) && + (sp < m->endp && ISWORD(*sp)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOW: + if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || + (sp < 
m->endp && *sp == '\n' && + (m->g->cflags®_NEWLINE)) || + (sp < m->endp && !ISWORD(*sp)) ) && + (sp > m->beginp && ISWORD(*(sp-1))) ) + { /* yes */ } + else + return(NULL); + break; + case O_QUEST: + break; + case OOR1: /* matches null but needs to skip */ + ss++; + s = m->g->strip[ss]; + do { + assert(OP(s) == OOR2); + ss += OPND(s); + } while (OP(s = m->g->strip[ss]) != O_CH); + /* note that the ss++ gets us past the O_CH */ + break; + default: /* have to make a choice */ + hard = 1; + break; + } + if (!hard) { /* that was it! */ + if (sp != stop) + return(NULL); + return(sp); + } + ss--; /* adjust for the for's final increment */ + + /* the hard stuff */ + AT("hard", sp, stop, ss, stopst); + s = m->g->strip[ss]; + switch (OP(s)) { + case OBACK_: /* the vilest depths */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + if (m->pmatch[i].rm_eo == -1) + return(NULL); + assert(m->pmatch[i].rm_so != -1); + len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; + if (len == 0 && rec++ > MAX_RECURSION) + return(NULL); + assert(stop - m->beginp >= len); + if (sp > stop - len) + return(NULL); /* not enough left to match */ + ssp = m->offp + m->pmatch[i].rm_so; + if (memcmp(sp, ssp, len) != 0) + return(NULL); + while (m->g->strip[ss] != SOP(O_BACK, i)) + ss++; + return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); + break; + case OQUEST_: /* to null or not */ + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return(dp); /* not */ + return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); + break; + case OPLUS_: + assert(m->lastpos != NULL); + assert(lev+1 <= m->g->nplus); + m->lastpos[lev+1] = sp; + return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); + break; + case O_PLUS: + if (sp == m->lastpos[lev]) /* last pass matched null */ + return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); + /* try another pass */ + m->lastpos[lev] = sp; + dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); + if (dp == NULL) + return(backref(m, sp, 
stop, ss+1, stopst, lev-1, rec)); + else + return(dp); + break; + case OCH_: /* find the right one, if any */ + ssub = ss + 1; + esub = ss + OPND(s) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + dp = backref(m, sp, stop, ssub, esub, lev, rec); + if (dp != NULL) + return(dp); + /* that one missed, try next one */ + if (OP(m->g->strip[esub]) == O_CH) + return(NULL); /* there is none */ + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + break; + case OLPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_so; + m->pmatch[i].rm_so = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_so = offsave; + return(NULL); + break; + case ORPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_eo; + m->pmatch[i].rm_eo = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_eo = offsave; + return(NULL); + break; + default: /* uh oh */ + assert(nope); + break; + } + + /* "can't happen" */ + assert(nope); + /* NOTREACHED */ + return NULL; +} + +/* + - fast - step through the string at top speed + */ +static const char * /* where tentative match ended, or NULL */ +fast(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + states st = m->st; + states fresh = m->fresh; + states tmp = m->tmp; + const char *p = start; + int c = (start == m->beginp) ? 
OUT : *(start-1); + int lastc; /* previous c */ + int flagch; + int i; + const char *coldp; /* last p after which no match was underway */ + + CLEAR(st); + SET1(st, startst); + st = step(m->g, startst, stopst, st, NOTHING, st); + ASSIGN(fresh, st); + SP("start", st, *p); + coldp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + if (EQ(st, fresh)) + coldp = p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || + (lastc == OUT && !(m->eflags®_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags®_NEWLINE) || + (c == OUT && !(m->eflags®_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("boleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("boweow", st, c); + } + + /* are we done? 
*/ + if (ISSET(st, stopst) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, fresh); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("aft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + assert(coldp != NULL); + m->coldp = coldp; + if (ISSET(st, stopst)) + return(p+1); + else + return(NULL); +} + +/* + - slow - step through the string more deliberately + */ +static const char * /* where it ended */ +slow(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + states st = m->st; + states empty = m->empty; + states tmp = m->tmp; + const char *p = start; + int c = (start == m->beginp) ? OUT : *(start-1); + int lastc; /* previous c */ + int flagch; + int i; + const char *matchp; /* last p at which a match ended */ + + AT("slow", start, stop, startst, stopst); + CLEAR(st); + SET1(st, startst); + SP("sstart", st, *p); + st = step(m->g, startst, stopst, st, NOTHING, st); + matchp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || + (lastc == OUT && !(m->eflags®_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags®_NEWLINE) || + (c == OUT && !(m->eflags®_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboleol", st, c); + } + + /* how about a word boundary? 
*/ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboweow", st, c); + } + + /* are we done? */ + if (ISSET(st, stopst)) + matchp = p; + if (EQ(st, empty) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, empty); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("saft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + return(matchp); +} + + +/* + - step - map set of states reachable before char to set reachable after + */ +static states +step(struct re_guts *g, + sopno start, /* start state within strip */ + sopno stop, /* state after stop state within strip */ + states bef, /* states reachable before */ + int ch, /* character or NONCHAR code */ + states aft) /* states already known reachable after */ +{ + cset *cs; + sop s; + sopno pc; + onestate here; /* note, macros know this name */ + sopno look; + int i; + + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { + s = g->strip[pc]; + switch (OP(s)) { + case OEND: + assert(pc == stop-1); + break; + case OCHAR: + /* only characters can match */ + assert(!NONCHAR(ch) || ch != (char)OPND(s)); + if (ch == (char)OPND(s)) + FWD(aft, bef, 1); + break; + case OBOL: + if (ch == BOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OEOL: + if (ch == EOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OBOW: + if (ch == BOW) + FWD(aft, bef, 1); + break; + case OEOW: + if (ch == EOW) + FWD(aft, bef, 1); + break; + case OANY: + if (!NONCHAR(ch)) + FWD(aft, bef, 1); + break; + case OANYOF: + cs = &g->sets[OPND(s)]; + if (!NONCHAR(ch) && CHIN(cs, ch)) + FWD(aft, bef, 1); + break; + case OBACK_: /* ignored here 
*/ + case O_BACK: + FWD(aft, aft, 1); + break; + case OPLUS_: /* forward, this is just an empty */ + FWD(aft, aft, 1); + break; + case O_PLUS: /* both forward and back */ + FWD(aft, aft, 1); + i = ISSETBACK(aft, OPND(s)); + BACK(aft, aft, OPND(s)); + if (!i && ISSETBACK(aft, OPND(s))) { + /* oho, must reconsider loop body */ + pc -= OPND(s) + 1; + INIT(here, pc); + } + break; + case OQUEST_: /* two branches, both forward */ + FWD(aft, aft, 1); + FWD(aft, aft, OPND(s)); + break; + case O_QUEST: /* just an empty */ + FWD(aft, aft, 1); + break; + case OLPAREN: /* not significant here */ + case ORPAREN: + FWD(aft, aft, 1); + break; + case OCH_: /* mark the first two branches */ + FWD(aft, aft, 1); + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + break; + case OOR1: /* done a branch, find the O_CH */ + if (ISSTATEIN(aft, here)) { + for (look = 1; + OP(s = g->strip[pc+look]) != O_CH; + look += OPND(s)) + assert(OP(s) == OOR2); + FWD(aft, aft, look); + } + break; + case OOR2: /* propagate OCH_'s marking */ + FWD(aft, aft, 1); + if (OP(g->strip[pc+OPND(s)]) != O_CH) { + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + } + break; + case O_CH: /* just empty */ + FWD(aft, aft, 1); + break; + default: /* ooooops... */ + assert(nope); + break; + } + } + + return(aft); +} + +#ifdef REDEBUG +/* + - print - print a set of states + */ +static void +print(struct match *m, char *caption, states st, int ch, FILE *d) +{ + struct re_guts *g = m->g; + int i; + int first = 1; + + if (!(m->eflags®_TRACE)) + return; + + (void)fprintf(d, "%s", caption); + if (ch != '\0') + (void)fprintf(d, " %s", pchar(ch)); + for (i = 0; i < g->nstates; i++) + if (ISSET(st, i)) { + (void)fprintf(d, "%s%d", (first) ? 
"\t" : ", ", i); + first = 0; + } + (void)fprintf(d, "\n"); +} + +/* + - at - print current situation + */ +static void +at(struct match *m, char *title, char *start, char *stop, sopno startst, + sopno stopst) +{ + if (!(m->eflags®_TRACE)) + return; + + (void)printf("%s %s-", title, pchar(*start)); + (void)printf("%s ", pchar(*stop)); + (void)printf("%ld-%ld\n", (long)startst, (long)stopst); +} + +#ifndef PCHARDONE +#define PCHARDONE /* never again */ +/* + - pchar - make a character printable + * + * Is this identical to regchar() over in debug.c? Well, yes. But a + * duplicate here avoids having a debugging-capable regexec.o tied to + * a matching debug.o, and this is convenient. It all disappears in + * the non-debug compilation anyway, so it doesn't matter much. + */ +static char * /* -> representation */ +pchar(int ch) +{ + static char pbuf[10]; + + if (isPrint(ch) || ch == ' ') + (void)snprintf(pbuf, sizeof pbuf, "%c", ch); + else + (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); + return(pbuf); +} +#endif +#endif + +#undef matcher +#undef fast +#undef slow +#undef dissect +#undef backref +#undef step +#undef print +#undef at +#undef match +#undef nope diff --git a/llvm/lib/Support/regerror.c b/llvm/lib/Support/regerror.c new file mode 100644 index 0000000000000..1d67c9a2b03b1 --- /dev/null +++ b/llvm/lib/Support/regerror.c @@ -0,0 +1,135 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)regerror.c 8.4 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" + +#ifdef _MSC_VER +#define snprintf _snprintf +#endif + +static const char *regatoi(const llvm_regex_t *, char *, int); + +static struct rerr { + int code; + const char *name; + const char *explain; +} rerrs[] = { + { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" }, + { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, + { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, + { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, + { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, + { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, + { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, + { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, + { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, + { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, + { REG_ERANGE, "REG_ERANGE", "invalid character range" }, + { REG_ESPACE, "REG_ESPACE", "out of memory" }, + { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, + { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, + { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, + { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, + { 0, "", "*** unknown regexp error code ***" } +}; + +/* + - llvm_regerror - the interface to error numbers + = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t); + */ +/* ARGSUSED */ +size_t +llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size) +{ + struct rerr *r; + size_t len; + int target = errcode &~ REG_ITOA; + const char *s; + char convbuf[50]; + + if (errcode == REG_ATOI) + s = regatoi(preg, convbuf, sizeof convbuf); + else { + for (r = rerrs; r->code != 0; r++) + if (r->code == target) + 
break; + + if (errcode®_ITOA) { + if (r->code != 0) { + assert(strlen(r->name) < sizeof(convbuf)); + (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf); + } else + (void)snprintf(convbuf, sizeof convbuf, + "REG_0x%x", target); + s = convbuf; + } else + s = r->explain; + } + + len = strlen(s) + 1; + if (errbuf_size > 0) { + llvm_strlcpy(errbuf, s, errbuf_size); + } + + return(len); +} + +/* + - regatoi - internal routine to implement REG_ATOI + */ +static const char * +regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize) +{ + struct rerr *r; + + for (r = rerrs; r->code != 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code == 0) + return("0"); + + (void)snprintf(localbuf, localbufsize, "%d", r->code); + return(localbuf); +} diff --git a/llvm/lib/Support/regex2.h b/llvm/lib/Support/regex2.h new file mode 100644 index 0000000000000..19d14cd14abbb --- /dev/null +++ b/llvm/lib/Support/regex2.h @@ -0,0 +1,165 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex2.h 8.4 (Berkeley) 3/20/94 + */ + +#ifndef LLVM_SUPPORT_REGEX2_H +#define LLVM_SUPPORT_REGEX2_H + +#include "regutils.h" +#include <stddef.h> + +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. + * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. 
Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. + * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. + */ +typedef unsigned long sop; /* strip operator */ +typedef long sopno; +#define OPRMASK 0xf8000000LU +#define OPDMASK 0x07ffffffLU +#define OPSHIFT ((unsigned)27) +#define OP(n) ((n)&OPRMASK) +#define OPND(n) ((n)&OPDMASK) +#define SOP(op, opnd) ((op)|(opnd)) +/* operators meaning operand */ +/* (back, fwd are offsets) */ +#define OEND (1LU<<OPSHIFT) /* endmarker - */ +#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */ +#define OBOL (3LU<<OPSHIFT) /* left anchor - */ +#define OEOL (4LU<<OPSHIFT) /* right anchor - */ +#define OANY (5LU<<OPSHIFT) /* . - */ +#define OANYOF (6LU<<OPSHIFT) /* [...] set number */ +#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */ +#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */ +#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */ +#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */ +#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */ +#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */ +#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */ +#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */ +#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */ +#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */ +#define OBOW (19LU<<OPSHIFT) /* begin word - */ +#define OEOW (20LU<<OPSHIFT) /* end word - */ + +/* + * Structure for [] character-set representation. Character sets are + * done as bit vectors, grouped 8 to a byte vector for compactness. + * The individual set therefore has both a pointer to the byte vector + * and a mask to pick out the relevant bit of each byte. 
A hash code + * simplifies testing whether two sets could be identical. + * + * This will get trickier for multicharacter collating elements. As + * preliminary hooks for dealing with such things, we also carry along + * a string of multi-character elements, and decide the size of the + * vectors at run time. + */ +typedef struct { + uch *ptr; /* -> uch [csetsize] */ + uch mask; /* bit within array */ + uch hash; /* hash code */ + size_t smultis; + char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ +} cset; +/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ +#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) +#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) +#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) +#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */ +#define MCsub(p, cs, cp) mcsub(p, cs, cp) +#define MCin(p, cs, cp) mcin(p, cs, cp) + +/* stuff for character categories */ +typedef unsigned char cat_t; + +/* + * main compiled-expression structure + */ +struct re_guts { + int magic; +# define MAGIC2 ((('R'^0200)<<8)|'E') + sop *strip; /* malloced area for strip */ + int csetsize; /* number of bits in a cset vector */ + int ncsets; /* number of csets in use */ + cset *sets; /* -> cset [ncsets] */ + uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ + int cflags; /* copy of llvm_regcomp() cflags argument */ + sopno nstates; /* = number of sops */ + sopno firststate; /* the initial OEND (normally 0) */ + sopno laststate; /* the final OEND */ + int iflags; /* internal flags */ +# define USEBOL 01 /* used ^ */ +# define USEEOL 02 /* used $ */ +# define REGEX_BAD 04 /* something wrong */ + int nbol; /* number of ^ used */ + int neol; /* number of $ used */ + int ncategories; /* how many character categories */ + cat_t *categories; /* ->catspace[-CHAR_MIN] */ + char *must; /* match must contain this string */ + int mlen; /* length of must */ + size_t nsub; /* 
copy of re_nsub */ + int backrefs; /* does it use back references? */ + sopno nplus; /* how deep does it nest +s? */ + /* catspace must be last */ + cat_t catspace[1]; /* actually [NC] */ +}; + +/* misc utilities */ +#define OUT (CHAR_MAX+1) /* a non-character value */ +#define ISWORD(c) (isalnum(c&0xff) || (c) == '_') + +#endif diff --git a/llvm/lib/Support/regex_impl.h b/llvm/lib/Support/regex_impl.h new file mode 100644 index 0000000000000..8ddac7dcf9982 --- /dev/null +++ b/llvm/lib/Support/regex_impl.h @@ -0,0 +1,108 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992 Henry Spencer. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer of the University of Toronto. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * Public interface of the bundled regex implementation: the match and
 * compiled-pattern types, the flag and error-code constants, and the
 * llvm_reg*() entry points.
 *
 * NOTE(review): _REGEX_H_ is an identifier form reserved to the
 * implementation and may coincide with the guard of a system <regex.h> --
 * confirm this is intended.
 */
#ifndef _REGEX_H_
#define	_REGEX_H_

#include <sys/types.h>
typedef off_t llvm_regoff_t;
/* offsets of one (sub)match */
typedef struct {
  llvm_regoff_t rm_so;		/* start of match */
  llvm_regoff_t rm_eo;		/* end of match */
} llvm_regmatch_t;

/* handle for a compiled expression; the real state hangs off re_g */
typedef struct llvm_regex {
  int re_magic;
  size_t re_nsub;		/* number of parenthesized subexpressions */
  const char *re_endp;	/* end pointer for REG_PEND */
  struct re_guts *re_g;	/* none of your business :-) */
} llvm_regex_t;

/* llvm_regcomp() flags (octal bit values) */
#define	REG_BASIC	0000
#define	REG_EXTENDED	0001
#define	REG_ICASE	0002
#define	REG_NOSUB	0004
#define	REG_NEWLINE	0010
#define	REG_NOSPEC	0020
#define	REG_PEND	0040
#define	REG_DUMP	0200

/* llvm_regerror() flags */
#define	REG_NOMATCH	 1
#define	REG_BADPAT	 2
#define	REG_ECOLLATE	 3
#define	REG_ECTYPE	 4
#define	REG_EESCAPE	 5
#define	REG_ESUBREG	 6
#define	REG_EBRACK	 7
#define	REG_EPAREN	 8
#define	REG_EBRACE	 9
#define	REG_BADBR	10
#define	REG_ERANGE	11
#define	REG_ESPACE	12
#define	REG_BADRPT	13
#define	REG_EMPTY	14
#define	REG_ASSERT	15
#define	REG_INVARG	16
#define	REG_ATOI	255	/* convert name to number (!) */
#define	REG_ITOA	0400	/* convert number to name (!) */

/* llvm_regexec() flags (octal bit values) */
#define	REG_NOTBOL	00001
#define	REG_NOTEOL	00002
#define	REG_STARTEND	00004
#define	REG_TRACE	00400	/* tracing of execution */
#define	REG_LARGE	01000	/* force large representation */
#define	REG_BACKR	02000	/* force use of backref code */

/* the entry points have C linkage so C++ callers can use them */
#ifdef __cplusplus
extern "C" {
#endif

int	llvm_regcomp(llvm_regex_t *, const char *, int);
size_t	llvm_regerror(int, const llvm_regex_t *, char *, size_t);
int	llvm_regexec(const llvm_regex_t *, const char *, size_t,
                     llvm_regmatch_t [], int);
void	llvm_regfree(llvm_regex_t *);
size_t  llvm_strlcpy(char *dst, const char *src, size_t siz);

#ifdef __cplusplus
}
#endif

#endif /* !_REGEX_H_ */
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regexec.c 8.3 (Berkeley) 3/20/94 + */ + +/* + * the outer shell of llvm_regexec() + * + * This file includes engine.inc *twice*, after muchos fiddling with the + * macros that code uses. This lets the same code operate on two different + * representations for state sets. + */ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <ctype.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +/* macros for manipulating states, small version */ +/* FIXME: 'states' is assumed as 'long' on small version. 
*/ +#define states1 long /* for later use in llvm_regexec() decision */ +#define states states1 +#define CLEAR(v) ((v) = 0) +#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) +#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) +#define ASSIGN(d, s) ((d) = (s)) +#define EQ(a, b) ((a) == (b)) +#define STATEVARS long dummy /* dummy version */ +#define STATESETUP(m, n) /* nothing */ +#define STATETEARDOWN(m) /* nothing */ +#define SETUP(v) ((v) = 0) +#define onestate long +#define INIT(o, n) ((o) = (unsigned long)1 << (n)) +#define INC(o) ((o) = (unsigned long)(o) << 1) +#define ISSTATEIN(v, o) (((v) & (o)) != 0) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* function names */ +#define SNAMES /* engine.inc looks after details */ + +#include "regengine.inc" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES + +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memmove(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS long vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { 
free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate long +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ + +#include "regengine.inc" + +/* + - llvm_regexec - interface for matching + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. + */ +int /* 0 success, REG_NOMATCH failure */ +llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch, + llvm_regmatch_t pmatch[], int eflags) +{ + struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif + + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags®EX_BAD)); + if (g->iflags®EX_BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); + + if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE)) + return(smatcher(g, string, nmatch, pmatch, eflags)); + else + return(lmatcher(g, string, nmatch, pmatch, eflags)); +} diff --git a/llvm/lib/Support/regfree.c b/llvm/lib/Support/regfree.c new file mode 100644 index 0000000000000..dc2b4af90fa74 --- /dev/null +++ b/llvm/lib/Support/regfree.c @@ -0,0 +1,72 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)regfree.c 8.3 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +/* + - llvm_regfree - free everything + */ +void +llvm_regfree(llvm_regex_t *preg) +{ + struct re_guts *g; + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ + return; + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) + free((char *)g->sets); + if (g->setbits != NULL) + free((char *)g->setbits); + if (g->must != NULL) + free(g->must); + free((char *)g); +} diff --git a/llvm/lib/Support/regstrlcpy.c b/llvm/lib/Support/regstrlcpy.c new file mode 100644 index 0000000000000..8b68afdf75f16 --- /dev/null +++ b/llvm/lib/Support/regstrlcpy.c @@ -0,0 +1,52 @@ +/* + * This code is derived from OpenBSD's libc, original license follows: + * + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +#include "regex_impl.h" +/* + * Copy src to string dst of size siz. 
/*
 * Bounded string copy: writes at most siz-1 characters of src into dst and
 * NUL-terminates the result whenever siz is non-zero.  With siz == 0 dst is
 * not touched at all.
 *
 * The return value is strlen(src); a result >= siz tells the caller that
 * the copy was truncated.
 */
size_t
llvm_strlcpy(char *dst, const char *src, size_t siz)
{
	const char *in = src;
	char *out = dst;
	size_t room;

	/* Copy for as long as a slot besides the terminator's is left. */
	for (room = siz; room > 1; room--) {
		if ((*out++ = *in++) == '\0')
			return (size_t)(in - src - 1);	/* fitted; NUL not counted */
	}

	/* Out of room: terminate dst if it exists, then measure the rest. */
	if (siz != 0)
		*out = '\0';
	while (*in != '\0')
		in++;

	return (size_t)(in - src);
}
/*
 * Small definitions shared by the regex sources: the character-count
 * constant NC, the uch typedef, assertion control and a memmove fallback.
 */
#ifndef LLVM_SUPPORT_REGUTILS_H
#define LLVM_SUPPORT_REGUTILS_H

/* utility definitions */
/* number of distinct char values; expands CHAR_MAX/CHAR_MIN, which this header does not include itself */
#define	NC		(CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;

/* switch off assertions (if not already off) if no REDEBUG */
#ifndef REDEBUG
#ifndef NDEBUG
#define	NDEBUG	/* no assertions please */
#endif
#endif
/* included after the NDEBUG decision so that assert() honours it */
#include <assert.h>

/* for old systems with bcopy() but no memmove() */
#ifdef USEBCOPY
#define	memmove(d, s, c)	bcopy(s, d, c)
#endif

#endif
+* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* based on revision d2df04efcbef7d7f6886d345861e5dfda4edacc1 Removed + * everything but a simple interface for computing XXh64. 
*/ + +#include "llvm/Support/xxhash.h" +#include "llvm/Support/Endian.h" + +#include <stdlib.h> +#include <string.h> + +using namespace llvm; +using namespace support; + +static uint64_t rotl64(uint64_t X, size_t R) { + return (X << R) | (X >> (64 - R)); +} + +static const uint64_t PRIME64_1 = 11400714785074694791ULL; +static const uint64_t PRIME64_2 = 14029467366897019727ULL; +static const uint64_t PRIME64_3 = 1609587929392839161ULL; +static const uint64_t PRIME64_4 = 9650029242287828579ULL; +static const uint64_t PRIME64_5 = 2870177450012600261ULL; + +static uint64_t round(uint64_t Acc, uint64_t Input) { + Acc += Input * PRIME64_2; + Acc = rotl64(Acc, 31); + Acc *= PRIME64_1; + return Acc; +} + +static uint64_t mergeRound(uint64_t Acc, uint64_t Val) { + Val = round(0, Val); + Acc ^= Val; + Acc = Acc * PRIME64_1 + PRIME64_4; + return Acc; +} + +uint64_t llvm::xxHash64(StringRef Data) { + size_t Len = Data.size(); + uint64_t Seed = 0; + const unsigned char *P = Data.bytes_begin(); + const unsigned char *const BEnd = Data.bytes_end(); + uint64_t H64; + + if (Len >= 32) { + const unsigned char *const Limit = BEnd - 32; + uint64_t V1 = Seed + PRIME64_1 + PRIME64_2; + uint64_t V2 = Seed + PRIME64_2; + uint64_t V3 = Seed + 0; + uint64_t V4 = Seed - PRIME64_1; + + do { + V1 = round(V1, endian::read64le(P)); + P += 8; + V2 = round(V2, endian::read64le(P)); + P += 8; + V3 = round(V3, endian::read64le(P)); + P += 8; + V4 = round(V4, endian::read64le(P)); + P += 8; + } while (P <= Limit); + + H64 = rotl64(V1, 1) + rotl64(V2, 7) + rotl64(V3, 12) + rotl64(V4, 18); + H64 = mergeRound(H64, V1); + H64 = mergeRound(H64, V2); + H64 = mergeRound(H64, V3); + H64 = mergeRound(H64, V4); + + } else { + H64 = Seed + PRIME64_5; + } + + H64 += (uint64_t)Len; + + while (P + 8 <= BEnd) { + uint64_t const K1 = round(0, endian::read64le(P)); + H64 ^= K1; + H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4; + P += 8; + } + + if (P + 4 <= BEnd) { + H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1; 
+ H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3; + P += 4; + } + + while (P < BEnd) { + H64 ^= (*P) * PRIME64_5; + H64 = rotl64(H64, 11) * PRIME64_1; + P++; + } + + H64 ^= H64 >> 33; + H64 *= PRIME64_2; + H64 ^= H64 >> 29; + H64 *= PRIME64_3; + H64 ^= H64 >> 32; + + return H64; +} + +uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) { + return xxHash64({(const char *)Data.data(), Data.size()}); +} |