summaryrefslogtreecommitdiff
path: root/clang/utils/TableGen/NeonEmitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/utils/TableGen/NeonEmitter.cpp')
-rw-r--r--clang/utils/TableGen/NeonEmitter.cpp411
1 files changed, 282 insertions, 129 deletions
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index a0f3fb2ddc08..d5bf59ef04ad 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -27,8 +27,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
@@ -98,7 +99,8 @@ enum EltType {
Poly128,
Float16,
Float32,
- Float64
+ Float64,
+ BFloat16
};
} // end namespace NeonTypeFlags
@@ -146,6 +148,7 @@ private:
SInt,
UInt,
Poly,
+ BFloat16,
};
TypeKind Kind;
bool Immediate, Constant, Pointer;
@@ -198,6 +201,7 @@ public:
bool isInt() const { return isInteger() && ElementBitwidth == 32; }
bool isLong() const { return isInteger() && ElementBitwidth == 64; }
bool isVoid() const { return Kind == Void; }
+ bool isBFloat16() const { return Kind == BFloat16; }
unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
unsigned getSizeInBits() const { return Bitwidth; }
unsigned getElementSizeInBits() const { return ElementBitwidth; }
@@ -238,6 +242,11 @@ public:
NumVectors = 1;
}
+ void make32BitElement() {
+ assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
+ ElementBitwidth = 32;
+ }
+
void doubleLanes() {
assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
Bitwidth = 128;
@@ -297,14 +306,12 @@ public:
/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
- friend class DagEmitter;
-
/// The Record this intrinsic was created from.
Record *R;
/// The unmangled name.
std::string Name;
/// The input and output typespecs. InTS == OutTS except when
- /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
+ /// CartesianProductWith is non-empty - this is the case for vreinterpret.
TypeSpec OutTS, InTS;
/// The base class kind. Most intrinsics use ClassS, which has full type
/// info for integers (s32/u32). Some use ClassI, which doesn't care about
@@ -337,7 +344,7 @@ class Intrinsic {
/// The set of intrinsics that this intrinsic uses/requires.
std::set<Intrinsic *> Dependencies;
/// The "base type", which is Type('d', OutTS). InBaseType is only
- /// different if CartesianProductOfTypes = 1 (for vreinterpret).
+ /// different if CartesianProductWith is non-empty (for vreinterpret).
Type BaseType, InBaseType;
/// The return variable.
Variable RetVar;
@@ -518,7 +525,8 @@ private:
std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
- std::pair<Type, std::string> emitDagCall(DagInit *DI);
+ std::pair<Type, std::string> emitDagCall(DagInit *DI,
+ bool MatchMangledName);
std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
std::pair<Type, std::string> emitDagOp(DagInit *DI);
@@ -546,7 +554,8 @@ class NeonEmitter {
public:
/// Called by Intrinsic - this attempts to get an intrinsic that takes
/// the given types as arguments.
- Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types);
+ Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
+ Optional<std::string> MangledName);
/// Called by Intrinsic - returns a globally-unique number.
unsigned getUniqueNumber() { return UniqueNumber++; }
@@ -577,8 +586,11 @@ public:
// runFP16 - Emit arm_fp16.h.inc
void runFP16(raw_ostream &o);
- // runHeader - Emit all the __builtin prototypes used in arm_neon.h
- // and arm_fp16.h
+ // runBF16 - Emit arm_bf16.h.inc
+ void runBF16(raw_ostream &o);
+
+ // runHeader - Emit all the __builtin prototypes used in arm_neon.h,
+ // arm_fp16.h and arm_bf16.h
void runHeader(raw_ostream &o);
// runTests - Emit tests for all the Neon intrinsics.
@@ -603,6 +615,8 @@ std::string Type::str() const {
S += "poly";
else if (isFloating())
S += "float";
+ else if (isBFloat16())
+ S += "bfloat";
else
S += "int";
@@ -642,7 +656,10 @@ std::string Type::builtin_str() const {
case 128: S += "LLLi"; break;
default: llvm_unreachable("Unhandled case!");
}
- else
+ else if (isBFloat16()) {
+ assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
+ S += "y";
+ } else
switch (ElementBitwidth) {
case 16: S += "h"; break;
case 32: S += "f"; break;
@@ -696,6 +713,11 @@ unsigned Type::getNeonEnum() const {
Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
}
+ if (isBFloat16()) {
+ assert(Addend == 1 && "BFloat16 is only 16 bit");
+ Base = (unsigned)NeonTypeFlags::BFloat16;
+ }
+
if (Bitwidth == 128)
Base |= (unsigned)NeonTypeFlags::QuadFlag;
if (isInteger() && !isSigned())
@@ -719,6 +741,9 @@ Type Type::fromTypedefName(StringRef Name) {
} else if (Name.startswith("poly")) {
T.Kind = Poly;
Name = Name.drop_front(4);
+ } else if (Name.startswith("bfloat")) {
+ T.Kind = BFloat16;
+ Name = Name.drop_front(6);
} else {
assert(Name.startswith("int"));
Name = Name.drop_front(3);
@@ -817,6 +842,10 @@ void Type::applyTypespec(bool &Quad) {
if (isPoly())
NumVectors = 0;
break;
+ case 'b':
+ Kind = BFloat16;
+ ElementBitwidth = 16;
+ break;
default:
llvm_unreachable("Unhandled type code!");
}
@@ -843,6 +872,10 @@ void Type::applyModifiers(StringRef Mods) {
case 'U':
Kind = UInt;
break;
+ case 'B':
+ Kind = BFloat16;
+ ElementBitwidth = 16;
+ break;
case 'F':
Kind = Float;
break;
@@ -924,6 +957,9 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
if (CK == ClassB)
return "";
+ if (T.isBFloat16())
+ return "bf16";
+
if (T.isPoly())
typeCode = 'p';
else if (T.isInteger())
@@ -961,7 +997,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
Type RetT = getReturnType();
if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
- !RetT.isFloating())
+ !RetT.isFloating() && !RetT.isBFloat16())
RetT.makeInteger(RetT.getElementSizeInBits(), false);
// Since the return value must be one type, return a vector type of the
@@ -1026,7 +1062,8 @@ std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
std::string S = Name;
if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
- Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32")
+ Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
+ Name == "vcvt_f32_bf16")
return Name;
if (!typeCode.empty()) {
@@ -1257,7 +1294,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {
if (!getReturnType().isVoid() && !SRet)
S += "(" + RetVar.getType().str() + ") ";
- S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
+ S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";
if (SRet)
S += "&" + RetVar.getName() + ", ";
@@ -1383,8 +1420,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
return emitDagSaveTemp(DI);
if (Op == "op")
return emitDagOp(DI);
- if (Op == "call")
- return emitDagCall(DI);
+ if (Op == "call" || Op == "call_mangled")
+ return emitDagCall(DI, Op == "call_mangled");
if (Op == "name_replace")
return emitDagNameReplace(DI);
if (Op == "literal")
@@ -1398,25 +1435,26 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
if (DI->getNumArgs() == 2) {
// Unary op.
std::pair<Type, std::string> R =
- emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
+ emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
return std::make_pair(R.first, Op + R.second);
} else {
assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
std::pair<Type, std::string> R1 =
- emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
+ emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
std::pair<Type, std::string> R2 =
- emitDagArg(DI->getArg(2), DI->getArgNameStr(2));
+ emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2)));
assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
}
}
-std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
+std::pair<Type, std::string>
+Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
std::vector<Type> Types;
std::vector<std::string> Values;
for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
std::pair<Type, std::string> R =
- emitDagArg(DI->getArg(I + 1), DI->getArgNameStr(I + 1));
+ emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));
Types.push_back(R.first);
Values.push_back(R.second);
}
@@ -1427,7 +1465,13 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
N = SI->getAsUnquotedString();
else
N = emitDagArg(DI->getArg(0), "").second;
- Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types);
+ Optional<std::string> MangledName;
+ if (MatchMangledName) {
+ if (Intr.getRecord()->getValueAsBit("isLaneQ"))
+ N += "q";
+ MangledName = Intr.mangleName(N, ClassS);
+ }
+ Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);
// Make sure the callee is known as an early def.
Callee.setNeededEarly();
@@ -1451,9 +1495,9 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
bool IsBitCast){
// (cast MOD* VAL) -> cast VAL to type given by MOD.
- std::pair<Type, std::string> R = emitDagArg(
- DI->getArg(DI->getNumArgs() - 1),
- DI->getArgNameStr(DI->getNumArgs() - 1));
+ std::pair<Type, std::string> R =
+ emitDagArg(DI->getArg(DI->getNumArgs() - 1),
+ std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
Type castToType = R.first;
for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
@@ -1465,10 +1509,11 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
// 5. The value "H" or "D" to half or double the bitwidth.
// 6. The value "8" to convert to 8-bit (signed) integer lanes.
if (!DI->getArgNameStr(ArgIdx).empty()) {
- assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
- Intr.Variables.end(),
+ assert_with_loc(Intr.Variables.find(std::string(
+ DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(),
"Variable not found");
- castToType = Intr.Variables[DI->getArgNameStr(ArgIdx)].getType();
+ castToType =
+ Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();
} else {
StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
assert_with_loc(SI, "Expected string type or $Name for cast type");
@@ -1485,6 +1530,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
castToType.doubleLanes();
} else if (SI->getAsUnquotedString() == "8") {
castToType.makeInteger(8, true);
+ } else if (SI->getAsUnquotedString() == "32") {
+ castToType.make32BitElement();
} else {
castToType = Type::fromTypedefName(SI->getAsUnquotedString());
assert_with_loc(!castToType.isVoid(), "Unknown typedef");
@@ -1583,9 +1630,9 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
// (shuffle arg1, arg2, sequence)
std::pair<Type, std::string> Arg1 =
- emitDagArg(DI->getArg(0), DI->getArgNameStr(0));
+ emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
std::pair<Type, std::string> Arg2 =
- emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
+ emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(Arg1.first == Arg2.first,
"Different types in arguments to shuffle!");
@@ -1627,8 +1674,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
- std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
- DI->getArgNameStr(0));
+ std::pair<Type, std::string> A =
+ emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
Type T = Intr.getBaseType();
@@ -1646,10 +1693,10 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
- std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
- DI->getArgNameStr(0));
- std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
- DI->getArgNameStr(1));
+ std::pair<Type, std::string> A =
+ emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
+ std::pair<Type, std::string> B =
+ emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(B.first.isScalar(),
"dup_typed() requires a scalar as the second argument");
@@ -1668,10 +1715,10 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI)
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
- std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
- DI->getArgNameStr(0));
- std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
- DI->getArgNameStr(1));
+ std::pair<Type, std::string> A =
+ emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
+ std::pair<Type, std::string> B =
+ emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(B.first.isScalar(),
"splat() requires a scalar int as the second argument");
@@ -1687,13 +1734,13 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
- std::pair<Type, std::string> A = emitDagArg(DI->getArg(1),
- DI->getArgNameStr(1));
+ std::pair<Type, std::string> A =
+ emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
assert_with_loc(!A.first.isVoid(),
"Argument to save_temp() must have non-void type!");
- std::string N = DI->getArgNameStr(0);
+ std::string N = std::string(DI->getArgNameStr(0));
assert_with_loc(!N.empty(),
"save_temp() expects a name as the first argument");
@@ -1831,7 +1878,8 @@ void Intrinsic::indexBody() {
// NeonEmitter implementation
//===----------------------------------------------------------------------===//
-Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
+Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
+ Optional<std::string> MangledName) {
// First, look up the name in the intrinsic map.
assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
("Intrinsic '" + Name + "' not found!").str());
@@ -1860,17 +1908,19 @@ Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
}
ErrMsg += ")\n";
+ if (MangledName && MangledName != I.getMangledName(true))
+ continue;
+
if (I.getNumParams() != Types.size())
continue;
- bool Good = true;
- for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
- if (I.getParamType(Arg) != Types[Arg]) {
- Good = false;
- break;
- }
- }
- if (Good)
+ unsigned ArgNum = 0;
+ bool MatchingArgumentTypes =
+ std::all_of(Types.begin(), Types.end(), [&](const auto &Type) {
+ return Type == I.getParamType(ArgNum++);
+ });
+
+ if (MatchingArgumentTypes)
GoodVec.push_back(&I);
}
@@ -1883,14 +1933,14 @@ Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
void NeonEmitter::createIntrinsic(Record *R,
SmallVectorImpl<Intrinsic *> &Out) {
- std::string Name = R->getValueAsString("Name");
- std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
+ std::string Name = std::string(R->getValueAsString("Name"));
+ std::string Proto = std::string(R->getValueAsString("Prototype"));
+ std::string Types = std::string(R->getValueAsString("Types"));
Record *OperationRec = R->getValueAsDef("Operation");
- bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
- std::string Guard = R->getValueAsString("ArchGuard");
+ std::string Guard = std::string(R->getValueAsString("ArchGuard"));
bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
+ std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
// Set the global current record. This allows assert_with_loc to produce
// decent location information even when highly nested.
@@ -1905,17 +1955,20 @@ void NeonEmitter::createIntrinsic(Record *R,
CK = ClassMap[R->getSuperClasses()[1].first];
std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
- for (auto TS : TypeSpecs) {
- if (CartesianProductOfTypes) {
+ if (!CartesianProductWith.empty()) {
+ std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
+ for (auto TS : TypeSpecs) {
Type DefaultT(TS, ".");
- for (auto SrcTS : TypeSpecs) {
+ for (auto SrcTS : ProductTypeSpecs) {
Type DefaultSrcT(SrcTS, ".");
if (TS == SrcTS ||
DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
continue;
NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
}
- } else {
+ }
+ } else {
+ for (auto TS : TypeSpecs) {
NewTypeSpecs.push_back(std::make_pair(TS, TS));
}
}
@@ -2143,6 +2196,74 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
genIntrinsicRangeCheckCode(OS, Defs);
}
+static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
+ std::string TypedefTypes(types);
+ std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
+
+ // Emit vector typedefs.
+ bool InIfdef = false;
+ for (auto &TS : TDTypeVec) {
+ bool IsA64 = false;
+ Type T(TS, ".");
+ if (T.isDouble())
+ IsA64 = true;
+
+ if (InIfdef && !IsA64) {
+ OS << "#endif\n";
+ InIfdef = false;
+ }
+ if (!InIfdef && IsA64) {
+ OS << "#ifdef __aarch64__\n";
+ InIfdef = true;
+ }
+
+ if (T.isPoly())
+ OS << "typedef __attribute__((neon_polyvector_type(";
+ else
+ OS << "typedef __attribute__((neon_vector_type(";
+
+ Type T2 = T;
+ T2.makeScalar();
+ OS << T.getNumElements() << "))) ";
+ OS << T2.str();
+ OS << " " << T.str() << ";\n";
+ }
+ if (InIfdef)
+ OS << "#endif\n";
+ OS << "\n";
+
+ // Emit struct typedefs.
+ InIfdef = false;
+ for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
+ for (auto &TS : TDTypeVec) {
+ bool IsA64 = false;
+ Type T(TS, ".");
+ if (T.isDouble())
+ IsA64 = true;
+
+ if (InIfdef && !IsA64) {
+ OS << "#endif\n";
+ InIfdef = false;
+ }
+ if (!InIfdef && IsA64) {
+ OS << "#ifdef __aarch64__\n";
+ InIfdef = true;
+ }
+
+ const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
+ Type VT(TS, Mods);
+ OS << "typedef struct " << VT.str() << " {\n";
+ OS << " " << T.str() << " val";
+ OS << "[" << NumMembers << "]";
+ OS << ";\n} ";
+ OS << VT.str() << ";\n";
+ OS << "\n";
+ }
+ }
+ if (InIfdef)
+ OS << "#endif\n";
+}
+
/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
@@ -2191,12 +2312,22 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#ifndef __ARM_NEON_H\n";
OS << "#define __ARM_NEON_H\n\n";
+ OS << "#ifndef __ARM_FP\n";
+ OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
+ "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
+ OS << "#else\n\n";
+
OS << "#if !defined(__ARM_NEON)\n";
OS << "#error \"NEON support not enabled\"\n";
- OS << "#endif\n\n";
+ OS << "#else\n\n";
OS << "#include <stdint.h>\n\n";
+ OS << "#ifdef __ARM_FEATURE_BF16\n";
+ OS << "#include <arm_bf16.h>\n";
+ OS << "typedef __bf16 bfloat16_t;\n";
+ OS << "#endif\n\n";
+
// Emit NEON-specific scalar typedefs.
OS << "typedef float float32_t;\n";
OS << "typedef __fp16 float16_t;\n";
@@ -2214,76 +2345,14 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
+ OS << "typedef int64_t poly64_t;\n";
OS << "#endif\n";
- // Emit Neon vector typedefs.
- std::string TypedefTypes(
- "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
- std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
-
- // Emit vector typedefs.
- bool InIfdef = false;
- for (auto &TS : TDTypeVec) {
- bool IsA64 = false;
- Type T(TS, ".");
- if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64))
- IsA64 = true;
-
- if (InIfdef && !IsA64) {
- OS << "#endif\n";
- InIfdef = false;
- }
- if (!InIfdef && IsA64) {
- OS << "#ifdef __aarch64__\n";
- InIfdef = true;
- }
+ emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
- if (T.isPoly())
- OS << "typedef __attribute__((neon_polyvector_type(";
- else
- OS << "typedef __attribute__((neon_vector_type(";
-
- Type T2 = T;
- T2.makeScalar();
- OS << T.getNumElements() << "))) ";
- OS << T2.str();
- OS << " " << T.str() << ";\n";
- }
- if (InIfdef)
- OS << "#endif\n";
- OS << "\n";
-
- // Emit struct typedefs.
- InIfdef = false;
- for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
- for (auto &TS : TDTypeVec) {
- bool IsA64 = false;
- Type T(TS, ".");
- if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64))
- IsA64 = true;
-
- if (InIfdef && !IsA64) {
- OS << "#endif\n";
- InIfdef = false;
- }
- if (!InIfdef && IsA64) {
- OS << "#ifdef __aarch64__\n";
- InIfdef = true;
- }
-
- const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
- Type VT(TS, Mods);
- OS << "typedef struct " << VT.str() << " {\n";
- OS << " " << T.str() << " val";
- OS << "[" << NumMembers << "]";
- OS << ";\n} ";
- OS << VT.str() << ";\n";
- OS << "\n";
- }
- }
- if (InIfdef)
- OS << "#endif\n";
- OS << "\n";
+ OS << "#ifdef __ARM_FEATURE_BF16\n";
+ emitNeonTypeDefs("bQb", OS);
+ OS << "#endif\n\n";
OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
"__nodebug__))\n\n";
@@ -2340,6 +2409,8 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "\n";
OS << "#undef __ai\n\n";
+ OS << "#endif /* if !defined(__ARM_NEON) */\n";
+ OS << "#endif /* ifndef __ARM_FP */\n";
OS << "#endif /* __ARM_NEON_H */\n";
}
@@ -2450,6 +2521,84 @@ void NeonEmitter::runFP16(raw_ostream &OS) {
OS << "#endif /* __ARM_FP16_H */\n";
}
+void NeonEmitter::runBF16(raw_ostream &OS) {
+ OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
+ "-----------------------------------===\n"
+ " *\n"
+ " *\n"
+ " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+ "Exceptions.\n"
+ " * See https://llvm.org/LICENSE.txt for license information.\n"
+ " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ " *\n"
+ " *===-----------------------------------------------------------------"
+ "------===\n"
+ " */\n\n";
+
+ OS << "#ifndef __ARM_BF16_H\n";
+ OS << "#define __ARM_BF16_H\n\n";
+
+ OS << "typedef __bf16 bfloat16_t;\n";
+
+ OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
+ "__nodebug__))\n\n";
+
+ SmallVector<Intrinsic *, 128> Defs;
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ for (auto *I : Defs)
+ I->indexBody();
+
+ llvm::stable_sort(Defs, llvm::deref<std::less<>>());
+
+ // Only emit a def when its requirements have been met.
+ // FIXME: This loop could be made faster, but it's fast enough for now.
+ bool MadeProgress = true;
+ std::string InGuard;
+ while (!Defs.empty() && MadeProgress) {
+ MadeProgress = false;
+
+ for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
+ I != Defs.end(); /*No step*/) {
+ bool DependenciesSatisfied = true;
+ for (auto *II : (*I)->getDependencies()) {
+ if (llvm::is_contained(Defs, II))
+ DependenciesSatisfied = false;
+ }
+ if (!DependenciesSatisfied) {
+ // Try the next one.
+ ++I;
+ continue;
+ }
+
+ // Emit #endif/#if pair if needed.
+ if ((*I)->getGuard() != InGuard) {
+ if (!InGuard.empty())
+ OS << "#endif\n";
+ InGuard = (*I)->getGuard();
+ if (!InGuard.empty())
+ OS << "#if " << InGuard << "\n";
+ }
+
+ // Actually generate the intrinsic code.
+ OS << (*I)->generate();
+
+ MadeProgress = true;
+ I = Defs.erase(I);
+ }
+ }
+ assert(Defs.empty() && "Some requirements were not satisfied!");
+ if (!InGuard.empty())
+ OS << "#endif\n";
+
+ OS << "\n";
+ OS << "#undef __ai\n\n";
+
+ OS << "#endif\n";
+}
+
void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).run(OS);
}
@@ -2458,6 +2607,10 @@ void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runFP16(OS);
}
+void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) {
+ NeonEmitter(Records).runBF16(OS);
+}
+
void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runHeader(OS);
}