Diffstat (limited to 'llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp')
-rw-r--r--  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 616
1 file changed, 470 insertions, 146 deletions
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d37d812df485e..a3014b2aba92c 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -31,6 +31,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
@@ -39,6 +40,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool> LVIInlineAsmHardening(
+    "x86-experimental-lvi-inline-asm-hardening",
+    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
+             " Injection (LVI). This feature is experimental."), cl::Hidden);
+
 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
@@ -74,7 +80,7 @@ class X86AsmParser : public MCTargetAsmParser {
 
   enum VEXEncoding {
     VEXEncoding_Default,
-    VEXEncoding_VEX2,
+    VEXEncoding_VEX,
     VEXEncoding_VEX3,
     VEXEncoding_EVEX,
   };
@@ -326,6 +332,7 @@ private:
       IES_PLUS,
       IES_MINUS,
      IES_OFFSET,
+      IES_CAST,
       IES_NOT,
       IES_MULTIPLY,
       IES_DIVIDE,
@@ -352,6 +359,7 @@ private:
     bool MemExpr;
     bool OffsetOperator;
     SMLoc OffsetOperatorLoc;
+    StringRef CurType;
 
     bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
       if (Sym) {
@@ -379,6 +387,7 @@ private:
     unsigned getScale() { return Scale; }
     const MCExpr *getSym() { return Sym; }
     StringRef getSymName() { return SymName; }
+    StringRef getType() { return CurType; }
     int64_t getImm() { return Imm + IC.execute(); }
     bool isValidEndState() {
       return State == IES_RBRAC || State == IES_INTEGER;
     }
@@ -611,9 +620,9 @@ private:
     }
     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                           const InlineAsmIdentifierInfo &IDInfo,
-                          bool ParsingInlineAsm, StringRef &ErrMsg) {
+                          bool ParsingMSInlineAsm, StringRef &ErrMsg) {
       // InlineAsm: Treat an enum value as an integer
-      if (ParsingInlineAsm)
+      if (ParsingMSInlineAsm)
         if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
           return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
       // Treat a symbolic constant like an integer
@@ -624,6 +633,7 @@ private:
       default:
         State = IES_ERROR;
         break;
+      case IES_CAST:
       case IES_PLUS:
       case IES_MINUS:
       case IES_NOT:
@@ -634,7 +644,7 @@ private:
         MemExpr = true;
         State = IES_INTEGER;
         IC.pushOperand(IC_IMM);
-        if (ParsingInlineAsm)
+        if (ParsingMSInlineAsm)
           Info = IDInfo;
         break;
       }
@@ -736,6 +746,7 @@ private:
         IC.pushOperator(IC_PLUS);
         break;
       case IES_INIT:
+      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
         State = IES_LBRAC;
         break;
@@ -808,6 +819,7 @@ private:
       case IES_INTEGER:
       case IES_OFFSET:
       case IES_REGISTER:
+      case IES_RBRAC:
       case IES_RPAREN:
         State = IES_RPAREN;
         IC.pushOperator(IC_RPAREN);
         break;
       }
     }
     bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
-                  const InlineAsmIdentifierInfo &IDInfo, bool ParsingInlineAsm,
+                  const InlineAsmIdentifierInfo &IDInfo, bool ParsingMSInlineAsm,
                   StringRef &ErrMsg) {
       PrevState = State;
       switch (State) {
@@ -833,13 +845,26 @@ private:
       // As we cannot yet resolve the actual value (offset), we retain
       // the requested semantics by pushing a '0' to the operands stack
       IC.pushOperand(IC_IMM);
-      if (ParsingInlineAsm) {
+      if (ParsingMSInlineAsm) {
         Info = IDInfo;
       }
       break;
     }
     return false;
   }
+  void onCast(StringRef Type) {
+    PrevState = State;
+    switch (State) {
+    default:
+      State = IES_ERROR;
+      break;
+    case IES_LPAREN:
+      setType(Type);
+      State = IES_CAST;
+      break;
+    }
+  }
+  void setType(StringRef Type) { CurType = Type; }
 };
 
 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
@@ -858,6 +883,11 @@ private:
     return nullptr;
   }
 
+  bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
+                           SMLoc EndLoc);
+  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
+                     bool RestoreOnFailure);
+
   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
   bool IsSIReg(unsigned Reg);
@@ -896,10 +926,10 @@ private:
   bool ParseIntelMemoryOperandSize(unsigned &Size);
   std::unique_ptr<X86Operand>
-  CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
-                        unsigned IndexReg, unsigned Scale, SMLoc Start,
-                        SMLoc End, unsigned Size, StringRef Identifier,
-                        const InlineAsmIdentifierInfo &Info);
+  CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
+                          unsigned IndexReg, unsigned Scale, SMLoc Start,
+                          SMLoc End, unsigned Size, StringRef Identifier,
+                          const InlineAsmIdentifierInfo &Info);
 
   bool parseDirectiveEven(SMLoc L);
   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -927,9 +957,14 @@ private:
   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
 
-  /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
+  // Load Value Injection (LVI) Mitigations for machine code
+  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
+  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
+  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
+
+  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
   /// instrumentation around Inst.
-  void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
+  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
 
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                OperandVector &Operands, MCStreamer &Out,
@@ -1023,6 +1058,8 @@ public:
   }
 
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                        SMLoc &EndLoc) override;
 
   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
 
@@ -1129,36 +1166,21 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
   return checkScale(Scale, ErrMsg);
 }
 
-bool X86AsmParser::ParseRegister(unsigned &RegNo,
-                                 SMLoc &StartLoc, SMLoc &EndLoc) {
-  MCAsmParser &Parser = getParser();
-  RegNo = 0;
-  const AsmToken &PercentTok = Parser.getTok();
-  StartLoc = PercentTok.getLoc();
-
+bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
+                                       SMLoc StartLoc, SMLoc EndLoc) {
   // If we encounter a %, ignore it. This code handles registers with and
   // without the prefix, unprefixed registers can occur in cfi directives.
-  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
-    Parser.Lex(); // Eat percent token.
+  RegName.consume_front("%");
 
-  const AsmToken &Tok = Parser.getTok();
-  EndLoc = Tok.getEndLoc();
-
-  if (Tok.isNot(AsmToken::Identifier)) {
-    if (isParsingIntelSyntax()) return true;
-    return Error(StartLoc, "invalid register name",
-                 SMRange(StartLoc, EndLoc));
-  }
-
-  RegNo = MatchRegisterName(Tok.getString());
+  RegNo = MatchRegisterName(RegName);
 
   // If the match failed, try the register name as lowercase.
   if (RegNo == 0)
-    RegNo = MatchRegisterName(Tok.getString().lower());
+    RegNo = MatchRegisterName(RegName.lower());
 
   // The "flags" and "mxcsr" registers cannot be referenced directly.
   // Treat it as an identifier instead.
-  if (isParsingInlineAsm() && isParsingIntelSyntax() &&
+  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
       (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
     RegNo = 0;
 
@@ -1172,27 +1194,137 @@
         X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
         X86II::isX86_64NonExtLowByteReg(RegNo) ||
         X86II::isX86_64ExtendedReg(RegNo)) {
-      StringRef RegName = Tok.getString();
-      Parser.Lex(); // Eat register name.
       return Error(StartLoc,
                    "register %" + RegName + " is only available in 64-bit mode",
                    SMRange(StartLoc, EndLoc));
     }
   }
 
+  // If this is "db[0-15]", match it as an alias
+  // for dr[0-15].
+  if (RegNo == 0 && RegName.startswith("db")) {
+    if (RegName.size() == 3) {
+      switch (RegName[2]) {
+      case '0':
+        RegNo = X86::DR0;
+        break;
+      case '1':
+        RegNo = X86::DR1;
+        break;
+      case '2':
+        RegNo = X86::DR2;
+        break;
+      case '3':
+        RegNo = X86::DR3;
+        break;
+      case '4':
+        RegNo = X86::DR4;
+        break;
+      case '5':
+        RegNo = X86::DR5;
+        break;
+      case '6':
+        RegNo = X86::DR6;
+        break;
+      case '7':
+        RegNo = X86::DR7;
+        break;
+      case '8':
+        RegNo = X86::DR8;
+        break;
+      case '9':
+        RegNo = X86::DR9;
+        break;
+      }
+    } else if (RegName.size() == 4 && RegName[2] == '1') {
+      switch (RegName[3]) {
+      case '0':
+        RegNo = X86::DR10;
+        break;
+      case '1':
+        RegNo = X86::DR11;
+        break;
+      case '2':
+        RegNo = X86::DR12;
+        break;
+      case '3':
+        RegNo = X86::DR13;
+        break;
+      case '4':
+        RegNo = X86::DR14;
+        break;
+      case '5':
+        RegNo = X86::DR15;
+        break;
+      }
+    }
+  }
+
+  if (RegNo == 0) {
+    if (isParsingIntelSyntax())
+      return true;
+    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
+  }
+  return false;
+}
+
+bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                 SMLoc &EndLoc, bool RestoreOnFailure) {
+  MCAsmParser &Parser = getParser();
+  MCAsmLexer &Lexer = getLexer();
+  RegNo = 0;
+
+  SmallVector<AsmToken, 5> Tokens;
+  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
+    if (RestoreOnFailure) {
+      while (!Tokens.empty()) {
+        Lexer.UnLex(Tokens.pop_back_val());
+      }
+    }
+  };
+
+  const AsmToken &PercentTok = Parser.getTok();
+  StartLoc = PercentTok.getLoc();
+
+  // If we encounter a %, ignore it. This code handles registers with and
+  // without the prefix, unprefixed registers can occur in cfi directives.
+  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
+    Tokens.push_back(PercentTok);
+    Parser.Lex(); // Eat percent token.
+  }
+
+  const AsmToken &Tok = Parser.getTok();
+  EndLoc = Tok.getEndLoc();
+
+  if (Tok.isNot(AsmToken::Identifier)) {
+    OnFailure();
+    if (isParsingIntelSyntax()) return true;
+    return Error(StartLoc, "invalid register name",
+                 SMRange(StartLoc, EndLoc));
+  }
+
+  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
+    OnFailure();
+    return true;
+  }
+
   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
   if (RegNo == X86::ST0) {
+    Tokens.push_back(Tok);
     Parser.Lex(); // Eat 'st'
 
     // Check to see if we have '(4)' after %st.
-    if (getLexer().isNot(AsmToken::LParen))
+    if (Lexer.isNot(AsmToken::LParen))
       return false;
     // Lex the paren.
-    getParser().Lex();
+    Tokens.push_back(Parser.getTok());
+    Parser.Lex();
 
     const AsmToken &IntTok = Parser.getTok();
-    if (IntTok.isNot(AsmToken::Integer))
+    if (IntTok.isNot(AsmToken::Integer)) {
+      OnFailure();
       return Error(IntTok.getLoc(), "expected stack index");
+    }
     switch (IntTok.getIntVal()) {
     case 0: RegNo = X86::ST0; break;
     case 1: RegNo = X86::ST1; break;
@@ -1202,11 +1334,18 @@
     case 5: RegNo = X86::ST5; break;
     case 6: RegNo = X86::ST6; break;
     case 7: RegNo = X86::ST7; break;
-    default: return Error(IntTok.getLoc(), "invalid stack index");
+    default:
+      OnFailure();
+      return Error(IntTok.getLoc(), "invalid stack index");
     }
 
-    if (getParser().Lex().isNot(AsmToken::RParen))
+    // Lex IntTok
+    Tokens.push_back(IntTok);
+    Parser.Lex();
+    if (Lexer.isNot(AsmToken::RParen)) {
+      OnFailure();
       return Error(Parser.getTok().getLoc(), "expected ')'");
+    }
 
     EndLoc = Parser.getTok().getEndLoc();
     Parser.Lex(); // Eat ')'
@@ -1215,41 +1354,8 @@
 
   EndLoc = Parser.getTok().getEndLoc();
 
-  // If this is "db[0-15]", match it as an alias
-  // for dr[0-15].
-  if (RegNo == 0 && Tok.getString().startswith("db")) {
-    if (Tok.getString().size() == 3) {
-      switch (Tok.getString()[2]) {
-      case '0': RegNo = X86::DR0; break;
-      case '1': RegNo = X86::DR1; break;
-      case '2': RegNo = X86::DR2; break;
-      case '3': RegNo = X86::DR3; break;
-      case '4': RegNo = X86::DR4; break;
-      case '5': RegNo = X86::DR5; break;
-      case '6': RegNo = X86::DR6; break;
-      case '7': RegNo = X86::DR7; break;
-      case '8': RegNo = X86::DR8; break;
-      case '9': RegNo = X86::DR9; break;
-      }
-    } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
-      switch (Tok.getString()[3]) {
-      case '0': RegNo = X86::DR10; break;
-      case '1': RegNo = X86::DR11; break;
-      case '2': RegNo = X86::DR12; break;
-      case '3': RegNo = X86::DR13; break;
-      case '4': RegNo = X86::DR14; break;
-      case '5': RegNo = X86::DR15; break;
-      }
-    }
-
-    if (RegNo != 0) {
-      EndLoc = Parser.getTok().getEndLoc();
-      Parser.Lex(); // Eat it.
-      return false;
-    }
-  }
-
   if (RegNo == 0) {
+    OnFailure();
     if (isParsingIntelSyntax()) return true;
     return Error(StartLoc, "invalid register name",
                  SMRange(StartLoc, EndLoc));
@@ -1259,6 +1365,25 @@
   return false;
 }
 
+bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                 SMLoc &EndLoc) {
+  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+}
+
+OperandMatchResultTy X86AsmParser::tryParseRegister(unsigned &RegNo,
+                                                    SMLoc &StartLoc,
+                                                    SMLoc &EndLoc) {
+  bool Result =
+      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+  bool PendingErrors = getParser().hasPendingError();
+  getParser().clearPendingErrors();
+  if (PendingErrors)
+    return MatchOperand_ParseFail;
+  if (Result)
+    return MatchOperand_NoMatch;
+  return MatchOperand_Success;
+}
+
 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
   bool Parse32 = is32BitMode() || Code16GCC;
   unsigned Basereg =
      is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
@@ -1405,7 +1530,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
   return ParseATTOperand();
 }
 
-std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
+std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm(
     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
     const InlineAsmIdentifierInfo &Info) {
@@ -1445,8 +1570,9 @@
   } else {
     BaseReg = BaseReg ? BaseReg : 1;
     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
-                                 IndexReg, Scale, Start, End, Size, Identifier,
-                                 Decl, FrontendSize);
+                                 IndexReg, Scale, Start, End, Size,
+                                 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl,
+                                 FrontendSize);
   }
 }
 
@@ -1483,7 +1609,7 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
       return true;
     StringRef ErrMsg;
     ParseError =
-        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingInlineAsm(), ErrMsg);
+        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
     if (ParseError)
       return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
   } else {
@@ -1525,12 +1651,51 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       SMLoc IdentLoc = Tok.getLoc();
       StringRef Identifier = Tok.getString();
      UpdateLocLex = false;
-      // Register
+      // (MASM only) <TYPE> PTR operator
+      if (Parser.isParsingMasm()) {
+        const AsmToken &NextTok = getLexer().peekTok();
+        if (NextTok.is(AsmToken::Identifier) &&
+            NextTok.getIdentifier().equals_lower("ptr")) {
+          SM.onCast(Identifier);
+          // Eat type and PTR.
+          consumeToken();
+          End = consumeToken();
+          break;
+        }
+      }
+      // Register, or (MASM only) <register>.<field>
       unsigned Reg;
-      if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
-        if (SM.onRegister(Reg, ErrMsg))
-          return Error(Tok.getLoc(), ErrMsg);
-        break;
+      if (Tok.is(AsmToken::Identifier)) {
+        if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
+          if (SM.onRegister(Reg, ErrMsg))
+            return Error(IdentLoc, ErrMsg);
+          break;
+        }
+        if (Parser.isParsingMasm()) {
+          const std::pair<StringRef, StringRef> IDField =
+              Tok.getString().split('.');
+          const StringRef ID = IDField.first, Field = IDField.second;
+          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
+          if (!Field.empty() &&
+              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
+            if (SM.onRegister(Reg, ErrMsg))
+              return Error(IdentLoc, ErrMsg);
+
+            StringRef Type;
+            unsigned Offset = 0;
+            SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
+            if (Parser.lookUpField(Field, Type, Offset))
+              return Error(FieldStartLoc, "unknown offset");
+            else if (SM.onPlus(ErrMsg))
+              return Error(getTok().getLoc(), ErrMsg);
+            else if (SM.onInteger(Offset, ErrMsg))
+              return Error(IdentLoc, ErrMsg);
+            SM.setType(Type);
+
+            End = consumeToken();
+            break;
+          }
+        }
       }
      // Operator synonyms ("not", "or" etc.)
       bool ParseError = false;
@@ -1542,37 +1707,40 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       // Symbol reference, when parsing assembly content
       InlineAsmIdentifierInfo Info;
       const MCExpr *Val;
-      if (!isParsingInlineAsm()) {
-        if (getParser().parsePrimaryExpr(Val, End)) {
-          return Error(Tok.getLoc(), "Unexpected identifier!");
-        } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
-          return Error(IdentLoc, ErrMsg);
-        } else
+      if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
+        // MS Dot Operator expression
+        if (Identifier.count('.') &&
+            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
+          if (ParseIntelDotOperator(SM, End))
+            return true;
           break;
+        }
       }
-      // MS InlineAsm operators (TYPE/LENGTH/SIZE)
-      if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
-        if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
-          if (SM.onInteger(Val, ErrMsg))
-            return Error(IdentLoc, ErrMsg);
-        } else
-          return true;
-        break;
-      }
-      // MS Dot Operator expression
-      if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
-        if (ParseIntelDotOperator(SM, End))
+      if (isParsingMSInlineAsm()) {
+        // MS InlineAsm operators (TYPE/LENGTH/SIZE)
+        if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
+          if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
+            if (SM.onInteger(Val, ErrMsg))
+              return Error(IdentLoc, ErrMsg);
+          } else
+            return true;
+          break;
+        }
+        // MS InlineAsm identifier
+        // Call parseIdentifier() to combine @ with the identifier behind it.
+        if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
+          return Error(IdentLoc, "expected identifier");
+        if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
           return true;
+        else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
+          return Error(IdentLoc, ErrMsg);
         break;
       }
-      // MS InlineAsm identifier
-      // Call parseIdentifier() to combine @ with the identifier behind it.
-      if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
-        return Error(IdentLoc, "expected identifier");
-      if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
-        return true;
-      else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
+      if (getParser().parsePrimaryExpr(Val, End)) {
+        return Error(Tok.getLoc(), "Unexpected identifier!");
+      } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
         return Error(IdentLoc, ErrMsg);
+      }
       break;
     }
     case AsmToken::Integer: {
@@ -1593,8 +1761,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
         return Error(Loc, "invalid reference to undefined symbol");
       StringRef Identifier = Sym->getName();
       InlineAsmIdentifierInfo Info;
-      if (SM.onIdentifierExpr(Val, Identifier, Info,
-                              isParsingInlineAsm(), ErrMsg))
+      if (SM.onIdentifierExpr(Val, Identifier, Info, isParsingMSInlineAsm(),
+                              ErrMsg))
         return Error(Loc, ErrMsg);
       End = consumeToken();
     } else {
@@ -1688,7 +1856,7 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(
     const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
     bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
   MCAsmParser &Parser = getParser();
-  assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
   Val = nullptr;
 
   StringRef LineBuf(Identifier.data());
@@ -1777,9 +1945,11 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
 }
 
 /// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
+bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
+                                         SMLoc &End) {
   const AsmToken &Tok = getTok();
-  unsigned Offset;
+  StringRef Type;
+  unsigned Offset = 0;
 
   // Drop the optional '.'.
   StringRef DotDispStr = Tok.getString();
@@ -1791,10 +1961,15 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
     APInt DotDisp;
     DotDispStr.getAsInteger(10, DotDisp);
     Offset = DotDisp.getZExtValue();
-  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
-    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
-    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
-                                           Offset))
+  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
+             Tok.is(AsmToken::Identifier)) {
+    const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
+    const StringRef Base = BaseMember.first, Member = BaseMember.second;
+    if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
+        getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
+        getParser().lookUpField(DotDispStr, Type, Offset) &&
+        (!SemaCallback ||
+         SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
   } else
     return Error(Tok.getLoc(), "Unexpected token type!");
@@ -1805,6 +1980,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
   while (Tok.getLoc().getPointer() < DotExprEndLoc)
     Lex();
   SM.addImm(Offset);
+  SM.setType(Type);
   return false;
 }
 
@@ -1816,7 +1992,7 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
   // Eat offset, mark start of identifier.
   SMLoc Start = Lex().getLoc();
   ID = getTok().getString();
-  if (!isParsingInlineAsm()) {
+  if (!isParsingMSInlineAsm()) {
     if ((getTok().isNot(AsmToken::Identifier) &&
          getTok().isNot(AsmToken::String)) ||
         getParser().parsePrimaryExpr(Val, End))
@@ -1939,7 +2115,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
   if (ParseIntelExpression(SM, End))
     return nullptr;
 
-  if (isParsingInlineAsm())
+  if (isParsingMSInlineAsm())
     RewriteIntelExpression(SM, Start, Tok.getLoc());
 
   int64_t Imm = SM.getImm();
@@ -1953,7 +2129,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
   // RegNo != 0 specifies a valid segment register,
   // and we are parsing a segment override
   if (!SM.isMemExpr() && !RegNo) {
-    if (isParsingInlineAsm() && SM.isOffsetOperator()) {
+    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
       const InlineAsmIdentifierInfo Info = SM.getIdentifierInfo();
       if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
         // Disp includes the address of a variable; make sure this is recorded
@@ -2005,10 +2181,18 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
       CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                       ErrMsg))
     return ErrorOperand(Start, ErrMsg);
-  if (isParsingInlineAsm())
-    return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
-                                 Scale, Start, End, Size, SM.getSymName(),
-                                 SM.getIdentifierInfo());
+  if (isParsingMSInlineAsm())
+    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
+                                   End, Size, SM.getSymName(),
+                                   SM.getIdentifierInfo());
+
+  // When parsing x64 MS-style assembly, all memory operands default to
+  // RIP-relative when interpreted as non-absolute references.
+  if (Parser.isParsingMasm() && is64BitMode())
+    return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, BaseReg,
+                                 IndexReg, Scale, Start, End, Size,
+                                 /*DefaultBaseReg=*/X86::RIP);
+
   if (!(BaseReg || IndexReg || RegNo))
     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
   return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
@@ -2420,8 +2604,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
         return Error(Parser.getTok().getLoc(), "Expected '}'");
       Parser.Lex(); // Eat curly.
 
-      if (Prefix == "vex2")
-        ForcedVEXEncoding = VEXEncoding_VEX2;
+      if (Prefix == "vex" || Prefix == "vex2")
+        ForcedVEXEncoding = VEXEncoding_VEX;
       else if (Prefix == "vex3")
         ForcedVEXEncoding = VEXEncoding_VEX3;
       else if (Prefix == "evex")
@@ -2711,7 +2895,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     // In MS inline asm curly braces mark the beginning/end of a block,
     // therefore they should be interpreted as end of statement
     CurlyAsEndOfStatement =
-        isParsingIntelSyntax() && isParsingInlineAsm() &&
+        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
         (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
     if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
       return TokError("unexpected token in argument list");
@@ -3096,9 +3280,122 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
 
 static const char *getSubtargetFeatureName(uint64_t Val);
 
-void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
+void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
+  Warning(Loc, "Instruction may be vulnerable to LVI and "
+               "requires manual mitigation");
+  Note(SMLoc(), "See https://software.intel.com/"
+                "security-software-guidance/insights/"
+                "deep-dive-load-value-injection#specialinstructions"
+                " for more information");
+}
+
+/// RET instructions, and instructions that indirectly call or jump through
+/// memory, combine a load and a branch within a single instruction. To
+/// mitigate these instructions against LVI, they must be decomposed into
+/// separate load and branch instructions, with an LFENCE in between. For more
+/// details, see:
+/// - X86LoadValueInjectionRetHardening.cpp
+/// - X86LoadValueInjectionIndirectThunks.cpp
+/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
+///
+/// Emits the mitigation, or a warning when the instruction requires manual
+/// mitigation.
+void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
+  // Information on control-flow instructions that require manual mitigation
+  // can be found here:
+  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+  switch (Inst.getOpcode()) {
+  case X86::RETW:
+  case X86::RETL:
+  case X86::RETQ:
+  case X86::RETIL:
+  case X86::RETIQ:
+  case X86::RETIW: {
+    MCInst ShlInst, FenceInst;
+    bool Parse32 = is32BitMode() || Code16GCC;
+    unsigned Basereg =
+        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
+    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
+    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
+                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
+                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
+    ShlInst.setOpcode(X86::SHL64mi);
+    ShlMemOp->addMemOperands(ShlInst, 5);
+    ShlInst.addOperand(MCOperand::createImm(0));
+    FenceInst.setOpcode(X86::LFENCE);
+    Out.emitInstruction(ShlInst, getSTI());
+    Out.emitInstruction(FenceInst, getSTI());
+    return;
+  }
+  case X86::JMP16m:
+  case X86::JMP32m:
+  case X86::JMP64m:
+  case X86::CALL16m:
+  case X86::CALL32m:
+  case X86::CALL64m:
+    emitWarningForSpecialLVIInstruction(Inst.getLoc());
+    return;
+  }
+}
+
+/// To mitigate LVI, every instruction that performs a load can be followed by
+/// an LFENCE instruction to squash any potential mis-speculation. There are
+/// some instructions that require additional considerations, and may require
+/// manual mitigation. For more details, see:
+/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
+///
+/// Emits the mitigation, or a warning when the instruction requires manual
+/// mitigation.
+void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
+                                                   MCStreamer &Out) {
+  auto Opcode = Inst.getOpcode();
+  auto Flags = Inst.getFlags();
+  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
+    // Information on REP string instructions that require manual mitigation
+    // can be found here:
+    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+    switch (Opcode) {
+    case X86::CMPSB:
+    case X86::CMPSW:
+    case X86::CMPSL:
+    case X86::CMPSQ:
+    case X86::SCASB:
+    case X86::SCASW:
+    case X86::SCASL:
+    case X86::SCASQ:
+      emitWarningForSpecialLVIInstruction(Inst.getLoc());
+      return;
+    }
+  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
+    // If a REP instruction is found on its own line, it may or may not be
+    // followed by a vulnerable instruction. Emit a warning just in case.
+    emitWarningForSpecialLVIInstruction(Inst.getLoc());
+    return;
+  }
+
+  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+  // Can't mitigate after terminators or calls. A control flow change may have
+  // already occurred.
+  if (MCID.isTerminator() || MCID.isCall())
+    return;
+
+  // LFENCE has the mayLoad property, don't double fence.
+  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
+    MCInst FenceInst;
+    FenceInst.setOpcode(X86::LFENCE);
+    Out.emitInstruction(FenceInst, getSTI());
+  }
+}
+
+void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
                                    MCStreamer &Out) {
-  Out.EmitInstruction(Inst, getSTI());
+  if (LVIInlineAsmHardening &&
+      getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
+    applyLVICFIMitigation(Inst, Out);
+
+  Out.emitInstruction(Inst, getSTI());
+
+  if (LVIInlineAsmHardening &&
+      getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
+    applyLVILoadHardeningMitigation(Inst, Out);
 }
 
 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -3133,7 +3430,7 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
     Inst.setOpcode(X86::WAIT);
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      EmitInstruction(Inst, Operands, Out);
+      emitInstruction(Inst, Operands, Out);
     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
   }
 }
@@ -3170,7 +3467,7 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
       (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
     return Match_Unsupported;
 
-  if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
+  if ((ForcedVEXEncoding == VEXEncoding_VEX ||
       ForcedVEXEncoding == VEXEncoding_VEX3) &&
       (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
     return Match_Unsupported;
@@ -3240,7 +3537,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
 
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      EmitInstruction(Inst, Operands, Out);
+      emitInstruction(Inst, Operands, Out);
     Opcode = Inst.getOpcode();
     return false;
   case Match_InvalidImmUnsignedi4: {
@@ -3282,20 +3579,47 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   // Otherwise, we assume that this may be an integer instruction, which comes
   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
+  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
 
   // Check for the various suffix matches.
   uint64_t ErrorInfoIgnore;
   FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
   unsigned Match[4];
 
+  // Some instructions, e.g. VPMULDQ, are not suffixed variants of another
+  // mnemonic (there is no VPMULD), so the suffix matcher should only try
+  // memory variants whose size matches the suffix.
+  // FIXME: This flag is a workaround for legacy instructions that didn't
+  // declare non-suffixed assembly variants.
+  bool HasVectorReg = false;
+  X86Operand *MemOp = nullptr;
+  for (const auto &Op : Operands) {
+    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
+    if (X86Op->isVectorReg())
+      HasVectorReg = true;
+    else if (X86Op->isMem()) {
+      MemOp = X86Op;
+      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
+      // We found an unqualified memory operand; stop looking, since IA allows
+      // only one memory operand.
+      break;
+    }
+  }
+
   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
     Tmp.back() = Suffixes[I];
-    Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
-                                MissingFeatures, MatchingInlineAsm,
-                                isParsingIntelSyntax());
-    // If this returned as a missing feature failure, remember that.
-    if (Match[I] == Match_MissingFeature)
-      ErrorInfoMissingFeatures = MissingFeatures;
+    if (MemOp && HasVectorReg)
+      MemOp->Mem.Size = MemSize[I];
+    Match[I] = Match_MnemonicFail;
+    if (MemOp || !HasVectorReg) {
+      Match[I] =
+          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
+                           MatchingInlineAsm, isParsingIntelSyntax());
+      // If this returned as a missing feature failure, remember that.
+      if (Match[I] == Match_MissingFeature)
+        ErrorInfoMissingFeatures = MissingFeatures;
+    }
   }
 
   // Restore the old token.
@@ -3309,7 +3633,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   if (NumSuccessfulMatches == 1) {
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      EmitInstruction(Inst, Operands, Out);
+      emitInstruction(Inst, Operands, Out);
     Opcode = Inst.getOpcode();
     return false;
   }
@@ -3562,7 +3886,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
       ;
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      EmitInstruction(Inst, Operands, Out);
+      emitInstruction(Inst, Operands, Out);
     Opcode = Inst.getOpcode();
     return false;
   } else if (NumSuccessfulMatches > 1) {
@@ -3684,9 +4008,9 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
     Section = getStreamer().getCurrentSectionOnly();
   }
  if (Section->UseCodeAlign())
-    getStreamer().EmitCodeAlignment(2, 0);
+    getStreamer().emitCodeAlignment(2, 0);
   else
-    getStreamer().EmitValueToAlignment(2, 0, 1, 0);
+    getStreamer().emitValueToAlignment(2, 0, 1, 0);
   return false;
 }
 
@@ -3699,7 +4023,7 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
     Parser.Lex();
     if (!is16BitMode()) {
       SwitchMode(X86::Mode16Bit);
-      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
     }
   } else if (IDVal == ".code16gcc") {
     // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
     Parser.Lex();
     Code16GCC = true;
     if (!is16BitMode()) {
       SwitchMode(X86::Mode16Bit);
-      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
     }
   } else if (IDVal == ".code32") {
     Parser.Lex();
     if (!is32BitMode()) {
       SwitchMode(X86::Mode32Bit);
-      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+      getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
     }
   } else if (IDVal == ".code64") {
     Parser.Lex();
     if (!is64BitMode()) {
       SwitchMode(X86::Mode64Bit);
-      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
+      getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
     }
   } else {
     Error(L, "unknown directive " + IDVal);
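
Editor's note: the new cl::opt only takes effect when the subtarget also carries the LVI feature bits that emitInstruction() checks. A minimal invocation sketch, assuming the usual cl::opt plumbing (-mllvm forwarding from clang) and the subtarget feature names "lvi-cfi" and "lvi-load-hardening" from X86.td; both flag spellings outside this patch are assumptions, not confirmed by the diff:

    # Standalone assembler; enables both feature bits plus the hidden option.
    llvm-mc -triple=x86_64 -mattr=+lvi-cfi,+lvi-load-hardening \
            -x86-experimental-lvi-inline-asm-hardening foo.s

    # From clang, cl::opt options are normally forwarded with -mllvm.
    clang -c foo.c -mlvi-hardening \
          -mllvm -x86-experimental-lvi-inline-asm-hardening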
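Reading applyLVICFIMitigation() above, the RET case emits an SHL64mi against the stack slot followed by an LFENCE, and then emitInstruction() emits the original return. A sketch of the resulting sequence for a hardened inline-asm ret (AT&T syntax; derived from the code, not taken from test output):

    # input:
            retq
    # emitted:
            shlq    $0, (%rsp)      # dummy read-modify-write of the return-
                                    # address slot; performs the load the fence
                                    # will serialize
            lfence                  # younger loads cannot execute until the
                                    # shl's load has resolved
            retq

The shift by zero changes no bits; it exists only so that the return address is loaded by an instruction that LFENCE can order before the RET consumes it.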
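For the load-hardening path, applyLVILoadHardeningMitigation() runs after the instruction is emitted and appends an LFENCE behind any instruction with the mayLoad property (except LFENCE itself, and except terminators, calls, and the REP-prefixed string instructions that only get a warning). A sketch of the effect:

    # input:
            movq    (%rdi), %rax
    # emitted:
            movq    (%rdi), %rax
            lfence                  # added after every mayLoad instruction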
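The VEXEncoding_VEX2 rename also makes "{vex}" an accepted spelling of the encoding-override prefix, with "{vex2}" kept as a legacy alias. An illustrative snippet (vaddps is an arbitrary VEX-encodable instruction chosen for the example):

    {vex}  vaddps %xmm2, %xmm1, %xmm0   # newly accepted spelling
    {vex2} vaddps %xmm2, %xmm1, %xmm0   # legacy spelling, same request
    {vex3} vaddps %xmm2, %xmm1, %xmm0   # force the 3-byte VEX prefix
    {evex} vaddps %xmm2, %xmm1, %xmm0   # force EVEX encoding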
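Finally, the MASM-mode hunks (onCast/IES_CAST, MatchRegisterByName for <register>.<field>, lookUpField, and the x64 RIP-relative default) aim to accept Intel-syntax input along these lines; a hedged sketch, where RECT and its field "top" are hypothetical and would have to come from a STRUCT definition visible to the parser:

    mov eax, DWORD PTR [rsp]    ; <TYPE> PTR cast, routed through onCast()
    mov eax, [rbx].RECT.top     ; register.field, resolved via lookUpField()
    mov eax, my_var             ; in 64-bit MASM mode, memory operands without
                                ; an explicit base default to RIP-relative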