1 files changed, 291 insertions, 27 deletions
diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 3299a53ff5ba..9a455c105482 100644
--- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -10,11 +10,13 @@
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "MCTargetDesc/RISCVMCExpr.h"
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "MCTargetDesc/RISCVTargetStreamer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
@@ -22,10 +24,17 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 
+#include <limits>
+
 using namespace llvm;
 
+// Include the auto-generated portion of the compress emitter.
+#define GEN_COMPRESS_INSTR
+#include "RISCVGenCompressInstEmitter.inc"
+
 namespace {
 struct RISCVOperand;
 
@@ -33,11 +42,16 @@ class RISCVAsmParser : public MCTargetAsmParser {
   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
   bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
 
+  RISCVTargetStreamer &getTargetStreamer() {
+    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+    return static_cast<RISCVTargetStreamer &>(TS);
+  }
+
   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                       unsigned Kind) override;
 
   bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
-                                  int Lower, int Upper, Twine Msg);
+                                  int64_t Lower, int64_t Upper, Twine Msg);
 
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                OperandVector &Operands, MCStreamer &Out,
@@ -51,6 +65,20 @@ class RISCVAsmParser : public MCTargetAsmParser {
 
   bool ParseDirective(AsmToken DirectiveID) override;
 
+  // Helper to actually emit an instruction to the MCStreamer. Also, when
+  // possible, compression of the instruction is performed.
+  void emitToStreamer(MCStreamer &S, const MCInst &Inst);
+
+  // Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that
+  // synthesize the desired immedate value into the destination register.
+  void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
+
+  /// Helper for processing MC instructions that have been successfully matched
+  /// by MatchAndEmitInstruction. Modifications to the emitted instructions,
+  /// like the expansion of pseudo instructions (e.g., "li"), can be performed
+  /// in this method.
+  bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
 // Auto-generated instruction matching functions
 #define GET_ASSEMBLER_HEADER
 #include "RISCVGenAsmMatcher.inc"
@@ -61,8 +89,25 @@ class RISCVAsmParser : public MCTargetAsmParser {
   OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
   OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
 
-  bool parseOperand(OperandVector &Operands);
+  bool parseOperand(OperandVector &Operands, bool ForceImmediate);
+
+  bool parseDirectiveOption();
+
+  void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    if (!(getSTI().getFeatureBits()[Feature])) {
+      MCSubtargetInfo &STI = copySTI();
+      setAvailableFeatures(
+          ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
+    }
+  }
 
+  void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    if (getSTI().getFeatureBits()[Feature]) {
+      MCSubtargetInfo &STI = copySTI();
+      setAvailableFeatures(
+          ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
+    }
+  }
 public:
   enum RISCVMatchResultTy {
     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
@@ -78,6 +123,10 @@ public:
   RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
                  const MCInstrInfo &MII, const MCTargetOptions &Options)
       : MCTargetAsmParser(Options, STI, MII) {
+    Parser.addAliasForDirective(".half", ".2byte");
+    Parser.addAliasForDirective(".hword", ".2byte");
+    Parser.addAliasForDirective(".word", ".4byte");
+    Parser.addAliasForDirective(".dword", ".8byte");
     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
   }
 };
@@ -167,6 +216,16 @@ public:
 
   // Predicate methods for AsmOperands defined in RISCVInstrInfo.td
 
+  bool isBareSymbol() const {
+    int64_t Imm;
+    RISCVMCExpr::VariantKind VK;
+    // Must be of 'immediate' type but not a constant.
+    if (!isImm() || evaluateConstantImm(Imm, VK))
+      return false;
+    return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+           VK == RISCVMCExpr::VK_RISCV_None;
+  }
+
   /// Return true if the operand is a valid for the fence instruction e.g.
   /// ('iorw').
   bool isFenceArg() const {
@@ -206,6 +265,18 @@ public:
     return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid;
   }
 
+  bool isImmXLen() const {
+    int64_t Imm;
+    RISCVMCExpr::VariantKind VK;
+    if (!isImm())
+      return false;
+    bool IsConstantImm = evaluateConstantImm(Imm, VK);
+    // Given only Imm, ensuring that the actually specified constant is either
+    // a signed or unsigned 64-bit number is unfortunately impossible.
+    bool IsInRange = isRV64() ? true : isInt<32>(Imm) || isUInt<32>(Imm);
+    return IsConstantImm && IsInRange && VK == RISCVMCExpr::VK_RISCV_None;
+  }
+
   bool isUImmLog2XLen() const {
     int64_t Imm;
     RISCVMCExpr::VariantKind VK;
@@ -260,12 +331,26 @@ public:
            (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
   }
 
-  bool isUImm6NonZero() const {
+  bool isSImm6NonZero() const {
+    RISCVMCExpr::VariantKind VK;
+    int64_t Imm;
+    bool IsValid;
+    bool IsConstantImm = evaluateConstantImm(Imm, VK);
+    if (!IsConstantImm)
+      IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+    else
+      IsValid = ((Imm != 0) && isInt<6>(Imm));
+    return IsValid &&
+           (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+  }
+
+  bool isCLUIImm() const {
     int64_t Imm;
     RISCVMCExpr::VariantKind VK;
     bool IsConstantImm = evaluateConstantImm(Imm, VK);
-    return IsConstantImm && isUInt<6>(Imm) && (Imm != 0) &&
-           VK == RISCVMCExpr::VK_RISCV_None;
+    return IsConstantImm && (Imm != 0) &&
+           (isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) &&
+            VK == RISCVMCExpr::VK_RISCV_None;
   }
 
   bool isUImm7Lsb00() const {
@@ -321,8 +406,9 @@ public:
       IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
     else
       IsValid = isInt<12>(Imm);
-    return IsValid &&
-           (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+    return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
+                       VK == RISCVMCExpr::VK_RISCV_LO ||
+                       VK == RISCVMCExpr::VK_RISCV_PCREL_LO);
   }
 
   bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
@@ -338,11 +424,11 @@ public:
 
   bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
 
-  bool isSImm10Lsb0000() const {
+  bool isSImm10Lsb0000NonZero() const {
     int64_t Imm;
     RISCVMCExpr::VariantKind VK;
     bool IsConstantImm = evaluateConstantImm(Imm, VK);
-    return IsConstantImm && isShiftedInt<6, 4>(Imm) &&
+    return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) &&
            VK == RISCVMCExpr::VK_RISCV_None;
   }
 
@@ -564,7 +650,7 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
 }
 
 bool RISCVAsmParser::generateImmOutOfRangeError(
-    OperandVector &Operands, uint64_t ErrorInfo, int Lower, int Upper,
+    OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper,
     Twine Msg = "immediate must be an integer in the range") {
   SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
   return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
@@ -581,9 +667,7 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   default:
     break;
   case Match_Success:
-    Inst.setLoc(IDLoc);
-    Out.EmitInstruction(Inst, getSTI());
-    return false;
+    return processInstruction(Inst, IDLoc, Out);
   case Match_MissingFeature:
     return Error(IDLoc, "instruction use requires an option to be enabled");
   case Match_MnemonicFail:
@@ -600,6 +684,14 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     }
     return Error(ErrorLoc, "invalid operand for instruction");
   }
+  case Match_InvalidImmXLen:
+    if (isRV64()) {
+      SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+      return Error(ErrorLoc, "operand must be a constant 64-bit integer");
+    }
+    return generateImmOutOfRangeError(Operands, ErrorInfo,
+                                      std::numeric_limits<int32_t>::min(),
+                                      std::numeric_limits<uint32_t>::max());
   case Match_InvalidUImmLog2XLen:
     if (isRV64())
       return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
@@ -613,8 +705,14 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidSImm6:
     return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
                                       (1 << 5) - 1);
-  case Match_InvalidUImm6NonZero:
-    return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6) - 1);
+  case Match_InvalidSImm6NonZero:
+    return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
+                                      (1 << 5) - 1,
+        "immediate must be non-zero in the range");
+  case Match_InvalidCLUIImm:
+    return generateImmOutOfRangeError(
+        Operands, ErrorInfo, 1, (1 << 5) - 1,
+        "immediate must be in [0xfffe0, 0xfffff] or");
   case Match_InvalidUImm7Lsb00:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, 0, (1 << 7) - 4,
@@ -639,10 +737,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, 4, (1 << 10) - 4,
         "immediate must be a multiple of 4 bytes in the range");
-  case Match_InvalidSImm10Lsb0000:
+  case Match_InvalidSImm10Lsb0000NonZero:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, -(1 << 9), (1 << 9) - 16,
-        "immediate must be a multiple of 16 bytes in the range");
+        "immediate must be a multiple of 16 bytes and non-zero in the range");
   case Match_InvalidSImm12:
     return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11),
                                       (1 << 11) - 1);
@@ -674,6 +772,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
         ErrorLoc,
         "operand must be a valid floating point rounding mode mnemonic");
   }
+  case Match_InvalidBareSymbol: {
+    SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+    return Error(ErrorLoc, "operand must be a bare symbol name");
+  }
   }
 
   llvm_unreachable("Unknown match type detected!");
@@ -838,12 +940,15 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
   return MatchOperand_Success;
 }
 
-/// Looks at a token type and creates the relevant operand
-/// from this information, adding to Operands.
-/// If operand was parsed, returns false, else true.
-bool RISCVAsmParser::parseOperand(OperandVector &Operands) {
-  // Attempt to parse token as register
-  if (parseRegister(Operands, true) == MatchOperand_Success)
+/// Looks at a token type and creates the relevant operand from this
+/// information, adding to Operands. If operand was parsed, returns false, else
+/// true. If ForceImmediate is true, no attempt will be made to parse the
+/// operand as a register, which is needed for pseudoinstructions such as
+/// call.
+bool RISCVAsmParser::parseOperand(OperandVector &Operands,
+                                  bool ForceImmediate) {
+  // Attempt to parse token as register, unless ForceImmediate.
+  if (!ForceImmediate && parseRegister(Operands, true) == MatchOperand_Success)
     return false;
 
   // Attempt to parse token as an immediate
@@ -870,7 +975,8 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
     return false;
 
   // Parse first operand
-  if (parseOperand(Operands))
+  bool ForceImmediate = (Name == "call" || Name == "tail");
+  if (parseOperand(Operands, ForceImmediate))
     return true;
 
   // Parse until end of statement, consuming commas between operands
@@ -879,7 +985,7 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
     getLexer().Lex();
 
     // Parse next operand
-    if (parseOperand(Operands))
+    if (parseOperand(Operands, false))
       return true;
   }
 
@@ -924,7 +1030,7 @@ bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr,
       isa<MCSymbolRefExpr>(BE->getRHS()))
     return true;
 
-  // See if the addend is is a constant, otherwise there's more going
+  // See if the addend is a constant, otherwise there's more going
   // on here than we can deal with.
   auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
   if (!AddendExpr)
@@ -938,7 +1044,165 @@ bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr,
   return Kind != RISCVMCExpr::VK_RISCV_Invalid;
 }
 
-bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
+bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
+  // This returns false if this function recognizes the directive
+  // regardless of whether it is successfully handles or reports an
+  // error. Otherwise it returns true to give the generic parser a
+  // chance at recognizing it.
+  StringRef IDVal = DirectiveID.getString();
+
+  if (IDVal == ".option")
+    return parseDirectiveOption();
+
+  return true;
+}
+
+bool RISCVAsmParser::parseDirectiveOption() {
+  MCAsmParser &Parser = getParser();
+  // Get the option token.
+  AsmToken Tok = Parser.getTok();
+  // At the moment only identifiers are supported.
+  if (Tok.isNot(AsmToken::Identifier))
+    return Error(Parser.getTok().getLoc(),
+                 "unexpected token, expected identifier");
+
+  StringRef Option = Tok.getIdentifier();
+
+  if (Option == "rvc") {
+    getTargetStreamer().emitDirectiveOptionRVC();
+
+    Parser.Lex();
+    if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+      return Error(Parser.getTok().getLoc(),
+                   "unexpected token, expected end of statement");
+
+    setFeatureBits(RISCV::FeatureStdExtC, "c");
+    return false;
+  }
+
+  if (Option == "norvc") {
+    getTargetStreamer().emitDirectiveOptionNoRVC();
+
+    Parser.Lex();
+    if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+      return Error(Parser.getTok().getLoc(),
+                   "unexpected token, expected end of statement");
+
+    clearFeatureBits(RISCV::FeatureStdExtC, "c");
+    return false;
+  }
+
+  // Unknown option.
+  Warning(Parser.getTok().getLoc(),
+          "unknown option, expected 'rvc' or 'norvc'");
+  Parser.eatToEndOfStatement();
+  return false;
+}
+
+void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
+  MCInst CInst;
+  bool Res = compressInst(CInst, Inst, getSTI(), S.getContext());
+  CInst.setLoc(Inst.getLoc());
+  S.EmitInstruction((Res ? CInst : Inst), getSTI());
+}
+
+void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
+                                 MCStreamer &Out) {
+  if (isInt<32>(Value)) {
+    // Emits the MC instructions for loading a 32-bit constant into a register.
+    //
+    // Depending on the active bits in the immediate Value v, the following
+    // instruction sequences are emitted:
+    //
+    // v == 0                        : ADDI(W)
+    // v[0,12) != 0 && v[12,32) == 0 : ADDI(W)
+    // v[0,12) == 0 && v[12,32) != 0 : LUI
+    // v[0,32) != 0                  : LUI+ADDI(W)
+    //
+    int64_t Hi20 = ((Value + 0x800) >> 12) & 0xFFFFF;
+    int64_t Lo12 = SignExtend64<12>(Value);
+    unsigned SrcReg = RISCV::X0;
+
+    if (Hi20) {
+      emitToStreamer(Out,
+                     MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Hi20));
+      SrcReg = DestReg;
+    }
+
+    if (Lo12 || Hi20 == 0) {
+      unsigned AddiOpcode =
+          STI->hasFeature(RISCV::Feature64Bit) ? RISCV::ADDIW : RISCV::ADDI;
+      emitToStreamer(Out, MCInstBuilder(AddiOpcode)
+                              .addReg(DestReg)
+                              .addReg(SrcReg)
+                              .addImm(Lo12));
+    }
+    return;
+  }
+  assert(STI->hasFeature(RISCV::Feature64Bit) &&
+         "Target must be 64-bit to support a >32-bit constant");
+
+  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
+  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
+  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
+  // while the following ADDI instructions contribute up to 12 bits each.
+  //
+  // On the first glance, implementing this seems to be possible by simply
+  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
+  // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
+  // fact that ADDI performs a sign extended addition, doing it like that would
+  // only be possible when at most 11 bits of the ADDI instructions are used.
+  // Using all 12 bits of the ADDI instructions, like done by GAS, actually
+  // requires that the constant is processed starting with the least significant
+  // bit.
+  //
+  // In the following, constants are processed from LSB to MSB but instruction
+  // emission is performed from MSB to LSB by recursively calling
+  // emitLoadImm. In each recursion, first the lowest 12 bits are removed
+  // from the constant and the optimal shift amount, which can be greater than
+  // 12 bits if the constant is sparse, is determined. Then, the shifted
+  // remaining constant is processed recursively and gets emitted as soon as it
+  // fits into 32 bits. The emission of the shifts and additions is subsequently
+  // performed when the recursion returns.
+  //
+  int64_t Lo12 = SignExtend64<12>(Value);
+  int64_t Hi52 = (Value + 0x800) >> 12;
+  int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
+  Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+  emitLoadImm(DestReg, Hi52, Out);
+
+  emitToStreamer(Out, MCInstBuilder(RISCV::SLLI)
+                          .addReg(DestReg)
+                          .addReg(DestReg)
+                          .addImm(ShiftAmount));
+
+  if (Lo12)
+    emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+                            .addReg(DestReg)
+                            .addReg(DestReg)
+                            .addImm(Lo12));
+}
+
+bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+                                        MCStreamer &Out) {
+  Inst.setLoc(IDLoc);
+
+  if (Inst.getOpcode() == RISCV::PseudoLI) {
+    auto Reg = Inst.getOperand(0).getReg();
+    int64_t Imm = Inst.getOperand(1).getImm();
+    // On RV32 the immediate here can either be a signed or an unsigned
+    // 32-bit number. Sign extension has to be performed to ensure that Imm
+    // represents the expected signed 64-bit number.
+    if (!isRV64())
+      Imm = SignExtend64<32>(Imm);
+    emitLoadImm(Reg, Imm, Out);
+    return false;
+  }
+
+  emitToStreamer(Out, Inst);
+  return false;
+}
 
 extern "C" void LLVMInitializeRISCVAsmParser() {
   RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());