diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp')
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp | 498 | 
1 files changed, 343 insertions, 155 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp index 325d0611817d..6eed6abd43e2 100644 --- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -37,6 +37,7 @@  #include "llvm/CodeGen/MachineInstrBuilder.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/Passes.h" +#include "llvm/InlineAsm.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/raw_ostream.h" @@ -126,10 +127,45 @@ namespace {      void bundleCFG(MachineFunction &MF);      MachineBasicBlock *MBB;     // Current basic block + +    // The hardware keeps track of how many FP registers are live, so we have +    // to model that exactly. Usually, each live register corresponds to an +    // FP<n> register, but when dealing with calls, returns, and inline +    // assembly, it is sometimes neccesary to have live scratch registers.      unsigned Stack[8];          // FP<n> Registers in each stack slot... -    unsigned RegMap[8];         // Track which stack slot contains each register      unsigned StackTop;          // The current top of the FP stack. +    enum { +      NumFPRegs = 16            // Including scratch pseudo-registers. +    }; + +    // For each live FP<n> register, point to its Stack[] entry. +    // The first entries correspond to FP0-FP6, the rest are scratch registers +    // used when we need slightly different live registers than what the +    // register allocator thinks. +    unsigned RegMap[NumFPRegs]; + +    // Pending fixed registers - Inline assembly needs FP registers to appear +    // in fixed stack slot positions. This is handled by copying FP registers +    // to ST registers before the instruction, and copying back after the +    // instruction. +    // +    // This is modeled with pending ST registers. NumPendingSTs is the number +    // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP +    // register that holds the ST value. The ST registers are not moved into +    // place until immediately before the instruction that needs them. +    // +    // It can happen that we need an ST register to be live when no FP register +    // holds the value: +    // +    //   %ST0 = COPY %FP4<kill> +    // +    // When that happens, we allocate a scratch FP register to hold the ST +    // value. That means every register in PendingST must be live. + +    unsigned NumPendingSTs; +    unsigned char PendingST[8]; +      // Set up our stack model to match the incoming registers to MBB.      void setupBlockStack(); @@ -142,13 +178,15 @@ namespace {          dbgs() << " FP" << Stack[i];          assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");        } +      for (unsigned i = 0; i != NumPendingSTs; ++i) +        dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);        dbgs() << "\n";      }      /// getSlot - Return the stack slot number a particular register number is      /// in.      unsigned getSlot(unsigned RegNo) const { -      assert(RegNo < 8 && "Regno out of range!"); +      assert(RegNo < NumFPRegs && "Regno out of range!");        return RegMap[RegNo];      } @@ -160,12 +198,17 @@ namespace {      /// getScratchReg - Return an FP register that is not currently in use.      unsigned getScratchReg() { -      for (int i = 7; i >= 0; --i) +      for (int i = NumFPRegs - 1; i >= 8; --i)          if (!isLive(i))            return i;        llvm_unreachable("Ran out of scratch FP registers");      } +    /// isScratchReg - Returns trus if RegNo is a scratch FP register. +    bool isScratchReg(unsigned RegNo) { +      return RegNo > 8 && RegNo < NumFPRegs; +    } +      /// getStackEntry - Return the X86::FP<n> register in register ST(i).      unsigned getStackEntry(unsigned STi) const {        if (STi >= StackTop) @@ -181,7 +224,7 @@ namespace {      // pushReg - Push the specified FP<n> register onto the stack.      void pushReg(unsigned Reg) { -      assert(Reg < 8 && "Register number out of range!"); +      assert(Reg < NumFPRegs && "Register number out of range!");        if (StackTop >= 8)          report_fatal_error("Stack overflow!");        Stack[StackTop] = Reg; @@ -236,7 +279,7 @@ namespace {      /// Adjust the live registers to be the set in Mask.      void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); -    /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is +    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is      /// st(0), FP reg FixStack[1] is st(1) etc.      void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,                           MachineBasicBlock::iterator I); @@ -251,7 +294,14 @@ namespace {      void handleCondMovFP(MachineBasicBlock::iterator &I);      void handleSpecialFP(MachineBasicBlock::iterator &I); -    bool translateCopy(MachineInstr*); +    // Check if a COPY instruction is using FP registers. +    bool isFPCopy(MachineInstr *MI) { +      unsigned DstReg = MI->getOperand(0).getReg(); +      unsigned SrcReg = MI->getOperand(1).getReg(); + +      return X86::RFP80RegClass.contains(DstReg) || +        X86::RFP80RegClass.contains(SrcReg); +    }    };    char FPS::ID = 0;  } @@ -341,6 +391,7 @@ void FPS::bundleCFG(MachineFunction &MF) {  bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {    bool Changed = false;    MBB = &BB; +  NumPendingSTs = 0;    setupBlockStack(); @@ -352,7 +403,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {      if (MI->isInlineAsm())        FPInstClass = X86II::SpecialFP; -    if (MI->isCopy() && translateCopy(MI)) +    if (MI->isCopy() && isFPCopy(MI))        FPInstClass = X86II::SpecialFP;      if (FPInstClass == X86II::NotFP) @@ -833,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {    // Kill registers by popping.    if (Kills && I != MBB->begin()) {      MachineBasicBlock::iterator I2 = llvm::prior(I); -    for (;;) { +    while (StackTop) {        unsigned KReg = getStackEntry(0);        if (!(Kills & (1 << KReg)))          break; @@ -881,7 +932,8 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,        continue;      // (Reg st0) (OldReg st0) = (Reg OldReg st0)      moveToTop(Reg, I); -    moveToTop(OldReg, I); +    if (FixCount > 0) +      moveToTop(OldReg, I);    }    DEBUG(dumpStack());  } @@ -1239,141 +1291,307 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {    MachineInstr *MI = I;    switch (MI->getOpcode()) {    default: llvm_unreachable("Unknown SpecialFP instruction!"); -  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! -  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! -  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! -    assert(StackTop == 0 && "Stack should be empty after a call!"); -    pushReg(getFPReg(MI->getOperand(0))); -    break; -  case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! -  case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! -  case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! -    // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. -    // The pattern we expect is: -    //  CALL -    //  FP1 = FpGET_ST0 -    //  FP4 = FpGET_ST1 -    // -    // At this point, we've pushed FP1 on the top of stack, so it should be -    // present if it isn't dead.  If it was dead, we already emitted a pop to -    // remove it from the stack and StackTop = 0. -     -    // Push FP4 as top of stack next. -    pushReg(getFPReg(MI->getOperand(0))); +  case TargetOpcode::COPY: { +    // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. +    const MachineOperand &MO1 = MI->getOperand(1); +    const MachineOperand &MO0 = MI->getOperand(0); +    unsigned DstST = MO0.getReg() - X86::ST0; +    unsigned SrcST = MO1.getReg() - X86::ST0; +    bool KillsSrc = MI->killsRegister(MO1.getReg()); + +    // ST = COPY FP. Set up a pending ST register. +    if (DstST < 8) { +      unsigned SrcFP = getFPReg(MO1); +      assert(isLive(SrcFP) && "Cannot copy dead register"); +      assert(!MO0.isDead() && "Cannot copy to dead ST register"); + +      // Unallocated STs are marked as the nonexistent FP255. +      while (NumPendingSTs <= DstST) +        PendingST[NumPendingSTs++] = NumFPRegs; + +      // STi could still be live from a previous inline asm. +      if (isScratchReg(PendingST[DstST])) { +        DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST]) +                     << '\n'); +        freeStackSlotBefore(MI, PendingST[DstST]); +      } -    // If StackTop was 0 before we pushed our operand, then ST(0) must have been -    // dead.  In this case, the ST(1) value is the only thing that is live, so -    // it should be on the TOS (after the pop that was emitted) and is.  Just -    // continue in this case. -    if (StackTop == 1) +      // When the source is killed, allocate a scratch FP register. +      if (KillsSrc) { +        unsigned Slot = getSlot(SrcFP); +        unsigned SR = getScratchReg(); +        PendingST[DstST] = SR; +        Stack[Slot] = SR; +        RegMap[SR] = Slot; +      } else +        PendingST[DstST] = SrcFP;        break; -     -    // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top -    // elements so that our accounting is correct. -    unsigned RegOnTop = getStackEntry(0); -    unsigned RegNo = getStackEntry(1); -     -    // Swap the slots the regs are in. -    std::swap(RegMap[RegNo], RegMap[RegOnTop]); -     -    // Swap stack slot contents. -    if (RegMap[RegOnTop] >= StackTop) -      report_fatal_error("Access past stack top!"); -    std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); -    break; -  } -  case X86::FpSET_ST0_32: -  case X86::FpSET_ST0_64: -  case X86::FpSET_ST0_80: { -    // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm -    // arguments that use an st constraint. We expect a sequence of -    // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM -    unsigned Op0 = getFPReg(MI->getOperand(0)); - -    if (!MI->killsRegister(X86::FP0 + Op0)) { -      // Duplicate Op0 into a temporary on the stack top. -      duplicateToTop(Op0, getScratchReg(), I); -    } else { -      // Op0 is killed, so just swap it into position. -      moveToTop(Op0, I);      } -    --StackTop;   // "Forget" we have something on the top of stack! -    break; -  } -  case X86::FpSET_ST1_32: -  case X86::FpSET_ST1_64: -  case X86::FpSET_ST1_80: { -    // Set up st(1) for inline asm. We are assuming that st(0) has already been -    // set up by FpSET_ST0, and our StackTop is off by one because of it. -    unsigned Op0 = getFPReg(MI->getOperand(0)); -    // Restore the actual StackTop from before Fp_SET_ST0. -    // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we -    // are not enforcing the constraint. -    ++StackTop; -    unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). -    if (!MI->killsRegister(X86::FP0 + Op0)) { -      duplicateToTop(Op0, getScratchReg(), I); -      moveToTop(RegOnTop, I); -    } else if (getSTReg(Op0) != X86::ST1) { -      // We have the wrong value at st(1). Shuffle! Untested! -      moveToTop(getStackEntry(1), I); -      moveToTop(Op0, I); -      moveToTop(RegOnTop, I); + +    // FP = COPY ST. Extract fixed stack value. +    // Any instruction defining ST registers must have assigned them to a +    // scratch register. +    if (SrcST < 8) { +      unsigned DstFP = getFPReg(MO0); +      assert(!isLive(DstFP) && "Cannot copy ST to live FP register"); +      assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register"); +      unsigned SrcFP = PendingST[SrcST]; +      assert(isScratchReg(SrcFP) && "Expected ST in a scratch register"); +      assert(isLive(SrcFP) && "Scratch holding ST is dead"); + +      // DstFP steals the stack slot from SrcFP. +      unsigned Slot = getSlot(SrcFP); +      Stack[Slot] = DstFP; +      RegMap[DstFP] = Slot; + +      // Always treat the ST as killed. +      PendingST[SrcST] = NumFPRegs; +      while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) +        --NumPendingSTs; +      break;      } -    assert(StackTop >= 2 && "Too few live registers"); -    StackTop -= 2; // "Forget" both st(0) and st(1). -    break; -  } -  case X86::MOV_Fp3232: -  case X86::MOV_Fp3264: -  case X86::MOV_Fp6432: -  case X86::MOV_Fp6464:  -  case X86::MOV_Fp3280: -  case X86::MOV_Fp6480: -  case X86::MOV_Fp8032: -  case X86::MOV_Fp8064:  -  case X86::MOV_Fp8080: { -    const MachineOperand &MO1 = MI->getOperand(1); -    unsigned SrcReg = getFPReg(MO1); -    const MachineOperand &MO0 = MI->getOperand(0); -    unsigned DestReg = getFPReg(MO0); -    if (MI->killsRegister(X86::FP0+SrcReg)) { +    // FP <- FP copy. +    unsigned DstFP = getFPReg(MO0); +    unsigned SrcFP = getFPReg(MO1); +    assert(isLive(SrcFP) && "Cannot copy dead register"); +    if (KillsSrc) {        // If the input operand is killed, we can just change the owner of the        // incoming stack slot into the result. -      unsigned Slot = getSlot(SrcReg); -      assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); -      Stack[Slot] = DestReg; -      RegMap[DestReg] = Slot; - +      unsigned Slot = getSlot(SrcFP); +      Stack[Slot] = DstFP; +      RegMap[DstFP] = Slot;      } else { -      // For FMOV we just duplicate the specified value to a new stack slot. +      // For COPY we just duplicate the specified value to a new stack slot.        // This could be made better, but would require substantial changes. -      duplicateToTop(SrcReg, DestReg, I); +      duplicateToTop(SrcFP, DstFP, I);      } +    break; +  } + +  case X86::FpPOP_RETVAL: { +    // The FpPOP_RETVAL instruction is used after calls that return a value on +    // the floating point stack. We cannot model this with ST defs since CALL +    // instructions have fixed clobber lists. This instruction is interpreted +    // to mean that there is one more live register on the stack than we +    // thought. +    // +    // This means that StackTop does not match the hardware stack between a +    // call and the FpPOP_RETVAL instructions.  We do tolerate FP instructions +    // between CALL and FpPOP_RETVAL as long as they don't overflow the +    // hardware stack. +    unsigned DstFP = getFPReg(MI->getOperand(0)); + +    // Move existing stack elements up to reflect reality. +    assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL"); +    if (StackTop) { +      std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1); +      for (unsigned i = 0; i != NumFPRegs; ++i) +        ++RegMap[i];      } +    ++StackTop; + +    // DstFP is the new bottom of the stack. +    Stack[0] = DstFP; +    RegMap[DstFP] = 0; + +    // DstFP will be killed by processBasicBlock if this was a dead def.      break; +  } +    case TargetOpcode::INLINEASM: {      // The inline asm MachineInstr currently only *uses* FP registers for the      // 'f' constraint.  These should be turned into the current ST(x) register -    // in the machine instr.  Also, any kills should be explicitly popped after -    // the inline asm. -    unsigned Kills = 0; +    // in the machine instr. +    // +    // There are special rules for x87 inline assembly. The compiler must know +    // exactly how many registers are popped and pushed implicitly by the asm. +    // Otherwise it is not possible to restore the stack state after the inline +    // asm. +    // +    // There are 3 kinds of input operands: +    // +    // 1. Popped inputs. These must appear at the stack top in ST0-STn. A +    //    popped input operand must be in a fixed stack slot, and it is either +    //    tied to an output operand, or in the clobber list. The MI has ST use +    //    and def operands for these inputs. +    // +    // 2. Fixed inputs. These inputs appear in fixed stack slots, but are +    //    preserved by the inline asm. The fixed stack slots must be STn-STm +    //    following the popped inputs. A fixed input operand cannot be tied to +    //    an output or appear in the clobber list. The MI has ST use operands +    //    and no defs for these inputs. +    // +    // 3. Preserved inputs. These inputs use the "f" constraint which is +    //    represented as an FP register. The inline asm won't change these +    //    stack slots. +    // +    // Outputs must be in ST registers, FP outputs are not allowed. Clobbered +    // registers do not count as output operands. The inline asm changes the +    // stack as if it popped all the popped inputs and then pushed all the +    // output operands. + +    // Scan the assembly for ST registers used, defined and clobbered. We can +    // only tell clobbers from defs by looking at the asm descriptor. +    unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; +    unsigned NumOps = 0; +    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); +         i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) { +      unsigned Flags = MI->getOperand(i).getImm(); +      NumOps = InlineAsm::getNumOperandRegisters(Flags); +      if (NumOps != 1) +        continue; +      const MachineOperand &MO = MI->getOperand(i + 1); +      if (!MO.isReg()) +        continue; +      unsigned STReg = MO.getReg() - X86::ST0; +      if (STReg >= 8) +        continue; + +      switch (InlineAsm::getKind(Flags)) { +      case InlineAsm::Kind_RegUse: +        STUses |= (1u << STReg); +        break; +      case InlineAsm::Kind_RegDef: +      case InlineAsm::Kind_RegDefEarlyClobber: +        STDefs |= (1u << STReg); +        if (MO.isDead()) +          STDeadDefs |= (1u << STReg); +        break; +      case InlineAsm::Kind_Clobber: +        STClobbers |= (1u << STReg); +        break; +      default: +        break; +      } +    } + +    if (STUses && !isMask_32(STUses)) +      MI->emitError("fixed input regs must be last on the x87 stack"); +    unsigned NumSTUses = CountTrailingOnes_32(STUses); + +    // Defs must be contiguous from the stack top. ST0-STn. +    if (STDefs && !isMask_32(STDefs)) { +      MI->emitError("output regs must be last on the x87 stack"); +      STDefs = NextPowerOf2(STDefs) - 1; +    } +    unsigned NumSTDefs = CountTrailingOnes_32(STDefs); + +    // So must the clobbered stack slots. ST0-STm, m >= n. +    if (STClobbers && !isMask_32(STDefs | STClobbers)) +      MI->emitError("clobbers must be last on the x87 stack"); + +    // Popped inputs are the ones that are also clobbered or defined. +    unsigned STPopped = STUses & (STDefs | STClobbers); +    if (STPopped && !isMask_32(STPopped)) +      MI->emitError("implicitly popped regs must be last on the x87 stack"); +    unsigned NumSTPopped = CountTrailingOnes_32(STPopped); + +    DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " +                 << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n"); + +    // Scan the instruction for FP uses corresponding to "f" constraints. +    // Collect FP registers to kill afer the instruction. +    // Always kill all the scratch regs. +    unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; +    unsigned FPUsed = 0;      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {        MachineOperand &Op = MI->getOperand(i);        if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)          continue; -      assert(Op.isUse() && "Only handle inline asm uses right now"); -       +      if (!Op.isUse()) +        MI->emitError("illegal \"f\" output constraint");        unsigned FPReg = getFPReg(Op); -      Op.setReg(getSTReg(FPReg)); -       +      FPUsed |= 1U << FPReg; +        // If we kill this operand, make sure to pop it from the stack after the        // asm.  We just remember it for now, and pop them all off at the end in        // a batch.        if (Op.isKill()) -        Kills |= 1U << FPReg; +        FPKills |= 1U << FPReg; +    } + +    // The popped inputs will be killed by the instruction, so duplicate them +    // if the FP register needs to be live after the instruction, or if it is +    // used in the instruction itself. We effectively treat the popped inputs +    // as early clobbers. +    for (unsigned i = 0; i < NumSTPopped; ++i) { +      if ((FPKills & ~FPUsed) & (1u << PendingST[i])) +        continue; +      unsigned SR = getScratchReg(); +      duplicateToTop(PendingST[i], SR, I); +      DEBUG(dbgs() << "Duplicating ST" << i << " in FP" +                   << unsigned(PendingST[i]) << " to avoid clobbering it.\n"); +      PendingST[i] = SR; +    } + +    // Make sure we have a unique live register for every fixed use. Some of +    // them could be undef uses, and we need to emit LD_F0 instructions. +    for (unsigned i = 0; i < NumSTUses; ++i) { +      if (i < NumPendingSTs && PendingST[i] < NumFPRegs) { +        // Check for shared assignments. +        for (unsigned j = 0; j < i; ++j) { +          if (PendingST[j] != PendingST[i]) +            continue; +          // STi and STj are inn the same register, create a copy. +          unsigned SR = getScratchReg(); +          duplicateToTop(PendingST[i], SR, I); +          DEBUG(dbgs() << "Duplicating ST" << i << " in FP" +                       << unsigned(PendingST[i]) +                       << " to avoid collision with ST" << j << '\n'); +          PendingST[i] = SR; +        } +        continue; +      } +      unsigned SR = getScratchReg(); +      DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n'); +      BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0)); +      pushReg(SR); +      PendingST[i] = SR; +      if (NumPendingSTs == i) +        ++NumPendingSTs; +    } +    assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned"); + +    // Now we can rearrange the live registers to match what was requested. +    shuffleStackTop(PendingST, NumPendingSTs, I); +    DEBUG({dbgs() << "Before asm: "; dumpStack();}); + +    // With the stack layout fixed, rewrite the FP registers. +    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { +      MachineOperand &Op = MI->getOperand(i); +      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) +        continue; +      unsigned FPReg = getFPReg(Op); +      Op.setReg(getSTReg(FPReg)); +    } + +    // Simulate the inline asm popping its inputs and pushing its outputs. +    StackTop -= NumSTPopped; + +    // Hold the fixed output registers in scratch FP registers. They will be +    // transferred to real FP registers by copies. +    NumPendingSTs = 0; +    for (unsigned i = 0; i < NumSTDefs; ++i) { +      unsigned SR = getScratchReg(); +      pushReg(SR); +      FPKills &= ~(1u << SR); +    } +    for (unsigned i = 0; i < NumSTDefs; ++i) +      PendingST[NumPendingSTs++] = getStackEntry(i); +    DEBUG({dbgs() << "After asm: "; dumpStack();}); + +    // If any of the ST defs were dead, pop them immediately. Our caller only +    // handles dead FP defs. +    MachineBasicBlock::iterator InsertPt = MI; +    for (unsigned i = 0; STDefs & (1u << i); ++i) { +      if (!(STDeadDefs & (1u << i))) +        continue; +      freeStackSlotAfter(InsertPt, PendingST[i]); +      PendingST[i] = NumFPRegs;      } +    while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) +      --NumPendingSTs;      // If this asm kills any FP registers (is the last use of them) we must      // explicitly emit pop instructions for them.  Do this now after the asm has @@ -1382,16 +1600,16 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {      //      // Note: this might be a non-optimal pop sequence.  We might be able to do      // better by trying to pop in stack order or something. -    MachineBasicBlock::iterator InsertPt = MI; -    while (Kills) { -      unsigned FPReg = CountTrailingZeros_32(Kills); -      freeStackSlotAfter(InsertPt, FPReg); -      Kills &= ~(1U << FPReg); +    while (FPKills) { +      unsigned FPReg = CountTrailingZeros_32(FPKills); +      if (isLive(FPReg)) +        freeStackSlotAfter(InsertPt, FPReg); +      FPKills &= ~(1U << FPReg);      }      // Don't delete the inline asm!      return;    } -       +    case X86::RET:    case X86::RETI:      // If RET has an FP register use operand, pass the first one in ST(0) and @@ -1489,33 +1707,3 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {    } else      --I;  } - -// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. -bool FPS::translateCopy(MachineInstr *MI) { -  unsigned DstReg = MI->getOperand(0).getReg(); -  unsigned SrcReg = MI->getOperand(1).getReg(); - -  if (DstReg == X86::ST0) { -    MI->setDesc(TII->get(X86::FpSET_ST0_80)); -    MI->RemoveOperand(0); -    return true; -  } -  if (DstReg == X86::ST1) { -    MI->setDesc(TII->get(X86::FpSET_ST1_80)); -    MI->RemoveOperand(0); -    return true; -  } -  if (SrcReg == X86::ST0) { -    MI->setDesc(TII->get(X86::FpGET_ST0_80)); -    return true; -  } -  if (SrcReg == X86::ST1) { -    MI->setDesc(TII->get(X86::FpGET_ST1_80)); -    return true; -  } -  if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { -    MI->setDesc(TII->get(X86::MOV_Fp8080)); -    return true; -  } -  return false; -}  | 
