Diffstat (limited to 'lib/Target/X86/X86Instr64bit.td')
 -rw-r--r--  lib/Target/X86/X86Instr64bit.td  468
 1 file changed, 376 insertions, 92 deletions
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 472ba4c46285..ef19823a2831 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -28,26 +28,29 @@ def i64i32imm_pcrel : Operand<i64> {
 // 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64>;
+def i64i8imm : Operand<i64> {
+  let ParserMatchClass = ImmSExt8AsmOperand;
+}
 
 def lea64mem : Operand<i64> {
   let PrintMethod = "printlea64mem";
-  let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+  let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
+  let ParserMatchClass = X86MemAsmOperand;
 }
 
 def lea64_32mem : Operand<i32> {
   let PrintMethod = "printlea64_32mem";
   let AsmOperandLowerMethod = "lower_lea64_32mem";
-  let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
+  let ParserMatchClass = X86MemAsmOperand;
 }
 
 //===----------------------------------------------------------------------===//
 // Complex Pattern Definitions.
 //
 def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
-                               [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper,
-                                X86WrapperRIP],
-                               []>;
+                               [add, sub, mul, X86mul_imm, shl, or, frameindex,
+                                X86WrapperRIP], []>;
 
 def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
                                [tglobaltlsaddr], []>;
@@ -129,13 +132,40 @@ let isCall = 1 in
     def CALL64pcrel32 : Ii32<0xE8, RawFrm,
                              (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
                              "call\t$dst", []>,
-                        Requires<[In64BitMode]>;
+                        Requires<[In64BitMode, NotWin64]>;
     def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                          "call\t{*}$dst", [(X86call GR64:$dst)]>;
+                          "call\t{*}$dst", [(X86call GR64:$dst)]>,
+                        Requires<[NotWin64]>;
     def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
-                          "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+                          "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+                        Requires<[NotWin64]>;
+
+    def FARCALL64   : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+                         "lcall{q}\t{*}$dst", []>;
   }
 
+// FIXME: We need to teach codegen about single list of call-clobbered registers.
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP] in {
+    def WINCALL64pcrel32 : I<0xE8, RawFrm,
+                             (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                             "call\t$dst", []>,
+                           Requires<[IsWin64]>;
+    def WINCALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+                             "call\t{*}$dst",
+                             [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+    def WINCALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+                             "call\t{*}$dst",
+                             [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>;
+  }
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -162,6 +192,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
                      [(brind GR64:$dst)]>;
   def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
                      [(brind (loadi64 addr:$dst))]>;
+  def FARJMP64   : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+                      "ljmp{q}\t{*}$dst", []>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -182,12 +214,18 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
 def LEAVE64  : I<0xC9, RawFrm, (outs), (ins), "leave", []>;
 let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in
+let mayLoad = 1 in {
 def POP64r   : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-let mayStore = 1 in
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
 def PUSH64r  : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
 }
 
 let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
@@ -246,6 +284,14 @@ let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
 def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
                    [(X86rep_stos i64)]>, REP;
 
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+
+// Fast system-call instructions
+def SYSEXIT64 : RI<0x35, RawFrm,
+                   (outs), (ins), "sysexit", []>, TB;
+
 //===----------------------------------------------------------------------===//
 //  Move Instructions...
 //
@@ -275,6 +321,25 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(store i64immSExt32:$src, addr:$dst)]>;
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins i8imm:$src),
+                    "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o32a : RIi32<0xA1, RawFrm, (outs), (ins i32imm:$src),
+                      "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs i8imm:$dst), (ins),
+                    "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao32 : RIi32<0xA3, RawFrm, (outs i32imm:$dst), (ins),
+                      "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+
+// Moves to and from segment registers
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+                 "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+                 "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+                 "mov{w}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+                 "mov{w}\t{$src, $dst|$dst, $src}", []>;
+
 // Sign/Zero extenders
 
 // MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -332,13 +397,15 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                     [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
 
 // Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
-// be copying from a truncate, but any other 32-bit operation will zero-extend
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
 // up to 64 bits.
 def def32 : PatLeaf<(i32 GR32:$src), [{
   return N->getOpcode() != ISD::TRUNCATE &&
          N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
-         N->getOpcode() != ISD::CopyFromReg;
+         N->getOpcode() != ISD::CopyFromReg &&
+         N->getOpcode() != X86ISD::CMOV;
 }]>;
 
 // In the case of a 32-bit def that is known to implicitly zero-extend,
@@ -361,6 +428,10 @@ let neverHasSideEffects = 1 in {
 //
 
 let Defs = [EFLAGS] in {
+
+def ADD64i32 : RI<0x05, RawFrm, (outs), (ins i32imm:$src),
+                  "add{q}\t{$src, %rax|%rax, $src}", []>;
+
 let isTwoAddress = 1 in {
 let isConvertibleToThreeAddress = 1 in {
 let isCommutable = 1 in
@@ -386,6 +457,12 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:
                  "add{q}\t{$src2, $dst|$dst, $src2}",
                  [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
                   (implicit EFLAGS)]>;
+
+// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but
+// differently encoded.
+def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>;
+
 } // isTwoAddress
 
 // Memory-Register Addition
@@ -403,6 +480,10 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
                       (implicit EFLAGS)]>;
 
 let Uses = [EFLAGS] in {
+
+def ADC64i32 : RI<0x15, RawFrm, (outs), (ins i32imm:$src),
+                  "adc{q}\t{$src, %rax|%rax, $src}", []>;
+
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def ADC64rr  : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -458,6 +539,9 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
                       (implicit EFLAGS)]>;
 } // isTwoAddress
 
+def SUB64i32 : RI<0x2D, RawFrm, (outs), (ins i32imm:$src),
+                  "sub{q}\t{$src, %rax|%rax, $src}", []>;
+
 // Memory-Register Subtraction
 def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                   "sub{q}\t{$src2, $dst|$dst, $src2}",
@@ -494,6 +578,9 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:
                       [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
 } // isTwoAddress
 
+def SBB64i32 : RI<0x1D, RawFrm, (outs), (ins i32imm:$src),
+                  "sbb{q}\t{$src, %rax|%rax, $src}", []>;
+
 def SBB64mr  : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                   "sbb{q}\t{$src2, $dst|$dst, $src2}",
                   [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
@@ -665,8 +752,10 @@ let isConvertibleToThreeAddress = 1 in   // Can transform into LEA.
 def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
                     "shl{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
-// cheaper.
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper.
+def SHL64r1  : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+                  "shr{q}\t$dst", []>;
 } // isTwoAddress
 
 let Uses = [CL] in
@@ -729,6 +818,39 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
                  [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
 // Rotate instructions
+
+let isTwoAddress = 1 in {
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+                 "rcl{q}\t{1, $dst|$dst, 1}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
+                 "rcl{q}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
+                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt),
+                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
+                 "rcr{q}\t{1, $dst|$dst, 1}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
+                 "rcr{q}\t{1, $dst|$dst, 1}", []>;
+let Uses = [CL] in {
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
+                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
+                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt),
+                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+}
+
 let isTwoAddress = 1 in {
 let Uses = [CL] in
 def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
@@ -839,6 +961,9 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
                 [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
 
 let Defs = [EFLAGS] in {
+def AND64i32 : RI<0x25, RawFrm, (outs), (ins i32imm:$src),
+                  "and{q}\t{$src, %rax|%rax, $src}", []>;
+
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def AND64rr  : RI<0x21, MRMDestReg,
@@ -912,6 +1037,9 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                      [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
                       (implicit EFLAGS)]>;
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+                    "or{q}\t{$src, %rax|%rax, $src}", []>;
+
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def XOR64rr  : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -945,6 +1073,10 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "xor{q}\t{$src, $dst|$dst, $src}",
                       [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
                        (implicit EFLAGS)]>;
+
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+                     "xor{q}\t{$src, %rax|%rax, $src}", []>;
+
 } // Defs = [EFLAGS]
 
 //===----------------------------------------------------------------------===//
@@ -953,6 +1085,8 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
 
 // Integer comparison
 let Defs = [EFLAGS] in {
+def TEST64i32 : RI<0xa9, RawFrm, (outs), (ins i32imm:$src),
+                   "test{q}\t{$src, %rax|%rax, $src}", []>;
 let isCommutable = 1 in
 def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                   "test{q}\t{$src2, $src1|$src1, $src2}",
@@ -973,10 +1107,15 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
                         [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0),
                          (implicit EFLAGS)]>;
+
+def CMP64i32 : RI<0x3D, RawFrm, (outs), (ins i32imm:$src),
+                  "cmp{q}\t{$src, %rax|%rax, $src}", []>;
 def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp GR64:$src1, GR64:$src2),
                   (implicit EFLAGS)]>;
+def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
+                    "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
 def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (loadi64 addr:$src1), GR64:$src2),
@@ -1306,14 +1445,12 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src)
 // Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
 // equivalent due to implicit zero-extending, and it sometimes has a smaller
 // encoding.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
 // when we have a better way to specify isel priority.
-let Defs = [EFLAGS], AddedComplexity = 1,
-    isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0  : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
-                 "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
-                 [(set GR64:$dst, 0)]>;
+let AddedComplexity = 1 in
+def : Pat<(i64 0),
+          (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
+
 // Materialize i64 constant where top 32-bits are zero.
 let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
@@ -1343,12 +1480,12 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
                   [(X86tlsaddr tls64addr:$sym)]>,
                   Requires<[In64BitMode]>;
 
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
 def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                    "movq\t%gs:$src, $dst",
                    [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
 
-let AddedComplexity = 5 in
+let AddedComplexity = 5, isCodeGenOnly = 1 in
 def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                    "movq\t%fs:$src, $dst",
                    [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
@@ -1371,11 +1508,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
                  "xadd\t$val, $ptr",
                  [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
                 TB, LOCK;
+
 def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
                   "xchg\t$val, $ptr",
                   [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
 }
 
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+                         (ins i64mem:$dst, i64i8imm :$src2),
+                         "lock\n\t"
+                         "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+                           (ins i64mem:$dst, i64i32imm :$src2),
+                           "lock\n\t"
+                           "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+                         (ins i64mem:$dst, i64i8imm :$src2),
+                         "lock\n\t"
+                         "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+                           (ins i64mem:$dst, i64i32imm:$src2),
+                           "lock\n\t"
+                           "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+                     "lock\n\t"
+                     "inc{q}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+                     "lock\n\t"
+                     "dec{q}\t$dst", []>, LOCK;
+
 // Atomic exchange, and, or, xor
 let Constraints = "$val = $dst", Defs = [EFLAGS],
                   usesCustomDAGSchedInserter = 1 in {
@@ -1405,78 +1574,88 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
                 [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
 }
 
+// Segmentation support instructions
+
+// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// String manipulation instructions
+
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
 
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
+// code model mode, should use 'movabs'. FIXME: This is really a hack, the
+// 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
 def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri tconstpool  :$dst)>, Requires<[NotSmallCode]>;
+          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri tjumptable  :$dst)>, Requires<[NotSmallCode]>;
+          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
 
 // If we have small model and -static mode, it is safe to store global addresses
 // directly as immediates. FIXME: This is really a hack, the 'imm' predicate
-// should handle this sort of thing.
+// for MOV64mi32 should handle this sort of thing.
 def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tconstpool:$src)>,
-          Requires<[SmallCode, IsStatic]>;
+          Requires<[NearData, IsStatic]>;
 def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tjumptable:$src)>,
-          Requires<[SmallCode, IsStatic]>;
+          Requires<[NearData, IsStatic]>;
 def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
-          Requires<[SmallCode, IsStatic]>;
+          Requires<[NearData, IsStatic]>;
 def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, texternalsym:$src)>,
-          Requires<[SmallCode, IsStatic]>;
-
-// If we have small model and -static mode, it is safe to store global addresses
-// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
-// should handle this sort of thing.
-def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tconstpool:$src)>,
-          Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tjumptable:$src)>,
-          Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
-          Requires<[SmallCode, IsStatic]>;
-def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, texternalsym:$src)>,
-          Requires<[SmallCode, IsStatic]>;
-
+          Requires<[NearData, IsStatic]>;
 
 // Calls
 // Direct PC relative function call for small code model. 32-bit displacement
 // sign extended to 64-bit.
 def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>;
+          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
 def : Pat<(X86call (i64 texternalsym:$dst)),
-          (CALL64pcrel32 texternalsym:$dst)>;
-
-def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86tailcall (i64 texternalsym:$dst)),
-          (CALL64pcrel32 texternalsym:$dst)>;
-
-def : Pat<(X86tailcall GR64:$dst),
-          (CALL64r GR64:$dst)>;
+          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
 
 // tailcall stuff
-def : Pat<(X86tailcall GR32:$dst),
-          (TAILCALL)>;
-def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
-          (TAILCALL)>;
-def : Pat<(X86tailcall (i64 texternalsym:$dst)),
-          (TAILCALL)>;
-
 def : Pat<(X86tcret GR64:$dst, imm:$off),
           (TCRETURNri64 GR64:$dst, imm:$off)>;
 
@@ -1540,30 +1719,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
 // For other extloads, use subregs, since the high contents of the register are
 // defined after an extload.
 def : Pat<(extloadi64i32 addr:$src),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
                          x86_subreg_32bit)>;
-def : Pat<(extloadi16i1 addr:$src),
-          (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
-                         x86_subreg_8bit)>,
-         Requires<[In64BitMode]>;
-def : Pat<(extloadi16i8 addr:$src),
-          (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
-                         x86_subreg_8bit)>,
-         Requires<[In64BitMode]>;
-
-// anyext
-def : Pat<(i64 (anyext GR8:$src)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
-def : Pat<(i64 (anyext GR16:$src)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
-def : Pat<(i64 (anyext GR32:$src)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
-def : Pat<(i16 (anyext GR8:$src)),
-          (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
-         Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext GR8:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
-         Requires<[In64BitMode]>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
 
 //===----------------------------------------------------------------------===//
 // Some peepholes
@@ -1661,6 +1825,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
       (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
                       x86_subreg_8bit_hi))>,
       Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+      (MOVZX32_NOREXrr8
+        (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+                        x86_subreg_8bit_hi))>,
+      Requires<[In64BitMode]>;
 def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
       (SUBREG_TO_REG
         (i64 0),
@@ -1668,6 +1837,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
         (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
                         x86_subreg_8bit_hi)),
       x86_subreg_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+      (SUBREG_TO_REG
+        (i64 0),
+        (MOVZX32_NOREXrr8
+          (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+                          x86_subreg_8bit_hi)),
+      x86_subreg_32bit)>;
 
 // h-register extract and store.
 def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
@@ -1906,6 +2082,102 @@ def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
                     (implicit EFLAGS)),
           (DEC64m addr:$dst)>;
 
+// Register-Register Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2),
+                    (implicit EFLAGS)),
+          (OR64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2),
+                    (implicit EFLAGS)),
+          (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical Or with EFLAGS result
+def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)),
+                    (implicit EFLAGS)),
+          (OR64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical Or with EFLAGS result
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (OR64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (OR64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (OR64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2),
+                    (implicit EFLAGS)),
+          (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2),
+                    (implicit EFLAGS)),
+          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical XOr with EFLAGS result
+def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)),
+                    (implicit EFLAGS)),
+          (XOR64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical XOr with EFLAGS result
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (XOR64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (XOR64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (XOR64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2),
+                    (implicit EFLAGS)),
+          (AND64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2),
+                    (implicit EFLAGS)),
+          (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Logical And with EFLAGS result
+def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)),
+                    (implicit EFLAGS)),
+          (AND64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Logical And with EFLAGS result
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (AND64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst),
+                                        i64immSExt8:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (AND64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+                           addr:$dst),
+                    (implicit EFLAGS)),
+          (AND64mi32 addr:$dst, i64immSExt32:$src2)>;
+
 //===----------------------------------------------------------------------===//
 // X86-64 SSE Instructions
 //===----------------------------------------------------------------------===//
@@ -1977,3 +2249,15 @@ let isTwoAddress = 1 in {
 }
 
 defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
+
+// -disable-16bit support.
+def : Pat<(truncstorei16 (i64 imm:$src), addr:$dst),
+          (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(truncstorei16 GR64:$src, addr:$dst),
+          (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+def : Pat<(i64 (sextloadi16 addr:$dst)),
+          (MOVSX64rm16 addr:$dst)>;
+def : Pat<(i64 (zextloadi16 addr:$dst)),
+          (MOVZX64rm16 addr:$dst)>;
+def : Pat<(i64 (extloadi16 addr:$dst)),
+          (MOVZX64rm16 addr:$dst)>;
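
For reference, the IsWin64/NotWin64 and NearData/FarData/KernelCode predicates that gate the new WINCALL* instructions and the address-materialization patterns above are not defined in this file; they live with the other X86 subtarget predicates in X86InstrInfo.td. A minimal sketch of the shape such TableGen predicate definitions take (the exact condition strings below are assumptions for illustration, not text from this commit):

def IsWin64  : Predicate<"Subtarget->isTargetWin64()">;  // assumed condition string
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; // assumed condition string

Because each Requires<[...]> list is evaluated during instruction selection, mutually exclusive variants such as CALL64pcrel32 and WINCALL64pcrel32 can coexist, with the matcher picking whichever definition's predicates hold for the current subtarget.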