Backport more retpoline patches

This commit is contained in:
Tom Stellard 2018-03-07 14:52:16 +00:00
parent 64558d28cf
commit 2653f0039c
5 changed files with 706 additions and 1 deletions

237
0001-Merging-r324449.patch Normal file
View File

@ -0,0 +1,237 @@
From 4e5fddc22a28e0e59d6409a98fb22eba32d0eae7 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:32:26 +0000
Subject: [PATCH 1/4] Merging r324449:
------------------------------------------------------------------------
r324449 | chandlerc | 2018-02-06 22:16:24 -0800 (Tue, 06 Feb 2018) | 15 lines
[x86/retpoline] Make the external thunk names exactly match the names
that happened to end up in GCC.
This is really unfortunate, as the names don't have much rhyme or reason
to them. Originally in the discussions it seemed fine to rely on aliases
to map different names to whatever external thunk code developers wished
to use but there are practical problems with that in the kernel it turns
out. And since we're discovering this practical problems late and since
GCC has already shipped a release with one set of names, we are forced,
yet again, to blindly match what is there.
Somewhat rushing this patch out for the Linux kernel folks to test and
so we can get it patched into our releases.
Differential Revision: https://reviews.llvm.org/D42998
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325088 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86ISelLowering.cpp | 59 +++++++++++++++++++++++++---------
test/CodeGen/X86/retpoline-external.ll | 48 +++++++++++++--------------
2 files changed, 68 insertions(+), 39 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2c2294d..9aa3023 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
+ if (Subtarget.useRetpolineExternalThunk()) {
+ // When using an external thunk for retpolines, we pick names that match the
+ // names GCC happens to use as well. This helps simplify the implementation
+ // of the thunks for kernels where they have no easy ability to create
+ // aliases and are doing non-trivial configuration of the thunk's body. For
+ // example, the Linux kernel will do boot-time hot patching of the thunk
+ // bodies and cannot easily export aliases of these to loaded modules.
+ //
+ // Note that at any point in the future, we may need to change the semantics
+ // of how we implement retpolines and at that time will likely change the
+ // name of the called thunk. Essentially, there is no hard guarantee that
+ // LLVM will generate calls to specific thunks, we merely make a best-effort
+ // attempt to help out kernels and other systems where duplicating the
+ // thunks is costly.
+ switch (Reg) {
+ case 0:
+ assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
+ return "__x86_indirect_thunk";
+ case X86::EAX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_eax";
+ case X86::ECX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_ecx";
+ case X86::EDX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_edx";
+ case X86::R11:
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__x86_indirect_thunk_r11";
+ }
+ llvm_unreachable("unexpected reg for retpoline");
+ }
+
+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case 0:
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_push"
- : "__llvm_retpoline_push";
+ return "__llvm_retpoline_push";
case X86::EAX:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_eax"
- : "__llvm_retpoline_eax";
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_eax";
case X86::ECX:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_ecx"
- : "__llvm_retpoline_ecx";
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_ecx";
case X86::EDX:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_edx"
- : "__llvm_retpoline_edx";
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edx";
case X86::R11:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_r11"
- : "__llvm_retpoline_r11";
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__llvm_retpoline_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}
diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll
index 66d32ba..2f21bb2 100644
--- a/test/CodeGen/X86/retpoline-external.ll
+++ b/test/CodeGen/X86/retpoline-external.ll
@@ -23,18 +23,18 @@ entry:
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
-; X64: callq __llvm_external_retpoline_r11
+; X64: callq __x86_indirect_thunk_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
-; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: callq bar
-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
@@ -43,19 +43,19 @@ entry:
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86-NOT: # TAILCALL
; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
-; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: calll __x86_indirect_thunk_eax
; X86FAST: calll bar
-; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: calll __x86_indirect_thunk_eax
@global_fp = external global void (i32)*
@@ -72,28 +72,28 @@ define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
-; X64: callq __llvm_external_retpoline_r11
+; X64: callq __x86_indirect_thunk_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
-; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: movq global_fp(%rip), %r11
-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
-; X86: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86: jmp __x86_indirect_thunk_eax # TAILCALL
; X86FAST-LABEL: icall_global_fp:
-; X86FAST: calll __llvm_external_retpoline_eax
-; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86FAST: calll __x86_indirect_thunk_eax
+; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL
%struct.Foo = type { void (%struct.Foo*)** }
@@ -114,14 +114,14 @@ define void @vcall(%struct.Foo* %obj) #0 {
; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
-; X64: callq __llvm_external_retpoline_r11
+; X64: callq __x86_indirect_thunk_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
; X64FAST-LABEL: vcall:
-; X64FAST: callq __llvm_external_retpoline_r11
-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
@@ -129,14 +129,14 @@ define void @vcall(%struct.Foo* %obj) #0 {
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
-; X86: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86: jmp __x86_indirect_thunk_eax # TAILCALL
; X86FAST-LABEL: vcall:
-; X86FAST: calll __llvm_external_retpoline_eax
-; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86FAST: calll __x86_indirect_thunk_eax
+; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL
declare void @direct_callee()
--
1.8.3.1

View File

@ -0,0 +1,88 @@
From 8f5f7f9cb15387ddb010894c17e788b3116fe26d Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:33:00 +0000
Subject: [PATCH 2/4] Merging r324645:
------------------------------------------------------------------------
r324645 | dwmw2 | 2018-02-08 12:06:05 -0800 (Thu, 08 Feb 2018) | 5 lines
[X86] Support 'V' register operand modifier
This allows the register name to be printed without the leading '%'.
This can be used for emitting calls to the retpoline thunks from inline
asm.
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325089 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86AsmPrinter.cpp | 11 ++++++++++-
test/CodeGen/X86/inline-asm-modifier-V.ll | 14 ++++++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
create mode 100644 test/CodeGen/X86/inline-asm-modifier-V.ll
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index dc15aea..8c7ddd9 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -344,6 +344,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
unsigned Reg = MO.getReg();
+ bool EmitPercent = true;
+
switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
@@ -358,6 +360,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
case 'k': // Print SImode register
Reg = getX86SubSuperRegister(Reg, 32);
break;
+ case 'V':
+ EmitPercent = false;
+ LLVM_FALLTHROUGH;
case 'q':
// Print 64-bit register names if 64-bit integer registers are available.
// Otherwise, print 32-bit register names.
@@ -365,7 +370,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
break;
}
- O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ if (EmitPercent)
+ O << '%';
+
+ O << X86ATTInstPrinter::getRegisterName(Reg);
return false;
}
@@ -438,6 +446,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 'w': // Print HImode register
case 'k': // Print SImode register
case 'q': // Print DImode register
+ case 'V': // Print native register without '%'
if (MO.isReg())
return printAsmMRegister(*this, MO, ExtraCode[0], O);
printOperand(*this, MI, OpNo, O);
diff --git a/test/CodeGen/X86/inline-asm-modifier-V.ll b/test/CodeGen/X86/inline-asm-modifier-V.ll
new file mode 100644
index 0000000..5a7f3fd
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-modifier-V.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s
+
+; If the target does not have 64-bit integer registers, emit 32-bit register
+; names.
+
+; X86: call __x86_indirect_thunk_e{{[abcd]}}x
+; X64: call __x86_indirect_thunk_r
+
+define void @q_modifier(i32* %p) {
+entry:
+ tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
+ ret void
+}
--
1.8.3.1

308
0003-Merging-r325049.patch Normal file
View File

@ -0,0 +1,308 @@
From 4594a6164d5ae9252825e23a95aa6f2fce304d6e Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:34:13 +0000
Subject: [PATCH 3/4] Merging r325049:
------------------------------------------------------------------------
r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines
[X86] Use EDI for retpoline when no scratch regs are left
Summary:
Instead of solving the hard problem of how to pass the callee to the indirect
jump thunk without a register, just use a CSR. At a call boundary, there's
nothing stopping us from using a CSR to hold the callee as long as we save and
restore it in the prologue.
Also, add tests for this mregparm=3 case. I wrote execution tests for
__llvm_retpoline_push, but they never got committed as lit tests, either
because I never rewrote them or because they got lost in merge conflicts.
Reviewers: chandlerc, dwmw2
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D43214
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325090 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++++----------------------
lib/Target/X86/X86RetpolineThunks.cpp | 42 ++++++++---------------------
test/CodeGen/X86/retpoline-regparm.ll | 42 +++++++++++++++++++++++++++++
test/CodeGen/X86/retpoline.ll | 14 ++++------
4 files changed, 76 insertions(+), 72 deletions(-)
create mode 100644 test/CodeGen/X86/retpoline-regparm.ll
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9aa3023..59a9832 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26265,9 +26265,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
// attempt to help out kernels and other systems where duplicating the
// thunks is costly.
switch (Reg) {
- case 0:
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
- return "__x86_indirect_thunk";
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_eax";
@@ -26277,6 +26274,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
@@ -26286,9 +26286,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
- case 0:
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
- return "__llvm_retpoline_push";
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
@@ -26298,6 +26295,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
@@ -26319,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
// just use R11, but we scan for uses anyway to ensure we don't generate
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
// already a register use operand to the call to hold the callee. If none
- // are available, push the callee instead. This is less efficient, but is
- // necessary for functions using 3 regparms. Such function calls are
- // (currently) not eligible for tail call optimization, because there is no
- // scratch register available to hold the address of the callee.
+ // are available, use EDI instead. EDI is chosen because EBX is the PIC base
+ // register and ESI is the base pointer to realigned stack frames with VLAs.
SmallVector<unsigned, 3> AvailableRegs;
if (Subtarget.is64Bit())
AvailableRegs.push_back(X86::R11);
else
- AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
+ AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
// Zero out any registers that are already used.
for (const auto &MO : MI.operands()) {
@@ -26345,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
break;
}
}
+ if (!AvailableReg)
+ report_fatal_error("calling convention incompatible with retpoline, no "
+ "available registers");
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
- if (AvailableReg == 0) {
- // No register available. Use PUSH. This must not be a tailcall, and this
- // must not be x64.
- if (Subtarget.is64Bit())
- report_fatal_error(
- "Cannot make an indirect call on x86-64 using both retpoline and a "
- "calling convention that preservers r11");
- if (Opc != X86::CALLpcrel32)
- report_fatal_error("Cannot make an indirect tail call on x86 using "
- "retpoline without a preserved register");
- BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
- MI.getOperand(0).ChangeToES(Symbol);
- MI.setDesc(TII->get(Opc));
- } else {
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
- .addReg(CalleeVReg);
- MI.getOperand(0).ChangeToES(Symbol);
- MI.setDesc(TII->get(Opc));
- MachineInstrBuilder(*BB->getParent(), &MI)
- .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
- }
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
+ .addReg(CalleeVReg);
+ MI.getOperand(0).ChangeToES(Symbol);
+ MI.setDesc(TII->get(Opc));
+ MachineInstrBuilder(*BB->getParent(), &MI)
+ .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
return BB;
}
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 223fa57..59ace3f 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[] = "__llvm_retpoline_eax";
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
static const char EDXThunkName[] = "__llvm_retpoline_edx";
-static const char PushThunkName[] = "__llvm_retpoline_push";
+static const char EDIThunkName[] = "__llvm_retpoline_edi";
namespace {
class X86RetpolineThunks : public MachineFunctionPass {
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
createThunkFunction(M, R11ThunkName);
else
for (StringRef Name :
- {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
+ {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
createThunkFunction(M, Name);
InsertedThunks = true;
return true;
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
populateThunk(MF, X86::R11);
} else {
// For 32-bit targets we need to emit a collection of thunks for various
- // possible scratch registers as well as a fallback that is used when
- // there are no scratch registers and assumes the retpoline target has
- // been pushed.
+ // possible scratch registers as well as a fallback that uses EDI, which is
+ // normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
// movl %edx, (%esp)
// retl
//
- // This last one is a bit more special and so needs a little extra
- // handling.
- // __llvm_retpoline_push:
- // calll .Lpush_call_target
- // .Lpush_capture_spec:
- // pause
- // lfence
- // jmp .Lpush_capture_spec
- // .align 16
- // .Lpush_call_target:
- // # Clear pause_loop return address.
- // addl $4, %esp
- // # Top of stack words are: Callee, RA. Exchange Callee and RA.
- // pushl 4(%esp) # Push callee
- // pushl 4(%esp) # Push RA
- // popl 8(%esp) # Pop RA to final RA
- // popl (%esp) # Pop callee to next top of stack
- // retl # Ret to callee
+ // __llvm_retpoline_edi:
+ // ... # Same setup
+ // movl %edi, (%esp)
+ // retl
if (MF.getName() == EAXThunkName)
populateThunk(MF, X86::EAX);
else if (MF.getName() == ECXThunkName)
populateThunk(MF, X86::ECX);
else if (MF.getName() == EDXThunkName)
populateThunk(MF, X86::EDX);
- else if (MF.getName() == PushThunkName)
- populateThunk(MF);
+ else if (MF.getName() == EDIThunkName)
+ populateThunk(MF, X86::EDI);
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->setAlignment(4);
- if (Reg) {
- insertRegReturnAddrClobber(*CallTarget, *Reg);
- } else {
- assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
- insert32BitPushReturnAddrClobber(*CallTarget);
- }
+ insertRegReturnAddrClobber(*CallTarget, *Reg);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
diff --git a/test/CodeGen/X86/retpoline-regparm.ll b/test/CodeGen/X86/retpoline-regparm.ll
new file mode 100644
index 0000000..13b3274
--- /dev/null
+++ b/test/CodeGen/X86/retpoline-regparm.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
+
+; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
+; because there are no available scratch registers. The Linux kernel builds
+; with -mregparm=3, so we need to support it. TCO should fail because we need
+; to restore EDI.
+
+define void @call_edi(void (i32, i32, i32)* %fp) #0 {
+entry:
+ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+ ret void
+}
+
+; CHECK-LABEL: call_edi:
+; EDI is used, so it must be saved.
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __llvm_retpoline_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+define void @edi_external(void (i32, i32, i32)* %fp) #1 {
+entry:
+ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+ ret void
+}
+
+; CHECK-LABEL: edi_external:
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __x86_indirect_thunk_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+attributes #0 = { "target-features"="+retpoline" }
+attributes #1 = { "target-features"="+retpoline-external-thunk" }
diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll
index b0d4c85..562386e 100644
--- a/test/CodeGen/X86/retpoline.ll
+++ b/test/CodeGen/X86/retpoline.ll
@@ -336,10 +336,10 @@ latch:
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
-; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
-; X86-NEXT: .hidden __llvm_retpoline_push
-; X86-NEXT: .weak __llvm_retpoline_push
-; X86: __llvm_retpoline_push:
+; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
+; X86-NEXT: .hidden __llvm_retpoline_edi
+; X86-NEXT: .weak __llvm_retpoline_edi
+; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
@@ -351,11 +351,7 @@ latch:
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
-; X86-NEXT: addl $4, %esp
-; X86-NEXT: pushl 4(%esp)
-; X86-NEXT: pushl 4(%esp)
-; X86-NEXT: popl 8(%esp)
-; X86-NEXT: popl (%esp)
+; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl
--
1.8.3.1

View File

@ -0,0 +1,65 @@
From de9a0f9c449d4b13c70eff8c9a3023948dc21cb7 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:34:35 +0000
Subject: [PATCH 4/4] Merging r325085:
------------------------------------------------------------------------
r325085 | rnk | 2018-02-13 16:24:29 -0800 (Tue, 13 Feb 2018) | 3 lines
[X86] Remove dead code from retpoline thunk generation
Follow-up to r325049
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325091 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86RetpolineThunks.cpp | 26 --------------------------
1 file changed, 26 deletions(-)
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 59ace3f..d03826b 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -74,7 +74,6 @@ private:
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
};
@@ -225,31 +224,6 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
.addReg(Reg);
}
-void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
- MachineBasicBlock &MBB) {
- // The instruction sequence we use to replace the return address without
- // a scratch register is somewhat complicated:
- // # Clear capture_spec from return address.
- // addl $4, %esp
- // # Top of stack words are: Callee, RA. Exchange Callee and RA.
- // pushl 4(%esp) # Push callee
- // pushl 4(%esp) # Push RA
- // popl 8(%esp) # Pop RA to final RA
- // popl (%esp) # Pop callee to next top of stack
- // retl # Ret to callee
- BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP)
- .addReg(X86::ESP)
- .addImm(4);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
- false, 4);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
- false, 4);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
- false, 8);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
- false, 0);
-}
-
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
Optional<unsigned> Reg) {
// Set MF properties. We never use vregs...
--
1.8.3.1

View File

@ -12,7 +12,7 @@
Name: llvm
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
Release: 3%{?dist}
Release: 4%{?dist}
Summary: The Low Level Virtual Machine
License: NCSA
@ -30,6 +30,10 @@ Patch6: 0001-Ignore-all-duplicate-frame-index-expression.patch
Patch7: 0002-Reinstantiate-old-bad-deduplication-logic-that-was-r.patch
Patch8: 0001-Merging-r323155.patch
Patch9: 0001-Merging-r323915.patch
Patch10: 0001-Merging-r324449.patch
Patch11: 0002-Merging-r324645.patch
Patch12: 0003-Merging-r325049.patch
Patch13: 0004-Merging-r325085.patch
BuildRequires: cmake
BuildRequires: zlib-devel
@ -220,6 +224,9 @@ fi
%{_libdir}/cmake/llvm/LLVMStaticExports.cmake
%changelog
* Wed Mar 07 2018 Tom Stellard <tstellar@redhat.com> - 5.0.1-4
- Backport more retpoline patches
* Tue Feb 06 2018 Tom Stellard <tstellar@redhat.com> - 5.0.1-3
- Backport retpoline support