5.0.2 Release

This commit is contained in:
Tom Stellard 2018-05-04 04:23:03 +00:00
parent 6640c739a1
commit 6114ffe50f
9 changed files with 7 additions and 2999 deletions

1
.gitignore vendored
View File

@ -34,3 +34,4 @@
/llvm-4.0.1.src.tar.xz
/llvm-5.0.0.src.tar.xz
/llvm-5.0.1.src.tar.xz
/llvm-5.0.2.src.tar.xz

File diff suppressed because it is too large Load Diff

View File

@ -1,287 +0,0 @@
From b4b2cc0cca3595185683aa7aa4d29c4a151a679e Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Thu, 1 Feb 2018 21:31:35 +0000
Subject: [PATCH] Merging r323915:
------------------------------------------------------------------------
r323915 | chandlerc | 2018-01-31 12:56:37 -0800 (Wed, 31 Jan 2018) | 17 lines
[x86] Make the retpoline thunk insertion a machine function pass.
Summary:
This removes the need for a machine module pass using some deeply
questionable hacks. This should address PR36123 which is a case where in
full LTO the memory usage of a machine module pass actually ended up
being significant.
We should revert this on trunk as soon as we understand and fix the
memory usage issue, but we should include this in any backports of
retpolines themselves.
Reviewers: echristo, MatzeB
Subscribers: sanjoy, mcrosier, mehdi_amini, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D42726
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@324009 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86.h | 2 +-
lib/Target/X86/X86RetpolineThunks.cpp | 135 +++++++++++++++++++++-------------
test/CodeGen/X86/O0-pipeline.ll | 3 +-
3 files changed, 87 insertions(+), 53 deletions(-)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 25e4b89..2e3ace2 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -100,7 +100,7 @@ void initializeFixupBWInstPassPass(PassRegistry &);
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
-ModulePass *createX86RetpolineThunksPass();
+FunctionPass *createX86RetpolineThunksPass();
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 6b4bc8a..223fa57 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -38,18 +38,27 @@ using namespace llvm;
#define DEBUG_TYPE "x86-retpoline-thunks"
+static const char ThunkNamePrefix[] = "__llvm_retpoline_";
+static const char R11ThunkName[] = "__llvm_retpoline_r11";
+static const char EAXThunkName[] = "__llvm_retpoline_eax";
+static const char ECXThunkName[] = "__llvm_retpoline_ecx";
+static const char EDXThunkName[] = "__llvm_retpoline_edx";
+static const char PushThunkName[] = "__llvm_retpoline_push";
+
namespace {
-class X86RetpolineThunks : public ModulePass {
+class X86RetpolineThunks : public MachineFunctionPass {
public:
static char ID;
- X86RetpolineThunks() : ModulePass(ID) {}
+ X86RetpolineThunks() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
- bool runOnModule(Module &M) override;
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfo>();
AU.addPreserved<MachineModuleInfo>();
}
@@ -61,51 +70,74 @@ private:
const X86Subtarget *STI;
const X86InstrInfo *TII;
- Function *createThunkFunction(Module &M, StringRef Name);
+ bool InsertedThunks;
+
+ void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
- void createThunk(Module &M, StringRef NameSuffix,
- Optional<unsigned> Reg = None);
+ void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
};
} // end anonymous namespace
-ModulePass *llvm::createX86RetpolineThunksPass() {
+FunctionPass *llvm::createX86RetpolineThunksPass() {
return new X86RetpolineThunks();
}
char X86RetpolineThunks::ID = 0;
-bool X86RetpolineThunks::runOnModule(Module &M) {
- DEBUG(dbgs() << getPassName() << '\n');
+bool X86RetpolineThunks::doInitialization(Module &M) {
+ InsertedThunks = false;
+ return false;
+}
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- assert(TPC && "X86-specific target pass should not be run without a target "
- "pass config!");
+bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << getPassName() << '\n');
- MMI = &getAnalysis<MachineModuleInfo>();
- TM = &TPC->getTM<TargetMachine>();
+ TM = &MF.getTarget();;
+ STI = &MF.getSubtarget<X86Subtarget>();
+ TII = STI->getInstrInfo();
Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
- // Only add a thunk if we have at least one function that has the retpoline
- // feature enabled in its subtarget.
- // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
- // nothing will end up calling it.
- // FIXME: It's a little silly to look at every function just to enumerate
- // the subtargets, but eventually we'll want to look at them for indirect
- // calls, so maybe this is OK.
- if (!llvm::any_of(M, [&](const Function &F) {
- // Save the subtarget we find for use in emitting the subsequent
- // thunk.
- STI = &TM->getSubtarget<X86Subtarget>(F);
- return STI->useRetpoline() && !STI->useRetpolineExternalThunk();
- }))
- return false;
-
- // If we have a relevant subtarget, get the instr info as well.
- TII = STI->getInstrInfo();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ Module &M = const_cast<Module &>(*MMI->getModule());
+
+ // If this function is not a thunk, check to see if we need to insert
+ // a thunk.
+ if (!MF.getName().startswith(ThunkNamePrefix)) {
+ // If we've already inserted a thunk, nothing else to do.
+ if (InsertedThunks)
+ return false;
+
+ // Only add a thunk if one of the functions has the retpoline feature
+ // enabled in its subtarget, and doesn't enable external thunks.
+ // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+ // nothing will end up calling it.
+ // FIXME: It's a little silly to look at every function just to enumerate
+ // the subtargets, but eventually we'll want to look at them for indirect
+ // calls, so maybe this is OK.
+ if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+ return false;
+
+ // Otherwise, we need to insert the thunk.
+ // WARNING: This is not really a well behaving thing to do in a function
+ // pass. We extract the module and insert a new function (and machine
+ // function) directly into the module.
+ if (Is64Bit)
+ createThunkFunction(M, R11ThunkName);
+ else
+ for (StringRef Name :
+ {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
+ createThunkFunction(M, Name);
+ InsertedThunks = true;
+ return true;
+ }
+ // If this *is* a thunk function, we need to populate it with the correct MI.
if (Is64Bit) {
+ assert(MF.getName() == "__llvm_retpoline_r11" &&
+ "Should only have an r11 thunk on 64-bit targets");
+
// __llvm_retpoline_r11:
// callq .Lr11_call_target
// .Lr11_capture_spec:
@@ -116,8 +148,7 @@ bool X86RetpolineThunks::runOnModule(Module &M) {
// .Lr11_call_target:
// movq %r11, (%rsp)
// retq
-
- createThunk(M, "r11", X86::R11);
+ populateThunk(MF, X86::R11);
} else {
// For 32-bit targets we need to emit a collection of thunks for various
// possible scratch registers as well as a fallback that is used when
@@ -161,16 +192,25 @@ bool X86RetpolineThunks::runOnModule(Module &M) {
// popl 8(%esp) # Pop RA to final RA
// popl (%esp) # Pop callee to next top of stack
// retl # Ret to callee
- createThunk(M, "eax", X86::EAX);
- createThunk(M, "ecx", X86::ECX);
- createThunk(M, "edx", X86::EDX);
- createThunk(M, "push");
+ if (MF.getName() == EAXThunkName)
+ populateThunk(MF, X86::EAX);
+ else if (MF.getName() == ECXThunkName)
+ populateThunk(MF, X86::ECX);
+ else if (MF.getName() == EDXThunkName)
+ populateThunk(MF, X86::EDX);
+ else if (MF.getName() == PushThunkName)
+ populateThunk(MF);
+ else
+ llvm_unreachable("Invalid thunk name on x86-32!");
}
return true;
}
-Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
+void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
+ assert(Name.startswith(ThunkNamePrefix) &&
+ "Created a thunk with an unexpected prefix!");
+
LLVMContext &Ctx = M.getContext();
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
Function *F =
@@ -190,7 +230,6 @@ Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
IRBuilder<> Builder(Entry);
Builder.CreateRetVoid();
- return F;
}
void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
@@ -200,6 +239,7 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
.addReg(Reg);
}
+
void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
MachineBasicBlock &MBB) {
// The instruction sequence we use to replace the return address without
@@ -225,21 +265,16 @@ void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
false, 0);
}
-void X86RetpolineThunks::createThunk(Module &M, StringRef NameSuffix,
- Optional<unsigned> Reg) {
- Function &F =
- *createThunkFunction(M, (Twine("__llvm_retpoline_") + NameSuffix).str());
- MachineFunction &MF = MMI->getOrCreateMachineFunction(F);
-
+void X86RetpolineThunks::populateThunk(MachineFunction &MF,
+ Optional<unsigned> Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
- BasicBlock &OrigEntryBB = F.getEntryBlock();
- MachineBasicBlock *Entry = MF.CreateMachineBasicBlock(&OrigEntryBB);
- MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(&OrigEntryBB);
- MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(&OrigEntryBB);
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
- MF.push_back(Entry);
+ MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index f9bd66f..123dcf6 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll
@@ -56,8 +56,7 @@
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
-; CHECK-NEXT: X86 Retpoline Thunks
-; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: X86 Retpoline Thunks
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: MachineDominator Tree Construction
--
1.8.3.1

View File

@ -1,237 +0,0 @@
From 4e5fddc22a28e0e59d6409a98fb22eba32d0eae7 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:32:26 +0000
Subject: [PATCH 1/4] Merging r324449:
------------------------------------------------------------------------
r324449 | chandlerc | 2018-02-06 22:16:24 -0800 (Tue, 06 Feb 2018) | 15 lines
[x86/retpoline] Make the external thunk names exactly match the names
that happened to end up in GCC.
This is really unfortunate, as the names don't have much rhyme or reason
to them. Originally in the discussions it seemed fine to rely on aliases
to map different names to whatever external thunk code developers wished
to use but there are practical problems with that in the kernel it turns
out. And since we're discovering this practical problems late and since
GCC has already shipped a release with one set of names, we are forced,
yet again, to blindly match what is there.
Somewhat rushing this patch out for the Linux kernel folks to test and
so we can get it patched into our releases.
Differential Revision: https://reviews.llvm.org/D42998
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325088 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86ISelLowering.cpp | 59 +++++++++++++++++++++++++---------
test/CodeGen/X86/retpoline-external.ll | 48 +++++++++++++--------------
2 files changed, 68 insertions(+), 39 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2c2294d..9aa3023 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
+ if (Subtarget.useRetpolineExternalThunk()) {
+ // When using an external thunk for retpolines, we pick names that match the
+ // names GCC happens to use as well. This helps simplify the implementation
+ // of the thunks for kernels where they have no easy ability to create
+ // aliases and are doing non-trivial configuration of the thunk's body. For
+ // example, the Linux kernel will do boot-time hot patching of the thunk
+ // bodies and cannot easily export aliases of these to loaded modules.
+ //
+ // Note that at any point in the future, we may need to change the semantics
+ // of how we implement retpolines and at that time will likely change the
+ // name of the called thunk. Essentially, there is no hard guarantee that
+ // LLVM will generate calls to specific thunks, we merely make a best-effort
+ // attempt to help out kernels and other systems where duplicating the
+ // thunks is costly.
+ switch (Reg) {
+ case 0:
+ assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
+ return "__x86_indirect_thunk";
+ case X86::EAX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_eax";
+ case X86::ECX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_ecx";
+ case X86::EDX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_edx";
+ case X86::R11:
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__x86_indirect_thunk_r11";
+ }
+ llvm_unreachable("unexpected reg for retpoline");
+ }
+
+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case 0:
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_push"
- : "__llvm_retpoline_push";
+ return "__llvm_retpoline_push";
case X86::EAX:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_eax"
- : "__llvm_retpoline_eax";
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_eax";
case X86::ECX:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_ecx"
- : "__llvm_retpoline_ecx";
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_ecx";
case X86::EDX:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_edx"
- : "__llvm_retpoline_edx";
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edx";
case X86::R11:
- return Subtarget.useRetpolineExternalThunk()
- ? "__llvm_external_retpoline_r11"
- : "__llvm_retpoline_r11";
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__llvm_retpoline_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}
diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll
index 66d32ba..2f21bb2 100644
--- a/test/CodeGen/X86/retpoline-external.ll
+++ b/test/CodeGen/X86/retpoline-external.ll
@@ -23,18 +23,18 @@ entry:
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
-; X64: callq __llvm_external_retpoline_r11
+; X64: callq __x86_indirect_thunk_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
-; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: callq bar
-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
@@ -43,19 +43,19 @@ entry:
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86-NOT: # TAILCALL
; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
-; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: calll __x86_indirect_thunk_eax
; X86FAST: calll bar
-; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: calll __x86_indirect_thunk_eax
@global_fp = external global void (i32)*
@@ -72,28 +72,28 @@ define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
-; X64: callq __llvm_external_retpoline_r11
+; X64: callq __x86_indirect_thunk_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
-; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: movq global_fp(%rip), %r11
-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
-; X86: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86: jmp __x86_indirect_thunk_eax # TAILCALL
; X86FAST-LABEL: icall_global_fp:
-; X86FAST: calll __llvm_external_retpoline_eax
-; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86FAST: calll __x86_indirect_thunk_eax
+; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL
%struct.Foo = type { void (%struct.Foo*)** }
@@ -114,14 +114,14 @@ define void @vcall(%struct.Foo* %obj) #0 {
; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
-; X64: callq __llvm_external_retpoline_r11
+; X64: callq __x86_indirect_thunk_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
-; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
; X64FAST-LABEL: vcall:
-; X64FAST: callq __llvm_external_retpoline_r11
-; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
@@ -129,14 +129,14 @@ define void @vcall(%struct.Foo* %obj) #0 {
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
-; X86: calll __llvm_external_retpoline_eax
+; X86: calll __x86_indirect_thunk_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
-; X86: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86: jmp __x86_indirect_thunk_eax # TAILCALL
; X86FAST-LABEL: vcall:
-; X86FAST: calll __llvm_external_retpoline_eax
-; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
+; X86FAST: calll __x86_indirect_thunk_eax
+; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL
declare void @direct_callee()
--
1.8.3.1

View File

@ -1,88 +0,0 @@
From 8f5f7f9cb15387ddb010894c17e788b3116fe26d Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:33:00 +0000
Subject: [PATCH 2/4] Merging r324645:
------------------------------------------------------------------------
r324645 | dwmw2 | 2018-02-08 12:06:05 -0800 (Thu, 08 Feb 2018) | 5 lines
[X86] Support 'V' register operand modifier
This allows the register name to be printed without the leading '%'.
This can be used for emitting calls to the retpoline thunks from inline
asm.
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325089 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86AsmPrinter.cpp | 11 ++++++++++-
test/CodeGen/X86/inline-asm-modifier-V.ll | 14 ++++++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
create mode 100644 test/CodeGen/X86/inline-asm-modifier-V.ll
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index dc15aea..8c7ddd9 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -344,6 +344,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
unsigned Reg = MO.getReg();
+ bool EmitPercent = true;
+
switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
@@ -358,6 +360,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
case 'k': // Print SImode register
Reg = getX86SubSuperRegister(Reg, 32);
break;
+ case 'V':
+ EmitPercent = false;
+ LLVM_FALLTHROUGH;
case 'q':
// Print 64-bit register names if 64-bit integer registers are available.
// Otherwise, print 32-bit register names.
@@ -365,7 +370,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
break;
}
- O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ if (EmitPercent)
+ O << '%';
+
+ O << X86ATTInstPrinter::getRegisterName(Reg);
return false;
}
@@ -438,6 +446,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 'w': // Print HImode register
case 'k': // Print SImode register
case 'q': // Print DImode register
+ case 'V': // Print native register without '%'
if (MO.isReg())
return printAsmMRegister(*this, MO, ExtraCode[0], O);
printOperand(*this, MI, OpNo, O);
diff --git a/test/CodeGen/X86/inline-asm-modifier-V.ll b/test/CodeGen/X86/inline-asm-modifier-V.ll
new file mode 100644
index 0000000..5a7f3fd
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-modifier-V.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s
+
+; If the target does not have 64-bit integer registers, emit 32-bit register
+; names.
+
+; X86: call __x86_indirect_thunk_e{{[abcd]}}x
+; X64: call __x86_indirect_thunk_r
+
+define void @q_modifier(i32* %p) {
+entry:
+ tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
+ ret void
+}
--
1.8.3.1

View File

@ -1,308 +0,0 @@
From 4594a6164d5ae9252825e23a95aa6f2fce304d6e Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:34:13 +0000
Subject: [PATCH 3/4] Merging r325049:
------------------------------------------------------------------------
r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines
[X86] Use EDI for retpoline when no scratch regs are left
Summary:
Instead of solving the hard problem of how to pass the callee to the indirect
jump thunk without a register, just use a CSR. At a call boundary, there's
nothing stopping us from using a CSR to hold the callee as long as we save and
restore it in the prologue.
Also, add tests for this mregparm=3 case. I wrote execution tests for
__llvm_retpoline_push, but they never got committed as lit tests, either
because I never rewrote them or because they got lost in merge conflicts.
Reviewers: chandlerc, dwmw2
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D43214
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325090 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++++----------------------
lib/Target/X86/X86RetpolineThunks.cpp | 42 ++++++++---------------------
test/CodeGen/X86/retpoline-regparm.ll | 42 +++++++++++++++++++++++++++++
test/CodeGen/X86/retpoline.ll | 14 ++++------
4 files changed, 76 insertions(+), 72 deletions(-)
create mode 100644 test/CodeGen/X86/retpoline-regparm.ll
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9aa3023..59a9832 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26265,9 +26265,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
// attempt to help out kernels and other systems where duplicating the
// thunks is costly.
switch (Reg) {
- case 0:
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
- return "__x86_indirect_thunk";
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_eax";
@@ -26277,6 +26274,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__x86_indirect_thunk_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
@@ -26286,9 +26286,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
- case 0:
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
- return "__llvm_retpoline_push";
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
@@ -26298,6 +26295,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
@@ -26319,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
// just use R11, but we scan for uses anyway to ensure we don't generate
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
// already a register use operand to the call to hold the callee. If none
- // are available, push the callee instead. This is less efficient, but is
- // necessary for functions using 3 regparms. Such function calls are
- // (currently) not eligible for tail call optimization, because there is no
- // scratch register available to hold the address of the callee.
+ // are available, use EDI instead. EDI is chosen because EBX is the PIC base
+ // register and ESI is the base pointer to realigned stack frames with VLAs.
SmallVector<unsigned, 3> AvailableRegs;
if (Subtarget.is64Bit())
AvailableRegs.push_back(X86::R11);
else
- AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
+ AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
// Zero out any registers that are already used.
for (const auto &MO : MI.operands()) {
@@ -26345,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
break;
}
}
+ if (!AvailableReg)
+ report_fatal_error("calling convention incompatible with retpoline, no "
+ "available registers");
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
- if (AvailableReg == 0) {
- // No register available. Use PUSH. This must not be a tailcall, and this
- // must not be x64.
- if (Subtarget.is64Bit())
- report_fatal_error(
- "Cannot make an indirect call on x86-64 using both retpoline and a "
- "calling convention that preservers r11");
- if (Opc != X86::CALLpcrel32)
- report_fatal_error("Cannot make an indirect tail call on x86 using "
- "retpoline without a preserved register");
- BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
- MI.getOperand(0).ChangeToES(Symbol);
- MI.setDesc(TII->get(Opc));
- } else {
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
- .addReg(CalleeVReg);
- MI.getOperand(0).ChangeToES(Symbol);
- MI.setDesc(TII->get(Opc));
- MachineInstrBuilder(*BB->getParent(), &MI)
- .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
- }
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
+ .addReg(CalleeVReg);
+ MI.getOperand(0).ChangeToES(Symbol);
+ MI.setDesc(TII->get(Opc));
+ MachineInstrBuilder(*BB->getParent(), &MI)
+ .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
return BB;
}
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 223fa57..59ace3f 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[] = "__llvm_retpoline_eax";
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
static const char EDXThunkName[] = "__llvm_retpoline_edx";
-static const char PushThunkName[] = "__llvm_retpoline_push";
+static const char EDIThunkName[] = "__llvm_retpoline_edi";
namespace {
class X86RetpolineThunks : public MachineFunctionPass {
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
createThunkFunction(M, R11ThunkName);
else
for (StringRef Name :
- {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
+ {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
createThunkFunction(M, Name);
InsertedThunks = true;
return true;
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
populateThunk(MF, X86::R11);
} else {
// For 32-bit targets we need to emit a collection of thunks for various
- // possible scratch registers as well as a fallback that is used when
- // there are no scratch registers and assumes the retpoline target has
- // been pushed.
+ // possible scratch registers as well as a fallback that uses EDI, which is
+ // normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
// movl %edx, (%esp)
// retl
//
- // This last one is a bit more special and so needs a little extra
- // handling.
- // __llvm_retpoline_push:
- // calll .Lpush_call_target
- // .Lpush_capture_spec:
- // pause
- // lfence
- // jmp .Lpush_capture_spec
- // .align 16
- // .Lpush_call_target:
- // # Clear pause_loop return address.
- // addl $4, %esp
- // # Top of stack words are: Callee, RA. Exchange Callee and RA.
- // pushl 4(%esp) # Push callee
- // pushl 4(%esp) # Push RA
- // popl 8(%esp) # Pop RA to final RA
- // popl (%esp) # Pop callee to next top of stack
- // retl # Ret to callee
+ // __llvm_retpoline_edi:
+ // ... # Same setup
+ // movl %edi, (%esp)
+ // retl
if (MF.getName() == EAXThunkName)
populateThunk(MF, X86::EAX);
else if (MF.getName() == ECXThunkName)
populateThunk(MF, X86::ECX);
else if (MF.getName() == EDXThunkName)
populateThunk(MF, X86::EDX);
- else if (MF.getName() == PushThunkName)
- populateThunk(MF);
+ else if (MF.getName() == EDIThunkName)
+ populateThunk(MF, X86::EDI);
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->setAlignment(4);
- if (Reg) {
- insertRegReturnAddrClobber(*CallTarget, *Reg);
- } else {
- assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
- insert32BitPushReturnAddrClobber(*CallTarget);
- }
+ insertRegReturnAddrClobber(*CallTarget, *Reg);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
diff --git a/test/CodeGen/X86/retpoline-regparm.ll b/test/CodeGen/X86/retpoline-regparm.ll
new file mode 100644
index 0000000..13b3274
--- /dev/null
+++ b/test/CodeGen/X86/retpoline-regparm.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
+
+; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
+; because there are no available scratch registers. The Linux kernel builds
+; with -mregparm=3, so we need to support it. TCO should fail because we need
+; to restore EDI.
+
+define void @call_edi(void (i32, i32, i32)* %fp) #0 {
+entry:
+ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+ ret void
+}
+
+; CHECK-LABEL: call_edi:
+; EDI is used, so it must be saved.
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __llvm_retpoline_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+define void @edi_external(void (i32, i32, i32)* %fp) #1 {
+entry:
+ tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+ ret void
+}
+
+; CHECK-LABEL: edi_external:
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __x86_indirect_thunk_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+attributes #0 = { "target-features"="+retpoline" }
+attributes #1 = { "target-features"="+retpoline-external-thunk" }
diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll
index b0d4c85..562386e 100644
--- a/test/CodeGen/X86/retpoline.ll
+++ b/test/CodeGen/X86/retpoline.ll
@@ -336,10 +336,10 @@ latch:
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
-; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
-; X86-NEXT: .hidden __llvm_retpoline_push
-; X86-NEXT: .weak __llvm_retpoline_push
-; X86: __llvm_retpoline_push:
+; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
+; X86-NEXT: .hidden __llvm_retpoline_edi
+; X86-NEXT: .weak __llvm_retpoline_edi
+; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
@@ -351,11 +351,7 @@ latch:
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
-; X86-NEXT: addl $4, %esp
-; X86-NEXT: pushl 4(%esp)
-; X86-NEXT: pushl 4(%esp)
-; X86-NEXT: popl 8(%esp)
-; X86-NEXT: popl (%esp)
+; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl
--
1.8.3.1

View File

@ -1,65 +0,0 @@
From de9a0f9c449d4b13c70eff8c9a3023948dc21cb7 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 14 Feb 2018 00:34:35 +0000
Subject: [PATCH 4/4] Merging r325085:
------------------------------------------------------------------------
r325085 | rnk | 2018-02-13 16:24:29 -0800 (Tue, 13 Feb 2018) | 3 lines
[X86] Remove dead code from retpoline thunk generation
Follow-up to r325049
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@325091 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86RetpolineThunks.cpp | 26 --------------------------
1 file changed, 26 deletions(-)
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 59ace3f..d03826b 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -74,7 +74,6 @@ private:
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
};
@@ -225,31 +224,6 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
.addReg(Reg);
}
-void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
- MachineBasicBlock &MBB) {
- // The instruction sequence we use to replace the return address without
- // a scratch register is somewhat complicated:
- // # Clear capture_spec from return address.
- // addl $4, %esp
- // # Top of stack words are: Callee, RA. Exchange Callee and RA.
- // pushl 4(%esp) # Push callee
- // pushl 4(%esp) # Push RA
- // popl 8(%esp) # Pop RA to final RA
- // popl (%esp) # Pop callee to next top of stack
- // retl # Ret to callee
- BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP)
- .addReg(X86::ESP)
- .addImm(4);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
- false, 4);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
- false, 4);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
- false, 8);
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
- false, 0);
-}
-
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
Optional<unsigned> Reg) {
// Set MF properties. We never use vregs...
--
1.8.3.1

View File

@ -8,11 +8,11 @@
%global llvm_bindir %{_libdir}/%{name}
%global maj_ver 5
%global min_ver 0
%global patch_ver 1
%global patch_ver 2
Name: llvm
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
Release: 6%{?dist}
Release: 1%{?dist}
Summary: The Low Level Virtual Machine
License: NCSA
@ -28,12 +28,6 @@ Patch4: 0001-Revert-Add-a-linker-script-to-version-LLVM-symbols.patch
Patch5: 0001-PowerPC-Don-t-use-xscvdpspn-on-the-P7.patch
Patch6: 0001-Ignore-all-duplicate-frame-index-expression.patch
Patch7: 0002-Reinstantiate-old-bad-deduplication-logic-that-was-r.patch
Patch8: 0001-Merging-r323155.patch
Patch9: 0001-Merging-r323915.patch
Patch10: 0001-Merging-r324449.patch
Patch11: 0002-Merging-r324645.patch
Patch12: 0003-Merging-r325049.patch
Patch13: 0004-Merging-r325085.patch
Patch14: 0001-PPC-Avoid-non-simple-MVT-in-STBRX-optimization.patch
BuildRequires: cmake
@ -218,6 +212,9 @@ fi
%{_libdir}/cmake/llvm/LLVMStaticExports.cmake
%changelog
* Thu May 03 2018 Tom Stellard <tstellar@redhat.com> - 5.0.2-1
- 5.0.2 Release
* Tue Mar 27 2018 Tom Stellard <tstellar@redhat.com> - 5.0.1-6
- Re-enable arm tests that used to hang

View File

@ -1 +1 @@
SHA512 (llvm-5.0.1.src.tar.xz) = bee1d45fca15ce725b1f2b1339b13eb6f750a3a321cfd099075477ec25835a8ca55b5366172c4aad46592dfd8afe372349ecf264f581463d017f9cee2d63c1cb
SHA512 (llvm-5.0.2.src.tar.xz) = 3588be5ed969c3f7f6f16f56a12a6af2814d3d3c960d4a36ffebb0446cc75f19220bccee7fc605f9b01f5d5c188a905a046193cc12dec42dd5922048b5c27fe1