Merge branch 'master' into f20
Signed-off-by: Adam Jackson <ajax@redhat.com>
This commit is contained in:
commit
8e35e823b9
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@
|
||||
/compiler-rt-*.src.tar.gz
|
||||
/lldb-*.src.tar.gz
|
||||
/llvm-*.src.tar.gz
|
||||
/clang-3.4.src.tar.gz
|
||||
|
@ -1,22 +1,12 @@
|
||||
Hack the linker flags for shared libs for speed and memory usage
|
||||
diff -up llvm-3.4/tools/llvm-shlib/Makefile.orig llvm-3.4/tools/llvm-shlib/Makefile
|
||||
--- llvm-3.4/tools/llvm-shlib/Makefile.orig 2013-11-01 00:35:00.000000000 +1000
|
||||
+++ llvm-3.4/tools/llvm-shlib/Makefile 2014-01-14 10:13:20.069858909 +1000
|
||||
@@ -75,7 +75,7 @@ endif
|
||||
|
||||
---
|
||||
tools/llvm-shlib/Makefile | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile
|
||||
index 6d6c6e9..4038df4 100644
|
||||
--- a/tools/llvm-shlib/Makefile
|
||||
+++ b/tools/llvm-shlib/Makefile
|
||||
@@ -76,7 +76,7 @@ endif
|
||||
|
||||
ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU))
|
||||
ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU GNU/kFreeBSD))
|
||||
# Don't allow unresolved symbols.
|
||||
- LLVMLibsOptions += -Wl,--no-undefined
|
||||
+ LLVMLibsOptions += -Wl,--no-undefined -Wl,-Bsymbolic
|
||||
endif
|
||||
|
||||
ifeq ($(HOST_OS),SunOS)
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
|
@ -1,23 +0,0 @@
|
||||
Fixes the build with gcc in gnu++98 and gnu++11 mode.
|
||||
|
||||
https://github.com/llvm-mirror/llvm/commit/d1bf52275daa86e838ebbffc71efd43fc8c416f4
|
||||
|
||||
---
|
||||
lib/Support/Unix/Memory.inc | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
|
||||
index 72a8af6..91dc7da 100644
|
||||
--- a/lib/Support/Unix/Memory.inc
|
||||
+++ b/lib/Support/Unix/Memory.inc
|
||||
@@ -33,6 +33,7 @@
|
||||
#endif
|
||||
|
||||
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
|
||||
+extern "C" void __clear_cache(void *, void *);
|
||||
|
||||
namespace {
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
459
llvm-3.4-radeonsi-backport.patch
Normal file
459
llvm-3.4-radeonsi-backport.patch
Normal file
@ -0,0 +1,459 @@
|
||||
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
|
||||
index 99e1377..7105879 100644
|
||||
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
|
||||
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
|
||||
@@ -316,6 +316,37 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
|
||||
}
|
||||
}
|
||||
|
||||
+void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
|
||||
+ raw_ostream &O) {
|
||||
+ unsigned SImm16 = MI->getOperand(OpNo).getImm();
|
||||
+ unsigned Msg = SImm16 & 0xF;
|
||||
+ if (Msg == 2 || Msg == 3) {
|
||||
+ unsigned Op = (SImm16 >> 4) & 0xF;
|
||||
+ if (Msg == 3)
|
||||
+ O << "Gs_done(";
|
||||
+ else
|
||||
+ O << "Gs(";
|
||||
+ if (Op == 0) {
|
||||
+ O << "nop";
|
||||
+ } else {
|
||||
+ unsigned Stream = (SImm16 >> 8) & 0x3;
|
||||
+ if (Op == 1)
|
||||
+ O << "cut";
|
||||
+ else if (Op == 2)
|
||||
+ O << "emit";
|
||||
+ else if (Op == 3)
|
||||
+ O << "emit-cut";
|
||||
+ O << " stream " << Stream;
|
||||
+ }
|
||||
+ O << "), [m0] ";
|
||||
+ } else if (Msg == 1)
|
||||
+ O << "interrupt ";
|
||||
+ else if (Msg == 15)
|
||||
+ O << "system ";
|
||||
+ else
|
||||
+ O << "unknown(" << Msg << ") ";
|
||||
+}
|
||||
+
|
||||
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
// Note: Mask values are taken from SIInsertWaits.cpp and not from ISA docs
|
||||
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
|
||||
index 77af942..2876dd2 100644
|
||||
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
|
||||
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
|
||||
@@ -53,6 +53,7 @@ private:
|
||||
void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
+ void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
};
|
||||
|
||||
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
|
||||
index 7ef662e..695ec40 100644
|
||||
--- a/lib/Target/R600/SIInsertWaits.cpp
|
||||
+++ b/lib/Target/R600/SIInsertWaits.cpp
|
||||
@@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
|
||||
|
||||
Counters Result = ZeroCounts;
|
||||
|
||||
+ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
|
||||
+ // but we also want to wait for any other outstanding transfers before
|
||||
+ // signalling other hardware blocks
|
||||
+ if (MI.getOpcode() == AMDGPU::S_SENDMSG)
|
||||
+ return LastIssued;
|
||||
+
|
||||
// For each register affected by this
|
||||
// instruction increase the result sequence
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
|
||||
index 4cd0daa..19d2171 100644
|
||||
--- a/lib/Target/R600/SIInstrInfo.td
|
||||
+++ b/lib/Target/R600/SIInstrInfo.td
|
||||
@@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
|
||||
|
||||
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
|
||||
|
||||
- let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
|
||||
- mayLoad = 1 in {
|
||||
-
|
||||
- let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
|
||||
- def _OFFEN : MUBUF <op, (outs regClass:$vdata),
|
||||
- (ins SReg_128:$srsrc, VReg_32:$vaddr),
|
||||
- asm#" $vdata, $srsrc + $vaddr", []>;
|
||||
- }
|
||||
-
|
||||
- let offen = 0, idxen = 1, addr64 = 0 in {
|
||||
- def _IDXEN : MUBUF <op, (outs regClass:$vdata),
|
||||
- (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
|
||||
- asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
|
||||
- }
|
||||
+ let lds = 0, mayLoad = 1 in {
|
||||
+
|
||||
+ let addr64 = 0 in {
|
||||
+
|
||||
+ let offen = 0, idxen = 0 in {
|
||||
+ def _OFFSET : MUBUF <op, (outs regClass:$vdata),
|
||||
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
|
||||
+ i1imm:$slc, i1imm:$tfe),
|
||||
+ asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
+ }
|
||||
+
|
||||
+ let offen = 1, idxen = 0, offset = 0 in {
|
||||
+ def _OFFEN : MUBUF <op, (outs regClass:$vdata),
|
||||
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||
+ SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
|
||||
+ i1imm:$tfe),
|
||||
+ asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
+ }
|
||||
+
|
||||
+ let offen = 0, idxen = 1 in {
|
||||
+ def _IDXEN : MUBUF <op, (outs regClass:$vdata),
|
||||
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
|
||||
+ i1imm:$slc, i1imm:$tfe),
|
||||
+ asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
+ }
|
||||
+
|
||||
+ let offen = 1, idxen = 1 in {
|
||||
+ def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
|
||||
+ (ins SReg_128:$srsrc, VReg_64:$vaddr,
|
||||
+ SSrc_32:$soffset, i1imm:$glc,
|
||||
+ i1imm:$slc, i1imm:$tfe),
|
||||
+ asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- let offen = 0, idxen = 0, addr64 = 1 in {
|
||||
- def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
|
||||
- (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
|
||||
- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
|
||||
- }
|
||||
+ let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
|
||||
+ def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
|
||||
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
|
||||
+ asm#" $vdata, $srsrc + $vaddr + $offset", []>;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
|
||||
index 76f05eb..9acb9b6 100644
|
||||
--- a/lib/Target/R600/SIInstructions.td
|
||||
+++ b/lib/Target/R600/SIInstructions.td
|
||||
@@ -22,6 +22,10 @@ def InterpSlot : Operand<i32> {
|
||||
let PrintMethod = "printInterpSlot";
|
||||
}
|
||||
|
||||
+def SendMsgImm : Operand<i32> {
|
||||
+ let PrintMethod = "printSendMsg";
|
||||
+}
|
||||
+
|
||||
def isSI : Predicate<"Subtarget.getGeneration() "
|
||||
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
|
||||
|
||||
@@ -826,17 +830,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
|
||||
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
|
||||
[]
|
||||
>;
|
||||
-} // End hasSideEffects
|
||||
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
|
||||
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
|
||||
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
|
||||
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
|
||||
+
|
||||
+let Uses = [EXEC] in {
|
||||
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
|
||||
+ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
|
||||
+ > {
|
||||
+ let DisableEncoding = "$m0";
|
||||
+ }
|
||||
+} // End Uses = [EXEC]
|
||||
+
|
||||
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
|
||||
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
|
||||
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
|
||||
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
|
||||
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
|
||||
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
|
||||
+} // End hasSideEffects
|
||||
|
||||
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
|
||||
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
|
||||
@@ -1305,8 +1317,8 @@ def SI_END_CF : InstSI <
|
||||
|
||||
def SI_KILL : InstSI <
|
||||
(outs),
|
||||
- (ins VReg_32:$src),
|
||||
- "SI_KIL $src",
|
||||
+ (ins VSrc_32:$src),
|
||||
+ "SI_KILL $src",
|
||||
[(int_AMDGPU_kill f32:$src)]
|
||||
>;
|
||||
|
||||
@@ -1397,13 +1409,13 @@ def : Pat<
|
||||
|
||||
def : Pat <
|
||||
(int_AMDGPU_kilp),
|
||||
- (SI_KILL (V_MOV_B32_e32 0xbf800000))
|
||||
+ (SI_KILL 0xbf800000)
|
||||
>;
|
||||
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
|
||||
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
|
||||
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
/* int_SI_export */
|
||||
@@ -1809,7 +1821,7 @@ def : Pat <
|
||||
// 3. Offset in an 32Bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant i128:$sbase, i32:$voff),
|
||||
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
|
||||
+ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
// The multiplication scales from [0,1] to the unsigned integer range
|
||||
@@ -1970,6 +1982,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
|
||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
|
||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
|
||||
|
||||
+// BUFFER_LOAD_DWORD*, addr64=0
|
||||
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
|
||||
+ MUBUF bothen> {
|
||||
+
|
||||
+ def : Pat <
|
||||
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
|
||||
+ imm:$tfe)),
|
||||
+ (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||
+ (as_i1imm $slc), (as_i1imm $tfe))
|
||||
+ >;
|
||||
+
|
||||
+ def : Pat <
|
||||
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
+ imm, 1, 0, imm:$glc, imm:$slc,
|
||||
+ imm:$tfe)),
|
||||
+ (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||
+ (as_i1imm $tfe))
|
||||
+ >;
|
||||
+
|
||||
+ def : Pat <
|
||||
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
|
||||
+ imm:$tfe)),
|
||||
+ (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||
+ (as_i1imm $slc), (as_i1imm $tfe))
|
||||
+ >;
|
||||
+
|
||||
+ def : Pat <
|
||||
+ (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
|
||||
+ imm, 1, 1, imm:$glc, imm:$slc,
|
||||
+ imm:$tfe)),
|
||||
+ (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||
+ (as_i1imm $tfe))
|
||||
+ >;
|
||||
+}
|
||||
+
|
||||
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
|
||||
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
|
||||
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
|
||||
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
|
||||
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
|
||||
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
|
||||
+
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
|
||||
index 7fcc964..00e32c0 100644
|
||||
--- a/lib/Target/R600/SIIntrinsics.td
|
||||
+++ b/lib/Target/R600/SIIntrinsics.td
|
||||
@@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
||||
llvm_i32_ty], // tfe(imm)
|
||||
[]>;
|
||||
|
||||
+ // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
|
||||
+ def int_SI_buffer_load_dword : Intrinsic <
|
||||
+ [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
|
||||
+ [llvm_anyint_ty, // rsrc(SGPR)
|
||||
+ llvm_anyint_ty, // vaddr(VGPR)
|
||||
+ llvm_i32_ty, // soffset(SGPR)
|
||||
+ llvm_i32_ty, // inst_offset(imm)
|
||||
+ llvm_i32_ty, // offen(imm)
|
||||
+ llvm_i32_ty, // idxen(imm)
|
||||
+ llvm_i32_ty, // glc(imm)
|
||||
+ llvm_i32_ty, // slc(imm)
|
||||
+ llvm_i32_ty], // tfe(imm)
|
||||
+ [IntrReadArgMem]>;
|
||||
+
|
||||
+ def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
+
|
||||
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_SI_sample : Sample;
|
||||
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
|
||||
index 958763d..254f3a6 100644
|
||||
--- a/lib/Target/R600/SILowerControlFlow.cpp
|
||||
+++ b/lib/Target/R600/SILowerControlFlow.cpp
|
||||
@@ -55,6 +55,7 @@
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
+#include "llvm/IR/Constants.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@@ -145,7 +146,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
- if (!shouldSkip(&MBB, &MBB.getParent()->back()))
|
||||
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
|
||||
+ ShaderType::PIXEL ||
|
||||
+ !shouldSkip(&MBB, &MBB.getParent()->back()))
|
||||
return;
|
||||
|
||||
MachineBasicBlock::iterator Insert = &MI;
|
||||
@@ -295,15 +298,27 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
+ const MachineOperand &Op = MI.getOperand(0);
|
||||
|
||||
- // Kill is only allowed in pixel shaders
|
||||
+ // Kill is only allowed in pixel / geometry shaders
|
||||
assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
|
||||
- ShaderType::PIXEL);
|
||||
-
|
||||
- // Clear this pixel from the exec mask if the operand is negative
|
||||
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
|
||||
- .addImm(0)
|
||||
- .addOperand(MI.getOperand(0));
|
||||
+ ShaderType::PIXEL ||
|
||||
+ MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
|
||||
+ ShaderType::GEOMETRY);
|
||||
+
|
||||
+ // Clear this thread from the exec mask if the operand is negative
|
||||
+ if ((Op.isImm() || Op.isFPImm())) {
|
||||
+ // Constant operand: Set exec mask to 0 or do nothing
|
||||
+ if (Op.isImm() ? (Op.getImm() & 0x80000000) :
|
||||
+ Op.getFPImm()->isNegative()) {
|
||||
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
|
||||
+ .addImm(0);
|
||||
+ }
|
||||
+ } else {
|
||||
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
|
||||
+ .addImm(0)
|
||||
+ .addOperand(Op);
|
||||
+ }
|
||||
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
|
||||
new file mode 100644
|
||||
index 0000000..4ab6a8a
|
||||
--- /dev/null
|
||||
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
|
||||
@@ -0,0 +1,22 @@
|
||||
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
+
|
||||
+; SI-LABEL: @kill_gs_const
|
||||
+; SI-NOT: V_CMPX_LE_F32
|
||||
+; SI: S_MOV_B64 exec, 0
|
||||
+
|
||||
+define void @kill_gs_const() #0 {
|
||||
+main_body:
|
||||
+ %0 = icmp ule i32 0, 3
|
||||
+ %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
|
||||
+ call void @llvm.AMDGPU.kill(float %1)
|
||||
+ %2 = icmp ule i32 3, 0
|
||||
+ %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
|
||||
+ call void @llvm.AMDGPU.kill(float %3)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+declare void @llvm.AMDGPU.kill(float)
|
||||
+
|
||||
+attributes #0 = { "ShaderType"="2" }
|
||||
+
|
||||
+!0 = metadata !{metadata !"const", null, i32 1}
|
||||
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
|
||||
new file mode 100644
|
||||
index 0000000..a622775
|
||||
--- /dev/null
|
||||
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
|
||||
@@ -0,0 +1,40 @@
|
||||
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
+
|
||||
+; Example of a simple geometry shader loading vertex attributes from the
|
||||
+; ESGS ring buffer
|
||||
+
|
||||
+; CHECK-LABEL: @main
|
||||
+; CHECK: BUFFER_LOAD_DWORD
|
||||
+; CHECK: BUFFER_LOAD_DWORD
|
||||
+; CHECK: BUFFER_LOAD_DWORD
|
||||
+; CHECK: BUFFER_LOAD_DWORD
|
||||
+
|
||||
+define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
|
||||
+main_body:
|
||||
+ %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
|
||||
+ %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
|
||||
+ %12 = shl i32 %6, 2
|
||||
+ %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
|
||||
+ %14 = bitcast i32 %13 to float
|
||||
+ %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
+ %16 = bitcast i32 %15 to float
|
||||
+ %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
|
||||
+ %18 = bitcast i32 %17 to float
|
||||
+ %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
+ %20 = bitcast i32 %19 to float
|
||||
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: nounwind readonly
|
||||
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
+
|
||||
+; Function Attrs: nounwind readonly
|
||||
+declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
+
|
||||
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
+
|
||||
+attributes #0 = { "ShaderType"="1" }
|
||||
+attributes #1 = { nounwind readonly }
|
||||
+
|
||||
+!0 = metadata !{metadata !"const", null, i32 1}
|
||||
diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
|
||||
new file mode 100644
|
||||
index 0000000..581d422
|
||||
--- /dev/null
|
||||
+++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
|
||||
@@ -0,0 +1,21 @@
|
||||
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
+
|
||||
+; CHECK-LABEL: @main
|
||||
+; CHECK: S_SENDMSG Gs(emit stream 0)
|
||||
+; CHECK: S_SENDMSG Gs(cut stream 1)
|
||||
+; CHECK: S_SENDMSG Gs(emit-cut stream 2)
|
||||
+; CHECK: S_SENDMSG Gs_done(nop)
|
||||
+
|
||||
+define void @main() {
|
||||
+main_body:
|
||||
+ call void @llvm.SI.sendmsg(i32 34, i32 0);
|
||||
+ call void @llvm.SI.sendmsg(i32 274, i32 0);
|
||||
+ call void @llvm.SI.sendmsg(i32 562, i32 0);
|
||||
+ call void @llvm.SI.sendmsg(i32 3, i32 0);
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: nounwind
|
||||
+declare void @llvm.SI.sendmsg(i32, i32) #0
|
||||
+
|
||||
+attributes #0 = { nounwind }
|
78
llvm.spec
78
llvm.spec
@ -16,8 +16,8 @@
|
||||
%else
|
||||
%bcond_with gold
|
||||
%endif
|
||||
# ppc64 fails to build lldb upstream
|
||||
%ifnarch ppc ppc64
|
||||
# lldb not ported to anything but x86 so far.
|
||||
%ifarch x86_64 %{ix86}
|
||||
%bcond_without lldb
|
||||
%else
|
||||
%bcond_with lldb
|
||||
@ -31,11 +31,12 @@
|
||||
%global llvmdocdir() %{_docdir}/%1
|
||||
%endif
|
||||
|
||||
%global downloadurl http://llvm.org/releases/%{version}
|
||||
#global prerel rc3
|
||||
%global downloadurl http://llvm.org/%{?prerel:pre-}releases/%{version}%{?prerel:/%{prerel}}
|
||||
|
||||
Name: llvm
|
||||
Version: 3.3
|
||||
Release: 5%{?dist}
|
||||
Version: 3.4
|
||||
Release: 6%{?dist}
|
||||
Summary: The Low Level Virtual Machine
|
||||
|
||||
Group: Development/Languages
|
||||
@ -43,10 +44,12 @@ License: NCSA
|
||||
URL: http://llvm.org/
|
||||
|
||||
# source archives
|
||||
Source0: %{downloadurl}/llvm-%{version}.src.tar.gz
|
||||
Source1: %{downloadurl}/cfe-%{version}.src.tar.gz
|
||||
Source2: %{downloadurl}/compiler-rt-%{version}.src.tar.gz
|
||||
Source3: %{downloadurl}/lldb-%{version}.src.tar.gz
|
||||
Source0: %{downloadurl}/llvm-%{version}%{?prerel}.src.tar.gz
|
||||
Source1: %{downloadurl}/clang-%{version}%{?prerel}.src.tar.gz
|
||||
Source2: %{downloadurl}/compiler-rt-%{version}%{?prerel}.src.tar.gz
|
||||
%if %{with lldb}
|
||||
Source3: %{downloadurl}/lldb-%{version}%{?prerel}.src.tar.gz
|
||||
%endif
|
||||
|
||||
# multilib fixes
|
||||
Source10: llvm-Config-config.h
|
||||
@ -55,7 +58,9 @@ Source11: llvm-Config-llvm-config.h
|
||||
# patches
|
||||
Patch1: 0001-data-install-preserve-timestamps.patch
|
||||
Patch2: 0002-linker-flags-speedup-memory.patch
|
||||
Patch3: 0003-fix-clear-cache-declaration.patch
|
||||
|
||||
# radeonsi GL 3.3 backport
|
||||
Patch3: llvm-3.4-radeonsi-backport.patch
|
||||
|
||||
BuildRequires: bison
|
||||
BuildRequires: chrpath
|
||||
@ -69,6 +74,7 @@ BuildRequires: binutils-devel
|
||||
%if %{with ocaml}
|
||||
BuildRequires: ocaml-ocamldoc
|
||||
%endif
|
||||
BuildRequires: ncurses-devel
|
||||
BuildRequires: zip
|
||||
# for DejaGNU test suite
|
||||
BuildRequires: dejagnu tcl-devel python
|
||||
@ -94,7 +100,7 @@ Group: Development/Languages
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Requires: libffi-devel
|
||||
Requires: libstdc++-devel >= 3.4
|
||||
|
||||
Requires: ncurses-devel
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
@ -118,6 +124,12 @@ Documentation for the LLVM compiler infrastructure.
|
||||
%package libs
|
||||
Summary: LLVM shared libraries
|
||||
Group: System Environment/Libraries
|
||||
## retire OpenGTL/libQtGTL here
|
||||
Obsoletes: OpenGTL < 0.9.18-50
|
||||
Obsoletes: OpenGTL-libs < 0.9.18-50
|
||||
Obsoletes: OpenGTL-devel < 0.9.18-50
|
||||
Obsoletes: libQtGTL < 0.9.3-50
|
||||
Obsoletes: libQtGTL-devel < 0.9.3-50
|
||||
|
||||
%description libs
|
||||
Shared libraries for the LLVM compiler infrastructure.
|
||||
@ -197,6 +209,14 @@ LLDB is a next generation, high-performance debugger. It is built as a set
|
||||
of reusable components which highly leverage existing libraries in the
|
||||
larger LLVM Project, such as the Clang expression parser and LLVM
|
||||
disassembler.
|
||||
|
||||
%package -n lldb-devel
|
||||
Summary: Header files for LLDB
|
||||
Group: Development/Languages
|
||||
Requires: lldb%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description -n lldb-devel
|
||||
This package contains header files for the LLDB debugger.
|
||||
%endif
|
||||
|
||||
%if %{with doxygen}
|
||||
@ -259,16 +279,16 @@ HTML documentation for LLVM's OCaml binding.
|
||||
|
||||
|
||||
%prep
|
||||
%setup -q -n llvm-%{version}.src %{?with_clang:-a1} %{?with_crt:-a2} %{?with_lldb:-a3}
|
||||
%setup -q %{?with_clang:-a1} %{?with_crt:-a2} %{?with_lldb:-a3}
|
||||
rm -rf tools/clang tools/lldb projects/compiler-rt
|
||||
%if %{with clang}
|
||||
mv cfe-%{version}.src tools/clang
|
||||
mv clang-%{version} tools/clang
|
||||
%endif
|
||||
%if %{with crt}
|
||||
mv compiler-rt-%{version}.src projects/compiler-rt
|
||||
mv compiler-rt-%{version} projects/compiler-rt
|
||||
%endif
|
||||
%if %{with lldb}
|
||||
mv lldb-%{version}.src tools/lldb
|
||||
mv lldb-%{version} tools/lldb
|
||||
%endif
|
||||
|
||||
%patch1 -p1
|
||||
@ -527,6 +547,7 @@ exit 0
|
||||
%{_bindir}/bugpoint
|
||||
%{_bindir}/llc
|
||||
%{_bindir}/lli
|
||||
%{_bindir}/lli-child-target
|
||||
%exclude %{_bindir}/llvm-config-%{__isa_bits}
|
||||
%{_bindir}/llvm*
|
||||
%{_bindir}/macho-dump
|
||||
@ -596,6 +617,10 @@ exit 0
|
||||
%{_bindir}/lldb-platform
|
||||
%{_libdir}/%{name}/liblldb.so
|
||||
%doc %{_mandir}/man1/lldb.1.*
|
||||
|
||||
%files -n lldb-devel
|
||||
%defattr(-,root,root,-)
|
||||
%{_includedir}/lldb
|
||||
%endif
|
||||
|
||||
%files doc
|
||||
@ -607,7 +632,8 @@ exit 0
|
||||
%defattr(-,root,root,-)
|
||||
%{_libdir}/ocaml/*.cma
|
||||
%{_libdir}/ocaml/*.cmi
|
||||
%{_libdir}/ocaml/META.llvm
|
||||
%{_libdir}/ocaml/dll*.so
|
||||
%{_libdir}/ocaml/META.llvm*
|
||||
|
||||
%files ocaml-devel
|
||||
%defattr(-,root,root,-)
|
||||
@ -633,8 +659,24 @@ exit 0
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Jan 17 2014 Dave Airlie <airlied@redhat.com> 3.3-5
|
||||
- build after disabling lldb for ppc64
|
||||
* Thu Mar 27 2014 Rex Dieter <rdieter@fedoraproject.org> 3.4-6
|
||||
- -libs: Obsoletes: OpenGTL libQtGTL
|
||||
|
||||
* Wed Mar 19 2014 Dave Airlie <airlied@redhat.com> 3.4-5
|
||||
- backport patches from 3.5 to enable GL3.3 on radeonsi
|
||||
|
||||
* Fri Jan 31 2014 Kyle McMartin <kyle@redhat.com> 3.4-4
|
||||
- Disable lldb on everything but x86_64, and i686. It hasn't been ported
|
||||
beyond those platforms so far.
|
||||
|
||||
* Fri Jan 17 2014 Dave Airlie <airlied@redhat.com> 3.4-3
|
||||
- bump nvr for lldb on ppc disable
|
||||
|
||||
* Tue Jan 14 2014 Dave Airlie <airlied@redhat.com> 3.4-2
|
||||
- add ncurses-devel BR and Requires
|
||||
|
||||
* Tue Jan 14 2014 Dave Airlie <airlied@redhat.com> 3.4-1
|
||||
- update to llvm 3.4 release
|
||||
|
||||
* Fri Dec 20 2013 Jan Vcelak <jvcelak@fedoraproject.org> 3.3-4
|
||||
- remove RPATHs
|
||||
|
8
sources
8
sources
@ -1,4 +1,4 @@
|
||||
8284891e3e311829b8e44ac813d0c9ef cfe-3.3.src.tar.gz
|
||||
9c129ce24514467cfe492cf2fed8e2c4 compiler-rt-3.3.src.tar.gz
|
||||
c583c80c25e56a41e3e5ae7c2f442929 lldb-3.3.src.tar.gz
|
||||
40564e1dc390f9844f1711c08b08e391 llvm-3.3.src.tar.gz
|
||||
b378f1e2c424e03289effc75268d3d2c clang-3.4.src.tar.gz
|
||||
7ed60a0463f9fdfa20db7109d4624cee lldb-3.4.src.tar.gz
|
||||
7938353e3a3bda85733a165e7ac4bb84 compiler-rt-3.4.src.tar.gz
|
||||
46ed668a1ce38985120dbf6344cf6116 llvm-3.4.src.tar.gz
|
||||
|
Loading…
Reference in New Issue
Block a user