backport upstream patch for power8 support
This commit is contained in:
parent
f086598d07
commit
13262e006a
|
@ -9,3 +9,4 @@ K7323DNow.tgz
|
||||||
/ARMv732NEON.tar.bz2
|
/ARMv732NEON.tar.bz2
|
||||||
/lapack-3.5.0.tgz
|
/lapack-3.5.0.tgz
|
||||||
/atlas3.10.2.tar.bz2
|
/atlas3.10.2.tar.bz2
|
||||||
|
/POWER864LEVSXp4.tar.bz2
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
Subject: atlas new archdef for ppc64le
|
||||||
|
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
Date: Sun, 13 Jun 2014 18:02:47 +0200
|
||||||
|
|
||||||
|
Need to define different archdef names
|
||||||
|
for ppc64 (that is Big Endian) and ppc64le (that is Little Endian).
|
||||||
|
This is already done upstream in atlas 3.11.30 with issue
|
||||||
|
https://sourceforge.net/p/math-atlas/patches/66/
|
||||||
|
|
||||||
|
Required at least as long as I need the bypass of
|
||||||
|
atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
|
||||||
|
|
||||||
|
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
---
|
||||||
|
CONFIG/src/SpewMakeInc.c | 4 ++++
|
||||||
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
|
Index: ATLAS/CONFIG/src/SpewMakeInc.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/CONFIG/src/SpewMakeInc.c
|
||||||
|
+++ ATLAS/CONFIG/src/SpewMakeInc.c
|
||||||
|
@@ -542,6 +542,10 @@ int main(int nargs, char **args)
|
||||||
|
fprintf(fpout, "# -------------------------------------------------\n");
|
||||||
|
fprintf(fpout, " ARCH = %s", machnam[mach]);
|
||||||
|
fprintf(fpout, "%d", ptrbits);
|
||||||
|
+ /* for ppc64le archi add 'LE' characters */
|
||||||
|
+ #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||||
|
+ fprintf(fpout, "%s", "LE");
|
||||||
|
+ #endif
|
||||||
|
if (ISAX)
|
||||||
|
fprintf(fpout, "%s", ISAXNAM[ISAX]);
|
||||||
|
if (!USEIEEE)
|
|
@ -0,0 +1,131 @@
|
||||||
|
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
Subject: atlas.3.10.2 add power8 cpu
|
||||||
|
Date: Thu, 18 Sep 2014 15:13:24 +0200
|
||||||
|
|
||||||
|
atlas.3.10.2 add Power8 cpu
|
||||||
|
tracked upstream by issue 67
|
||||||
|
https://sourceforge.net/p/math-atlas/patches/67/
|
||||||
|
|
||||||
|
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
---
|
||||||
|
CONFIG/ARCHS/Make.ext | 7 +++++++
|
||||||
|
CONFIG/include/atlconf.h | 6 +++---
|
||||||
|
CONFIG/src/atlcomp.txt | 6 ++++++
|
||||||
|
CONFIG/src/backend/archinfo_aix.c | 2 ++
|
||||||
|
CONFIG/src/backend/archinfo_linux.c | 1 +
|
||||||
|
include/atlas_pca.h | 2 +-
|
||||||
|
6 files changed, 20 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
Index: ATLAS/CONFIG/ARCHS/Make.ext
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/CONFIG/ARCHS/Make.ext
|
||||||
|
+++ ATLAS/CONFIG/ARCHS/Make.ext
|
||||||
|
@@ -33,6 +33,7 @@ files = AMD64K10h32SSE3.tar.bz2 AMD64K10
|
||||||
|
MIPSR1xK64.tar.bz2 Makefile P432SSE2.tar.bz2 P4E32SSE3.tar.bz2 \
|
||||||
|
P4E64SSE3.tar.bz2 PIII32SSE1.tar.bz2 POWER432.tar.bz2 \
|
||||||
|
POWER464.tar.bz2 POWER564.tar.bz2 POWER764VSX.tar.bz2 \
|
||||||
|
+ POWER864VSX.tar.bz2 \
|
||||||
|
PPCG432AltiVec.tar.bz2 PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 \
|
||||||
|
PPRO32.tar.bz2 USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 \
|
||||||
|
USIV64.tar.bz2 UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \
|
||||||
|
@@ -308,6 +309,12 @@ POWER764VSX.tar.bz2 : $(basdr)/POWER764V
|
||||||
|
/tmp/POWER764VSX.tar POWER764VSX
|
||||||
|
bzip2 /tmp/POWER764VSX.tar
|
||||||
|
mv /tmp/POWER764VSX.tar.bz2 ./.
|
||||||
|
+POWER864VSX.tar.bz2 : $(basdr)/POWER864VSX
|
||||||
|
+ - rm -f /tmp/POWER864VSX.tar /tmp/POWER864VSX.tar.bz2
|
||||||
|
+ cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
|
||||||
|
+ /tmp/POWER864VSX.tar POWER864VSX
|
||||||
|
+ bzip2 /tmp/POWER864VSX.tar
|
||||||
|
+ mv /tmp/POWER864VSX.tar.bz2 ./.
|
||||||
|
IBMz1032.tar.bz2 : $(basdr)/IBMz1032
|
||||||
|
- rm -f /tmp/IBMz1032.tar /tmp/IBMz1032.tar.bz2
|
||||||
|
cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
|
||||||
|
Index: ATLAS/CONFIG/include/atlconf.h
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/CONFIG/include/atlconf.h
|
||||||
|
+++ ATLAS/CONFIG/include/atlconf.h
|
||||||
|
@@ -18,10 +18,10 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS
|
||||||
|
enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||||
|
AFARM, AFS390};
|
||||||
|
|
||||||
|
-#define NMACH 52
|
||||||
|
+#define NMACH 53
|
||||||
|
static char *machnam[NMACH] =
|
||||||
|
{"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5",
|
||||||
|
- "POWER6", "POWER7", "POWERe6500", "IBMz9", "IBMz10", "IBMz196",
|
||||||
|
+ "POWER6", "POWER7", "POWER8", "POWERe6500", "IBMz9", "IBMz10", "IBMz196",
|
||||||
|
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
|
||||||
|
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
|
||||||
|
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
|
||||||
|
@@ -31,7 +31,7 @@ static char *machnam[NMACH] =
|
||||||
|
"USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS",
|
||||||
|
"MIPSR1xK", "MIPSICE9", "ARMv7"};
|
||||||
|
enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5,
|
||||||
|
- IbmPwr6, IbmPwr7, Pwre6500,
|
||||||
|
+ IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
|
||||||
|
IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */
|
||||||
|
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
|
||||||
|
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||||
|
Index: ATLAS/CONFIG/src/atlcomp.txt
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/CONFIG/src/atlcomp.txt
|
||||||
|
+++ ATLAS/CONFIG/src/atlcomp.txt
|
||||||
|
@@ -190,6 +190,10 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
|
||||||
|
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2'
|
||||||
|
MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
|
||||||
|
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
|
||||||
|
+MACH=POWER8 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||||
|
+ 'gcc' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
|
||||||
|
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
|
||||||
|
+ 'gfortran' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
|
||||||
|
MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||||
|
'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
|
||||||
|
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
|
||||||
|
@@ -210,6 +214,8 @@ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dm
|
||||||
|
'gcc' '-mcpu=power4 -mtune=power4 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
|
||||||
|
MACH=POWER4 OS=ALL LVL=1010 COMPS=f77
|
||||||
|
'xlf' '-qtune=pwr4 -qarch=pwr4 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
||||||
|
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
|
||||||
|
+ 'xlf' '-qtune=pwr8 -qarch=pwr8 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
||||||
|
#
|
||||||
|
# IBM System z or zEnterprise.
|
||||||
|
# These compiler flags given by IBM; -O3 -funroll-loops are chosen because
|
||||||
|
Index: ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c
|
||||||
|
+++ ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||||
|
@@ -77,6 +77,7 @@ enum MACHTYPE ProbeArch()
|
||||||
|
else if (strstr(res, "7455")) mach = PPCG4;
|
||||||
|
else if (strstr(res, "PPC970FX")) mach = PPCG5;
|
||||||
|
else if (strstr(res, "PPC970MP")) mach = PPCG5;
|
||||||
|
+ else if (strstr(res, "POWER8")) mach = IbmPwr8;
|
||||||
|
else if (strstr(res, "POWER7")) mach = IbmPwr7;
|
||||||
|
else if (strstr(res, "POWER6")) mach = IbmPwr6;
|
||||||
|
else if (strstr(res, "POWER5")) mach = IbmPwr5;
|
||||||
|
Index: ATLAS/include/atlas_pca.h
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/include/atlas_pca.h
|
||||||
|
+++ ATLAS/include/atlas_pca.h
|
||||||
|
@@ -26,7 +26,7 @@
|
||||||
|
#endif
|
||||||
|
#elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
|
||||||
|
defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
|
||||||
|
- defined(ATL_ARCH_POWER7)
|
||||||
|
+ defined(ATL_ARCH_POWER7) || defined(ATL_ARCH_POWER8)
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define ATL_membarrier __asm__ __volatile__ ("dcs")
|
||||||
|
/* #define ATL_USEPCA 1 */
|
||||||
|
Index: ATLAS/CONFIG/src/backend/archinfo_aix.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/CONFIG/src/backend/archinfo_aix.c
|
||||||
|
+++ ATLAS/CONFIG/src/backend/archinfo_aix.c
|
||||||
|
@@ -67,6 +67,8 @@ enum MACHTYPE ProbeArch()
|
||||||
|
{
|
||||||
|
if (strstr(res, "PowerPC_POWER5"))
|
||||||
|
mach = IbmPwr5;
|
||||||
|
+ else if (strstr(res, "PowerPC_POWER8"))
|
||||||
|
+ mach = IbmPwr8;
|
||||||
|
else if (strstr(res, "PowerPC_POWER7"))
|
||||||
|
mach = IbmPwr7;
|
||||||
|
else if (strstr(res, "PowerPC_POWER6"))
|
|
@ -0,0 +1,220 @@
|
||||||
|
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
Subject: atlas.3.10.2 ppc64le abiv2 patch
|
||||||
|
Date: Mon, 28 Jul 2014 04:29:05 -0400
|
||||||
|
|
||||||
|
atlas.3.10.2 abiv2 step2 complete the changes already present in atlas 3.10.2
|
||||||
|
* still some files with opd ABI V1 to be disabled for ABI V2
|
||||||
|
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||||
|
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||||
|
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||||
|
|
||||||
|
atlas.3.10.2 ppc64le abiv2 step3
|
||||||
|
* change offsets of parameters read from stack to avoid some segfaults.
|
||||||
|
(value changes 120 => 104 and 128 => 112 identified by gdb investigation)
|
||||||
|
|
||||||
|
Despite this step3 patch there are two remaining problems for the ppc64le architecture:
|
||||||
|
* TODO: still have seg-faults in console during build/check
|
||||||
|
but this is not critical (without make check) and the RPMs are generated on Fedora.
|
||||||
|
unable to investigate because of problem tracked by issue 950
|
||||||
|
https://sourceforge.net/p/math-atlas/support-requests/950/
|
||||||
|
|
||||||
|
* TODO: make check failure because xsslvtst execution failure
|
||||||
|
related to vector assembly code that assumes big-endian env
|
||||||
|
as written in ATL_cmm4x4x128_av.c and ATL_smm4x4x128_av.c.
|
||||||
|
Would need significant work to support little-endian as per
|
||||||
|
endianness comments of all PowerPC vector instructions in:
|
||||||
|
https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D/$file/vector_simd_pem.ppc.2005AUG23.pdf
|
||||||
|
|
||||||
|
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
---
|
||||||
|
tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 7 +++++++
|
||||||
|
tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 7 +++++++
|
||||||
|
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 9 ++++++++-
|
||||||
|
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 20 ++++++++++++++++++--
|
||||||
|
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 23 ++++++++++++++++++++++-
|
||||||
|
5 files changed, 62 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||||
|
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
|
||||||
|
.globl Mjoin(_,ATL_USERMM)
|
||||||
|
Mjoin(_,ATL_USERMM):
|
||||||
|
#else
|
||||||
|
- #if defined(ATL_USE64BITS)
|
||||||
|
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||||
|
/*
|
||||||
|
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||||
|
*/
|
||||||
|
@@ -324,8 +324,15 @@ ATL_USERMM:
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ATL_USE64BITS
|
||||||
|
+#if _CALL_ELF == 2
|
||||||
|
+/* ABIv2 */
|
||||||
|
+ ld pC0, 104(r1)
|
||||||
|
+ ld incCn, 112(r1)
|
||||||
|
+#else
|
||||||
|
+/* ABIv1 */
|
||||||
|
ld pC0, 120(r1)
|
||||||
|
ld incCn, 128(r1)
|
||||||
|
+#endif
|
||||||
|
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
||||||
|
lwz pC0, 68(r1)
|
||||||
|
lwz incCn, 72(r1)
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||||
|
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
|
||||||
|
const TYPE beta, TYPE *C, const int ldc)
|
||||||
|
(r10) 8(r1)
|
||||||
|
*******************************************************************************
|
||||||
|
-64 bit ABIs:
|
||||||
|
+64 bit ABIv1s:
|
||||||
|
r3 r4 r5 r6/f1
|
||||||
|
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
||||||
|
r7 r8 r9 r10
|
||||||
|
const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
||||||
|
f2 120(r1) 128(r1)
|
||||||
|
const TYPE beta, TYPE *C, const int ldc)
|
||||||
|
+
|
||||||
|
+64 bit ABIv2s:
|
||||||
|
+ r3 r4 r5 r6/f1
|
||||||
|
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
||||||
|
+ r7 r8 r9 r10
|
||||||
|
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
||||||
|
+ f2 104(r1) 112(r1)
|
||||||
|
+ const TYPE beta, TYPE *C, const int ldc)
|
||||||
|
#endif
|
||||||
|
#ifdef ATL_AS_AIX_PPC
|
||||||
|
.csect .text[PR]
|
||||||
|
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
|
||||||
|
.globl Mjoin(_,ATL_USERMM)
|
||||||
|
Mjoin(_,ATL_USERMM):
|
||||||
|
#else
|
||||||
|
- #if defined(ATL_USE64BITS)
|
||||||
|
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||||
|
/*
|
||||||
|
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||||
|
*/
|
||||||
|
@@ -257,9 +265,17 @@ ATL_USERMM:
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+
|
||||||
|
#if defined (ATL_USE64BITS)
|
||||||
|
+#if _CALL_ELF == 2
|
||||||
|
+/* ABIv2 */
|
||||||
|
+ ld pC0, 104(r1)
|
||||||
|
+ ld incCn, 112(r1)
|
||||||
|
+#else
|
||||||
|
+/* ABIv1 */
|
||||||
|
ld pC0, 120(r1)
|
||||||
|
ld incCn, 128(r1)
|
||||||
|
+#endif
|
||||||
|
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
||||||
|
lwz pC0, 68(r1)
|
||||||
|
lwz incCn, 72(r1)
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||||
|
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
|
||||||
|
.globl Mjoin(_,ATL_USERMM)
|
||||||
|
Mjoin(_,ATL_USERMM):
|
||||||
|
#else
|
||||||
|
- #if defined(ATL_USE64BITS)
|
||||||
|
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||||
|
/*
|
||||||
|
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||||
|
*/
|
||||||
|
@@ -221,8 +221,15 @@ ATL_USERMM:
|
||||||
|
* kernel instead
|
||||||
|
*/
|
||||||
|
#if defined (ATL_USE64BITS)
|
||||||
|
+#if _CALL_ELF == 2
|
||||||
|
+/* ABIv2 */
|
||||||
|
+ ld r10, 104(r1)
|
||||||
|
+ ld r5, 112(r1)
|
||||||
|
+#else
|
||||||
|
+/* ABIv1 */
|
||||||
|
ld r10, 120(r1)
|
||||||
|
ld r5, 128(r1)
|
||||||
|
+#endif
|
||||||
|
#elif defined(ATL_AS_OSX_PPC)
|
||||||
|
lwz r10, 60(r1)
|
||||||
|
lwz r5, 64(r1)
|
||||||
|
@@ -285,8 +292,15 @@ ATL_USERMM:
|
||||||
|
eqv r0, r0, r0 /* all 1s */
|
||||||
|
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
||||||
|
#if defined (ATL_USE64BITS)
|
||||||
|
+#if _CALL_ELF == 2
|
||||||
|
+ /* ABIv2 */
|
||||||
|
+ ld pC0, FSIZE+104(r1)
|
||||||
|
+ ld ldc, FSIZE+112(r1)
|
||||||
|
+#else
|
||||||
|
+ /* ABIv1 */
|
||||||
|
ld pC0, FSIZE+120(r1)
|
||||||
|
ld ldc, FSIZE+128(r1)
|
||||||
|
+#endif
|
||||||
|
#elif defined(ATL_AS_OSX_PPC)
|
||||||
|
lwz pC0, FSIZE+60(r1)
|
||||||
|
lwz ldc, FSIZE+64(r1)
|
||||||
|
@@ -4258,8 +4272,15 @@ UNALIGNED_C:
|
||||||
|
eqv r0, r0, r0 /* all 1s */
|
||||||
|
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
||||||
|
#if defined (ATL_USE64BITS)
|
||||||
|
+#if _CALL_ELF == 2
|
||||||
|
+ /* ABIv2 */
|
||||||
|
+ ld pC0, FSIZE+104(r1)
|
||||||
|
+ ld ldc, FSIZE+112(r1)
|
||||||
|
+#else
|
||||||
|
+ /* ABIv1 */
|
||||||
|
ld pC0, FSIZE+120(r1)
|
||||||
|
ld ldc, FSIZE+128(r1)
|
||||||
|
+#endif
|
||||||
|
#elif defined(ATL_AS_OSX_PPC)
|
||||||
|
lwz pC0, FSIZE+60(r1)
|
||||||
|
lwz ldc, FSIZE+64(r1)
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
||||||
|
@@ -258,8 +258,15 @@ ATL_USERMM:
|
||||||
|
eqv r0, r0, r0 /* all 1s */
|
||||||
|
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
||||||
|
#if defined (ATL_USE64BITS)
|
||||||
|
+#if _CALL_ELF == 2
|
||||||
|
+/* ABIv2 */
|
||||||
|
+ ld pC0, FSIZE+104(r1)
|
||||||
|
+ ld ldc, FSIZE+112(r1)
|
||||||
|
+#else
|
||||||
|
+/* ABIv1 */
|
||||||
|
ld pC0, FSIZE+120(r1)
|
||||||
|
ld ldc, FSIZE+128(r1)
|
||||||
|
+#endif
|
||||||
|
#elif defined(ATL_AS_OSX_PPC)
|
||||||
|
lwz pC0, FSIZE+60(r1)
|
||||||
|
lwz ldc, FSIZE+64(r1)
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
||||||
|
@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM):
|
||||||
|
*/
|
||||||
|
#ifdef ATL_GAS_LINUX_PPC
|
||||||
|
#ifdef ATL_USE64BITS
|
||||||
|
+ #if _CALL_ELF == 2
|
||||||
|
+ /* ABIv2 */
|
||||||
|
+ ld pC0, 104(r1)
|
||||||
|
+ ld incCn, 112(r1)
|
||||||
|
+ #else
|
||||||
|
+ /* ABIv1 */
|
||||||
|
ld pC0, 120(r1)
|
||||||
|
ld incCn, 128(r1)
|
||||||
|
+ #endif
|
||||||
|
#else
|
||||||
|
lwz incCn, FSIZE+8(r1)
|
||||||
|
#endif
|
|
@ -0,0 +1,151 @@
|
||||||
|
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
Subject: atlas.3.10.2 ppc64le do not use files with lvx
|
||||||
|
Date: Tue, 12 Aug 2014 16:07:06 +0200
|
||||||
|
|
||||||
|
ppc64le do not use files with lvx
|
||||||
|
This is a temporary patch as long as the related files
|
||||||
|
are not ported yet to ppc64 little-endian.
|
||||||
|
|
||||||
|
Warning: patch to be applied only for ppc64le architecture
|
||||||
|
and will also need atlas-new_archdef_for_ppc64le.patch
|
||||||
|
|
||||||
|
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||||
|
---
|
||||||
|
tune/blas/gemm/CASES/ccases.flg | 6 +-----
|
||||||
|
tune/blas/gemm/CASES/dcases.flg | 8 +-------
|
||||||
|
tune/blas/gemm/CASES/dcases.vnb | 4 ----
|
||||||
|
tune/blas/gemm/CASES/scases.flg | 9 +--------
|
||||||
|
tune/blas/gemm/CASES/scases.vnb | 3 ---
|
||||||
|
tune/blas/gemm/CASES/zcases.flg | 8 +-------
|
||||||
|
6 files changed, 4 insertions(+), 34 deletions(-)
|
||||||
|
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/ccases.flg
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/ccases.flg
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/ccases.flg
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||||
|
-24
|
||||||
|
+22
|
||||||
|
304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
|
||||||
|
@@ -48,13 +48,9 @@ gcc
|
||||||
|
328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-fomit-frame-pointer -O2 -fno-tree-loop-optimize
|
||||||
|
-329 192 4 4 4 1 16 4 4 4 ATL_cmm4x4x128_av.c "R. Clint Whaley" \
|
||||||
|
-gcc
|
||||||
|
--x assembler-with-cpp
|
||||||
|
331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mips4
|
||||||
|
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM"
|
||||||
|
333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mfpu=vfpv3
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/scases.flg
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/scases.flg
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/scases.flg
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||||
|
-25
|
||||||
|
+22
|
||||||
|
304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
|
||||||
|
@@ -48,16 +48,9 @@ gcc
|
||||||
|
328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-fomit-frame-pointer -O2 -fno-tree-loop-optimize
|
||||||
|
-329 192 4 4 4 1 16 4 4 4 ATL_smm4x4x128_av.c "R. Clint Whaley" \
|
||||||
|
-gcc
|
||||||
|
--x assembler-with-cpp
|
||||||
|
-330 200 92 92 92 1 16 92 92 92 ATL_smm4x4x128_av.c "R. Clint Whaley" \
|
||||||
|
-gcc
|
||||||
|
--x assembler-with-cpp
|
||||||
|
331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mips4
|
||||||
|
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM"
|
||||||
|
333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mfpu=vfpv3
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/scases.vnb
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/scases.vnb
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/scases.vnb
|
||||||
|
@@ -31,9 +31,6 @@
|
||||||
|
# Defaults: TA='t', TB='n', SSE=0, X87=0, LDBOT=1, RTKU=0, AOUTER=0,
|
||||||
|
# KBMAX=KU, KBMIN=KU, BETAN1=0, RTMN=1
|
||||||
|
#
|
||||||
|
-ID=1 ROUT='ATL_smm4x4x128_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=4 \
|
||||||
|
- LDKB=1 LDBOT=1 KBMIN=4 KBMAX=128 ASM=GAS_PPC \
|
||||||
|
- COMP='gcc' FLAGS='-x assembler-with-cpp'
|
||||||
|
ID=2 ROUT='ATL_smm4x4x16_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=16 \
|
||||||
|
LDKB=1 LDBOT=0 KBMIN=16 KBMAX=2048 ASM=GAS_SPARC \
|
||||||
|
COMP='gcc' FLAGS='-x assembler-with-cpp'
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/dcases.flg
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.flg
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/dcases.flg
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||||
|
-32
|
||||||
|
+30
|
||||||
|
306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
|
||||||
|
@@ -79,12 +79,6 @@ gcc
|
||||||
|
336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mips4
|
||||||
|
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
|
||||||
|
-gcc
|
||||||
|
--x assembler-with-cpp
|
||||||
|
-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \
|
||||||
|
-gcc
|
||||||
|
--O3 -mvsx
|
||||||
|
339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mfpu=vfpv3
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/dcases.vnb
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.vnb
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/dcases.vnb
|
||||||
|
@@ -53,10 +53,6 @@ ID=6 ROUT='ATL_dmm4x1x90_x87.c' AUTH='R
|
||||||
|
ID=7 ROUT='ATL_dmm8x1x120_sse2.c' AUTH='R. Clint Whaley' \
|
||||||
|
MU=8 NU=1 KU=1 KBMAX=512 ASM=GAS_x8664 BETAN1=1 \
|
||||||
|
COMP='gcc' FLAGS='-m64 -x assembler-with-cpp'
|
||||||
|
-ID=70 ROUT='ATL_dmm4x4x80_ppc.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
|
||||||
|
- MU=4 NU=4 KU=1 KBMIN=1 KBMAX=80 ASM=GAS_PPC BETAN1=0 LDBOT=0 \
|
||||||
|
- LDAB=0 LDISKB=1 RTN=1 RTM=1 RTK=0 \
|
||||||
|
- COMP='gcc' FLAGS='-x assembler-with-cpp'
|
||||||
|
ID=80 ROUT='ATL_dmm4x4x16r8_US.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
|
||||||
|
MU=4 NU=4 KU=24 KBMIN=24 KBMAX=512 ASM=GAS_SPARC BETAN1=0 \
|
||||||
|
LDAB=0 RTK=1 RTN=1 RTM=1 LDBOT=0 LDISKB=1 LDAB=1 \
|
||||||
|
Index: ATLAS/tune/blas/gemm/CASES/zcases.flg
|
||||||
|
===================================================================
|
||||||
|
--- ATLAS.orig/tune/blas/gemm/CASES/zcases.flg
|
||||||
|
+++ ATLAS/tune/blas/gemm/CASES/zcases.flg
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||||
|
-31
|
||||||
|
+29
|
||||||
|
306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
|
||||||
|
@@ -76,12 +76,6 @@ gcc
|
||||||
|
336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mips4
|
||||||
|
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
|
||||||
|
-gcc
|
||||||
|
--x assembler-with-cpp
|
||||||
|
-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \
|
||||||
|
-gcc
|
||||||
|
--O3 -mvsx
|
||||||
|
339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||||
|
gcc
|
||||||
|
-x assembler-with-cpp -mfpu=vfpv3
|
63
atlas.spec
63
atlas.spec
|
@ -5,7 +5,7 @@ Version: 3.10.2
|
||||||
%if "%{?enable_native_atlas}" != "0"
|
%if "%{?enable_native_atlas}" != "0"
|
||||||
%define dist .native
|
%define dist .native
|
||||||
%endif
|
%endif
|
||||||
Release: 9%{?dist}
|
Release: 10%{?dist}
|
||||||
Summary: Automatically Tuned Linear Algebra Software
|
Summary: Automatically Tuned Linear Algebra Software
|
||||||
|
|
||||||
Group: System Environment/Libraries
|
Group: System Environment/Libraries
|
||||||
|
@ -27,6 +27,7 @@ Source12: IBMz932.tar.bz2
|
||||||
Source13: IBMz964.tar.bz2
|
Source13: IBMz964.tar.bz2
|
||||||
#upstream arm uses softfp abi, fedora arm uses hard
|
#upstream arm uses softfp abi, fedora arm uses hard
|
||||||
Source14: ARMv732NEON.tar.bz2
|
Source14: ARMv732NEON.tar.bz2
|
||||||
|
Source15: POWER864LEVSXp4.tar.bz2
|
||||||
|
|
||||||
Patch2: atlas-fedora-arm.patch
|
Patch2: atlas-fedora-arm.patch
|
||||||
# Properly pass -melf_* to the linker with -Wl, fixes FTBFS bug 817552
|
# Properly pass -melf_* to the linker with -Wl, fixes FTBFS bug 817552
|
||||||
|
@ -45,13 +46,16 @@ Patch8: atlas-genparse.patch
|
||||||
# Unbundle LAPACK (BZ #1181369)
|
# Unbundle LAPACK (BZ #1181369)
|
||||||
Patch9: atlas.3.10.1-unbundle.patch
|
Patch9: atlas.3.10.1-unbundle.patch
|
||||||
|
|
||||||
# ppc64le patches
|
# for ppc64 ppc64le
|
||||||
Patch95: initialize_malloc_memory.invtrsm.wms.oct23.patch
|
# https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40
|
||||||
Patch96: xlf.command.not.found.patch
|
Patch95: getdoublearr.stripwhite.patch
|
||||||
Patch98: getdoublearr.stripwhite.patch
|
Patch96: initialize_malloc_memory.invtrsm.wms.oct23.patch
|
||||||
Patch99: ppc64le-remove-vsx.patch
|
Patch97: atlas.3.10.2-ppc64le_abiv2.patch
|
||||||
Patch100: ppc64le-abiv2.patch
|
Patch98: atlas-new_archdef_for_ppc64le.patch
|
||||||
Patch110: p8-mem-barrier.patch
|
Patch99: atlas.3.10.2-add_power8_cpu.patch
|
||||||
|
|
||||||
|
# for ppc64le
|
||||||
|
Patch100: atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
|
||||||
|
|
||||||
BuildRequires: gcc-gfortran, lapack-static
|
BuildRequires: gcc-gfortran, lapack-static
|
||||||
|
|
||||||
|
@ -304,19 +308,6 @@ ix86 architecture.
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
# disable the archdef for ppc64le
|
|
||||||
# do it only one time.
|
|
||||||
%ifarch ppc64le
|
|
||||||
%define arch_option -Si archdef 0
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%ifarch ppc64
|
|
||||||
%global arch_option -A 7
|
|
||||||
%global assembler_option -Wa,--noexecstack,-mpower7
|
|
||||||
%else
|
|
||||||
%global assembler_option -Wa,--noexecstack
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
#uname -a
|
#uname -a
|
||||||
#cat /proc/cpuinfo
|
#cat /proc/cpuinfo
|
||||||
|
@ -337,7 +328,6 @@ ix86 architecture.
|
||||||
%patch7 -p1 -b .aarch64
|
%patch7 -p1 -b .aarch64
|
||||||
%endif
|
%endif
|
||||||
%patch8 -p1 -b .genparse
|
%patch8 -p1 -b .genparse
|
||||||
|
|
||||||
%patch9 -p1 -b .unbundle
|
%patch9 -p1 -b .unbundle
|
||||||
|
|
||||||
cp %{SOURCE1} CONFIG/ARCHS/
|
cp %{SOURCE1} CONFIG/ARCHS/
|
||||||
|
@ -347,16 +337,20 @@ cp %{SOURCE11} CONFIG/ARCHS/
|
||||||
cp %{SOURCE12} CONFIG/ARCHS/
|
cp %{SOURCE12} CONFIG/ARCHS/
|
||||||
cp %{SOURCE13} CONFIG/ARCHS/
|
cp %{SOURCE13} CONFIG/ARCHS/
|
||||||
cp %{SOURCE14} CONFIG/ARCHS/
|
cp %{SOURCE14} CONFIG/ARCHS/
|
||||||
|
cp %{SOURCE15} CONFIG/ARCHS/
|
||||||
#cp %{SOURCE8} CONFIG/ARCHS/
|
#cp %{SOURCE8} CONFIG/ARCHS/
|
||||||
#cp %{SOURCE9} CONFIG/ARCHS/
|
#cp %{SOURCE9} CONFIG/ARCHS/
|
||||||
|
|
||||||
%ifarch ppc64le
|
%ifarch ppc64le ppc64
|
||||||
|
%patch95 -p1 -b .than
|
||||||
|
%patch96 -p1
|
||||||
|
%patch97 -p1
|
||||||
|
%patch98 -p1
|
||||||
%patch99 -p1
|
%patch99 -p1
|
||||||
%patch98 -p2
|
%endif
|
||||||
%patch96 -p2
|
|
||||||
%patch95 -p2
|
%ifarch ppc64le
|
||||||
%patch100 -p2
|
%patch100 -p1
|
||||||
%patch110 -p1
|
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifarch %{arm}
|
%ifarch %{arm}
|
||||||
|
@ -392,7 +386,7 @@ for type in %{types}; do
|
||||||
|
|
||||||
mkdir -p %{_arch}_${type}
|
mkdir -p %{_arch}_${type}
|
||||||
pushd %{_arch}_${type}
|
pushd %{_arch}_${type}
|
||||||
../configure %{mode} %{?threads_option} %{?arch_option} -D c -DWALL -Fa alg '%{armflags} -g %{assembler_option} -fPIC'\
|
../configure %{mode} %{?threads_option} %{?arch_option} -D c -DWALL -Fa alg '%{armflags} -g -Wa,--noexecstack -fPIC'\
|
||||||
--prefix=%{buildroot}%{_prefix} \
|
--prefix=%{buildroot}%{_prefix} \
|
||||||
--incdir=%{buildroot}%{_includedir} \
|
--incdir=%{buildroot}%{_includedir} \
|
||||||
--libdir=%{buildroot}%{_libdir}/${libname}
|
--libdir=%{buildroot}%{_libdir}/${libname}
|
||||||
|
@ -509,14 +503,6 @@ for type in %{types}; do
|
||||||
sed -i 's#-m64#-m32#g' Make.inc
|
sed -i 's#-m64#-m32#g' Make.inc
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifarch ppc64le
|
|
||||||
sed -i 's#-mvsx##g' Make.inc
|
|
||||||
sed -i 's#-DATL_VSX##g' Make.inc
|
|
||||||
sed -i 's#-DATL_AltiVec##g' Make.inc
|
|
||||||
sed -i 's#-maltivec##g' Make.inc
|
|
||||||
sed -i 's#ARCH =.*#ARCH = POWER464#' Make.inc
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%endif
|
%endif
|
||||||
make build
|
make build
|
||||||
cd lib
|
cd lib
|
||||||
|
@ -833,6 +819,9 @@ fi
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Nov 26 2015 Than Ngo <than@redhat.com> 3.10.2-10
|
||||||
|
- backport upstream patch for power8 support
|
||||||
|
|
||||||
* Fri Nov 13 2015 Than Ngo <than@redhat.com> 3.10.2-9
|
* Fri Nov 13 2015 Than Ngo <than@redhat.com> 3.10.2-9
|
||||||
- add correct assembler option for ppc64
|
- add correct assembler option for ppc64
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,6 @@
|
||||||
Subject: getdoublearr.stripwhite
|
diff -up ATLAS/include/atlas_genparse.h.than ATLAS/include/atlas_genparse.h
|
||||||
From: Michel Normand <normand@fr.ibm.com>
|
--- ATLAS/include/atlas_genparse.h.than 2015-11-26 10:53:55.056586198 -0500
|
||||||
|
+++ ATLAS/include/atlas_genparse.h 2015-11-26 10:56:00.168537914 -0500
|
||||||
GetDoubleArr must only handle the comma delimited list at string head
|
|
||||||
and ignore anything after the first blank character.
|
|
||||||
|
|
||||||
Signed-off-by: Michel Normand <normand@fr.ibm.com>
|
|
||||||
---
|
|
||||||
ATLAS/include/atlas_genparse.h | 16 ++++++++++++++--
|
|
||||||
1 file changed, 14 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
Index: atlas/ATLAS/include/atlas_genparse.h
|
|
||||||
===================================================================
|
|
||||||
--- atlas.orig/ATLAS/include/atlas_genparse.h
|
|
||||||
+++ atlas/ATLAS/include/atlas_genparse.h
|
|
||||||
@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str)
|
@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,7 +28,7 @@ Index: atlas/ATLAS/include/atlas_genparse.h
|
||||||
assert(sscanf(str, "%le", d) == 1);
|
assert(sscanf(str, "%le", d) == 1);
|
||||||
while (i < N)
|
while (i < N)
|
||||||
{
|
{
|
||||||
@@ -166,6 +177,7 @@ static int GetDoubleArr(char *str, int N
|
@@ -167,6 +178,7 @@ static int GetDoubleArr(char *str, int N
|
||||||
break;
|
break;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
diff -Naur ATLAS.orig/include/atlas_pca.h ATLAS/include/atlas_pca.h
|
|
||||||
--- ATLAS.orig/include/atlas_pca.h 2013-01-08 19:15:40.000000000 +0100
|
|
||||||
+++ ATLAS/include/atlas_pca.h 2014-10-23 13:45:36.956698637 +0200
|
|
||||||
@@ -26,7 +26,7 @@
|
|
||||||
#endif
|
|
||||||
#elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
|
|
||||||
defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
|
|
||||||
- defined(ATL_ARCH_POWER7)
|
|
||||||
+ defined(ATL_ARCH_POWER7) || 1
|
|
||||||
#ifdef __GNUC__
|
|
||||||
#define ATL_membarrier __asm__ __volatile__ ("dcs")
|
|
||||||
/* #define ATL_USEPCA 1 */
|
|
|
@ -1,60 +0,0 @@
|
||||||
--- atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2013-12-05 19:19:57.000000000 +0100
|
|
||||||
+++ atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c.new 2013-12-06 16:29:57.000000000 +0100
|
|
||||||
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
|
|
||||||
const TYPE beta, TYPE *C, const int ldc)
|
|
||||||
(r10) 8(r1)
|
|
||||||
*******************************************************************************
|
|
||||||
-64 bit ABIs:
|
|
||||||
+64 bit ABIv1s:
|
|
||||||
r3 r4 r5 r6/f1
|
|
||||||
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
|
||||||
r7 r8 r9 r10
|
|
||||||
const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
|
||||||
f2 120(r1) 128(r1)
|
|
||||||
const TYPE beta, TYPE *C, const int ldc)
|
|
||||||
+
|
|
||||||
+64 bit ABIv2s:
|
|
||||||
+ r3 r4 r5 r6/f1
|
|
||||||
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
|
||||||
+ r7 r8 r9 r10
|
|
||||||
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
|
||||||
+ f2 104(r1) 112(r1)
|
|
||||||
+ const TYPE beta, TYPE *C, const int ldc)
|
|
||||||
#endif
|
|
||||||
#ifdef ATL_AS_AIX_PPC
|
|
||||||
.csect .text[PR]
|
|
||||||
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
|
|
||||||
.globl Mjoin(_,ATL_USERMM)
|
|
||||||
Mjoin(_,ATL_USERMM):
|
|
||||||
#else
|
|
||||||
- #if defined(ATL_USE64BITS)
|
|
||||||
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
|
||||||
/*
|
|
||||||
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
|
||||||
*/
|
|
||||||
@@ -217,6 +225,7 @@ ATL_USERMM:
|
|
||||||
.globl Mjoin(.,ATL_USERMM)
|
|
||||||
Mjoin(.,ATL_USERMM):
|
|
||||||
#else
|
|
||||||
+/* ppc64 have no longer function descriptors in ABIv2 */
|
|
||||||
.globl ATL_USERMM
|
|
||||||
ATL_USERMM:
|
|
||||||
#endif
|
|
||||||
@@ -257,9 +266,17 @@ ATL_USERMM:
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+
|
|
||||||
#if defined (ATL_USE64BITS)
|
|
||||||
+#if _CALL_ELF == 2
|
|
||||||
+/* ABIv2 */
|
|
||||||
+ ld pC0, 104(r1)
|
|
||||||
+ ld incCn, 112(r1)
|
|
||||||
+#else
|
|
||||||
+/* ABIv1 */
|
|
||||||
ld pC0, 120(r1)
|
|
||||||
ld incCn, 128(r1)
|
|
||||||
+#endif
|
|
||||||
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
|
||||||
lwz pC0, 68(r1)
|
|
||||||
lwz incCn, 72(r1)
|
|
|
@ -1,37 +0,0 @@
|
||||||
Subject: ppc64le remove vsx
|
|
||||||
From: Michel Normand <normand@fr.ibm.com>
|
|
||||||
|
|
||||||
temporarily remove the vsx related flags
|
|
||||||
as long as not supported for ppc64le
|
|
||||||
Note that also force as power4
|
|
||||||
|
|
||||||
Signed-off-by: Michel Normand <normand@fr.ibm.com>
|
|
||||||
diff -up ATLAS/CONFIG/src/atlcomp.txt.orig ATLAS/CONFIG/src/atlcomp.txt
|
|
||||||
--- ATLAS/CONFIG/src/atlcomp.txt.orig 2014-07-10 18:22:02.000000000 +0200
|
|
||||||
+++ ATLAS/CONFIG/src/atlcomp.txt 2015-07-09 09:44:07.270264073 +0200
|
|
||||||
@@ -191,9 +191,9 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
|
|
||||||
MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
|
|
||||||
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
|
|
||||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
|
||||||
- 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
|
|
||||||
+ 'gcc' '-O2 -m64 -mvrsave -funroll-all-loops'
|
|
||||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
|
|
||||||
- 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
|
|
||||||
+ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
|
|
||||||
MACH=POWER6 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
|
||||||
'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 -minsert-sched-nops=2'
|
|
||||||
MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
|
||||||
diff -up ATLAS/CONFIG/src/probe_comp.c.orig ATLAS/CONFIG/src/probe_comp.c
|
|
||||||
--- ATLAS/CONFIG/src/probe_comp.c.orig 2015-07-09 09:44:07.280264074 +0200
|
|
||||||
+++ ATLAS/CONFIG/src/probe_comp.c 2015-07-09 09:45:51.480266328 +0200
|
|
||||||
@@ -450,8 +450,8 @@ COMPNODE **GetDefaultComps(enum OSTYPE O
|
|
||||||
vp = "-mavx -mno-sse2avx -mfma";
|
|
||||||
else if ((vecexts & (1<<ISA_AVXMAC)))
|
|
||||||
vp = "-mavx2 -mfma";
|
|
||||||
- else if (vecexts & (1<<ISA_VSX))
|
|
||||||
- vp = "-mvsx";
|
|
||||||
+ /*else if (vecexts & (1<<ISA_VSX))
|
|
||||||
+ vp = "-mvsx";*/
|
|
||||||
else if (vecexts & (1<<ISA_AV))
|
|
||||||
vp = "-maltivec";
|
|
||||||
else if (vecexts & (1<<ISA_AVX))
|
|
1
sources
1
sources
|
@ -11,3 +11,4 @@ ebb4732aff468bbc223e7f734252173b USII32.tgz
|
||||||
f3e4ca175b5ffc49dce7f3c37f791827 IBMz964.tar.bz2
|
f3e4ca175b5ffc49dce7f3c37f791827 IBMz964.tar.bz2
|
||||||
27e7baa49c588299260188afa78303dc POWER332.tar.bz2
|
27e7baa49c588299260188afa78303dc POWER332.tar.bz2
|
||||||
af1f95e19d7afaf0342fb1377ec94817 ARMv732NEON.tar.bz2
|
af1f95e19d7afaf0342fb1377ec94817 ARMv732NEON.tar.bz2
|
||||||
|
a88a9bf2037174ac6d43eb65357a4465 POWER864LEVSXp4.tar.bz2
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
Subject: xlf.command.not.found
|
|
||||||
From: Michel Normand <normand@fr.ibm.com>
|
|
||||||
|
|
||||||
try to bypass error while building ppc64le
|
|
||||||
"make[2]: xlf: Command not found"
|
|
||||||
|
|
||||||
Signed-off-by: Michel Normand <normand@fr.ibm.com>
|
|
||||||
---
|
|
||||||
ATLAS/CONFIG/src/atlcomp.txt | 4 ++--
|
|
||||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
|
|
||||||
===================================================================
|
|
||||||
--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
|
|
||||||
+++ atlas/ATLAS/CONFIG/src/atlcomp.txt
|
|
||||||
@@ -199,7 +199,7 @@ MACH=POWER6 OS=ALL LVL=1010 COMPS=f77
|
|
||||||
MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
|
|
||||||
'gfortran' '-mcpu=power5 -mtune=power5 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
|
|
||||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
|
|
||||||
- 'xlf' '-qtune=pwr7 -qarch=pwr7 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
|
||||||
+ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
|
|
||||||
MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
|
|
||||||
'xlf' '-qtune=pwr5 -qarch=pwr5 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
|
||||||
MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dmc,smc,dkc,skc,xcc,gcc
|
|
Loading…
Reference in New Issue