patching for Power8 to pass performance tunings and tests on P8 builders
This commit is contained in:
parent
42cf841726
commit
8ffdb3e93b
32
atlas.spec
32
atlas.spec
|
@ -5,7 +5,7 @@ Version: 3.10.1
|
|||
%if "%{?enable_native_atlas}" != "0"
|
||||
%define dist .native
|
||||
%endif
|
||||
Release: 15%{?dist}
|
||||
Release: 16%{?dist}
|
||||
Summary: Automatically Tuned Linear Algebra Software
|
||||
|
||||
Group: System Environment/Libraries
|
||||
|
@ -46,6 +46,14 @@ Patch8: atlas-genparse.patch
|
|||
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
||||
|
||||
# ppc64le patches
|
||||
Patch95: initialize_malloc_memory.invtrsm.wms.oct23.patch
|
||||
Patch96: xlf.command.not.found.patch
|
||||
Patch98: getdoublearr.stripwhite.patch
|
||||
Patch99: ppc64le-remove-vsx.patch
|
||||
Patch100: ppc64le-abiv2.patch
|
||||
Patch110: p8-mem-barrier.patch
|
||||
|
||||
BuildRequires: gcc-gfortran
|
||||
|
||||
Provides: bundled(lapack)
|
||||
|
@ -334,6 +342,16 @@ cp %{SOURCE13} CONFIG/ARCHS/
|
|||
cp %{SOURCE14} CONFIG/ARCHS/
|
||||
#cp %{SOURCE8} CONFIG/ARCHS/
|
||||
#cp %{SOURCE9} CONFIG/ARCHS/
|
||||
|
||||
%ifarch ppc64le
|
||||
%patch99 -p2
|
||||
%patch98 -p2
|
||||
%patch96 -p2
|
||||
%patch95 -p2
|
||||
%patch100 -p2
|
||||
%patch110 -p1
|
||||
%endif
|
||||
|
||||
%ifarch %{arm}
|
||||
# Set arm flags in atlcomp.txt
|
||||
sed -i -e 's,-mfpu=vfpv3,-mfpu=neon,' CONFIG/src/atlcomp.txt
|
||||
|
@ -344,6 +362,7 @@ sed -i -e 's,-mfpu=vfpv3,,' tune/blas/gemm/CASES/*.flg
|
|||
# Debug
|
||||
#sed -i -e 's,> \(.*\)/ptsanity.out,> \1/ptsanity.out || cat \1/ptsanity.out \&\& exit 1,' makes/Make.*
|
||||
|
||||
|
||||
%build
|
||||
for type in %{types}; do
|
||||
if [ "$type" = "base" ]; then
|
||||
|
@ -469,6 +488,14 @@ for type in %{types}; do
|
|||
sed -i 's#-m64#-m32#g' Make.inc
|
||||
%endif
|
||||
|
||||
%ifarch ppc64le
|
||||
sed -i 's#-mvsx##g' Make.inc
|
||||
sed -i 's#-DATL_VSX##g' Make.inc
|
||||
sed -i 's#-DATL_AltiVec##g' Make.inc
|
||||
sed -i 's#-maltivec##g' Make.inc
|
||||
sed -i 's#ARCH =.*#ARCH = POWER464#' Make.inc
|
||||
%endif
|
||||
|
||||
%endif
|
||||
make build
|
||||
cd lib
|
||||
|
@ -821,6 +848,9 @@ fi
|
|||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Oct 31 2014 Jaromir Capik <jcapik@redhat.com> - 3.10.1-16
|
||||
- patching for Power8 to pass performance tunings and tests on P8 builders
|
||||
|
||||
* Fri Oct 24 2014 Frantisek Kluknavsky <fkluknav@redhat.com> - 3.10.1-15
|
||||
- added pkgconfig file
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
Subject: getdoublearr.stripwhite
|
||||
From: Michel Normand <normand@fr.ibm.com>
|
||||
|
||||
GetDoubleArr must only handle the comma delimited list at string head
|
||||
and ignore anything after the first blank character.
|
||||
|
||||
Signed-off-by: Michel Normand <normand@fr.ibm.com>
|
||||
---
|
||||
ATLAS/include/atlas_genparse.h | 16 ++++++++++++++--
|
||||
1 file changed, 14 insertions(+), 2 deletions(-)
|
||||
|
||||
Index: atlas/ATLAS/include/atlas_genparse.h
|
||||
===================================================================
|
||||
--- atlas.orig/ATLAS/include/atlas_genparse.h
|
||||
+++ atlas/ATLAS/include/atlas_genparse.h
|
||||
@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str)
|
||||
}
|
||||
|
||||
/* procedure 7 */
|
||||
-static int GetDoubleArr(char *str, int N, double *d)
|
||||
+static int GetDoubleArr(char *callerstr, int N, double *d)
|
||||
/*
|
||||
* Reads in a list with form "%le,%le...,%le"; N-length d recieves doubles.
|
||||
* RETURNS: the number of doubles found, or N, whichever is less
|
||||
*/
|
||||
{
|
||||
- int i=1;
|
||||
+ int i;
|
||||
+ char *dupstr = DupString(callerstr);
|
||||
+ char *str = dupstr;
|
||||
+ /* strip the string to end on first white space */
|
||||
+ for (i=0; dupstr[i]; i++)
|
||||
+ {
|
||||
+ if (isspace(dupstr[i])) {
|
||||
+ dupstr[i] = '\0';
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ i = 1;
|
||||
assert(sscanf(str, "%le", d) == 1);
|
||||
while (i < N)
|
||||
{
|
||||
@@ -166,6 +177,7 @@ static int GetDoubleArr(char *str, int N
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
+ free(dupstr);
|
||||
return(i);
|
||||
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
--- ./ATLAS.first/tune/blas/level3/invtrsm.c 2013-10-22 19:35:03.000000000 +0000
|
||||
+++ ./ATLAS/tune/blas/level3/invtrsm.c 2013-10-23 21:24:01.000000000 +0000
|
||||
@@ -525,6 +525,7 @@
|
||||
a = A = malloc(i * ATL_MulBySize(incA));
|
||||
if (A)
|
||||
{
|
||||
+ memset(A,0,i*ATL_MulBySize(incA)); /* wms (!!) malloc call above returns non-initialized memory. */
|
||||
if (Uplo == TestGE)
|
||||
for (i=0; i < k; i++)
|
||||
Mjoin(PATL,gegen)(N, N, A+i*incA, lda, N+lda);
|
|
@ -0,0 +1,12 @@
|
|||
diff -Naur ATLAS.orig/include/atlas_pca.h ATLAS/include/atlas_pca.h
|
||||
--- ATLAS.orig/include/atlas_pca.h 2013-01-08 19:15:40.000000000 +0100
|
||||
+++ ATLAS/include/atlas_pca.h 2014-10-23 13:45:36.956698637 +0200
|
||||
@@ -26,7 +26,7 @@
|
||||
#endif
|
||||
#elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
|
||||
defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
|
||||
- defined(ATL_ARCH_POWER7)
|
||||
+ defined(ATL_ARCH_POWER7) || 1
|
||||
#ifdef __GNUC__
|
||||
#define ATL_membarrier __asm__ __volatile__ ("dcs")
|
||||
/* #define ATL_USEPCA 1 */
|
|
@ -0,0 +1,60 @@
|
|||
--- atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2013-12-05 19:19:57.000000000 +0100
|
||||
+++ atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c.new 2013-12-06 16:29:57.000000000 +0100
|
||||
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
|
||||
const TYPE beta, TYPE *C, const int ldc)
|
||||
(r10) 8(r1)
|
||||
*******************************************************************************
|
||||
-64 bit ABIs:
|
||||
+64 bit ABIv1s:
|
||||
r3 r4 r5 r6/f1
|
||||
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
||||
r7 r8 r9 r10
|
||||
const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
||||
f2 120(r1) 128(r1)
|
||||
const TYPE beta, TYPE *C, const int ldc)
|
||||
+
|
||||
+64 bit ABIv2s:
|
||||
+ r3 r4 r5 r6/f1
|
||||
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
||||
+ r7 r8 r9 r10
|
||||
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
||||
+ f2 104(r1) 112(r1)
|
||||
+ const TYPE beta, TYPE *C, const int ldc)
|
||||
#endif
|
||||
#ifdef ATL_AS_AIX_PPC
|
||||
.csect .text[PR]
|
||||
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
|
||||
.globl Mjoin(_,ATL_USERMM)
|
||||
Mjoin(_,ATL_USERMM):
|
||||
#else
|
||||
- #if defined(ATL_USE64BITS)
|
||||
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||
/*
|
||||
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||
*/
|
||||
@@ -217,6 +225,7 @@ ATL_USERMM:
|
||||
.globl Mjoin(.,ATL_USERMM)
|
||||
Mjoin(.,ATL_USERMM):
|
||||
#else
|
||||
+/* ppc64 no longer has function descriptors in ABIv2 */
|
||||
.globl ATL_USERMM
|
||||
ATL_USERMM:
|
||||
#endif
|
||||
@@ -257,9 +266,17 @@ ATL_USERMM:
|
||||
#endif
|
||||
#endif
|
||||
|
||||
+
|
||||
#if defined (ATL_USE64BITS)
|
||||
+#if _CALL_ELF == 2
|
||||
+/* ABIv2 */
|
||||
+ ld pC0, 104(r1)
|
||||
+ ld incCn, 112(r1)
|
||||
+#else
|
||||
+/* ABIv1 */
|
||||
ld pC0, 120(r1)
|
||||
ld incCn, 128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
||||
lwz pC0, 68(r1)
|
||||
lwz incCn, 72(r1)
|
|
@ -0,0 +1,39 @@
|
|||
Subject: ppc64le remove vsx
|
||||
From: Michel Normand <normand@fr.ibm.com>
|
||||
|
||||
Temporarily remove the VSX-related compiler flags
|
||||
for as long as VSX is not supported on ppc64le.
|
||||
Note that this also forces the build to be treated as POWER4.
|
||||
|
||||
Signed-off-by: Michel Normand <normand@fr.ibm.com>
|
||||
Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
|
||||
===================================================================
|
||||
--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
|
||||
+++ atlas/ATLAS/CONFIG/src/atlcomp.txt
|
||||
@@ -187,9 +187,9 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
|
||||
MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
|
||||
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
|
||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||
- 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
|
||||
+ 'gcc' '-O2 -m64 -mvrsave -funroll-all-loops'
|
||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
|
||||
- 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
|
||||
+ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
|
||||
MACH=POWER6 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||
'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 -minsert-sched-nops=2'
|
||||
MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||
Index: atlas/ATLAS/CONFIG/src/probe_comp.c
|
||||
===================================================================
|
||||
--- atlas.orig/ATLAS/CONFIG/src/probe_comp.c
|
||||
+++ atlas/ATLAS/CONFIG/src/probe_comp.c
|
||||
@@ -446,8 +446,8 @@ COMPNODE **GetDefaultComps(enum OSTYPE O
|
||||
|
||||
if ((vecexts & (1<<ISA_AVXFMA4)) && arch == AmdDozer)
|
||||
vp = "-msse4.2 -mfma4";
|
||||
- else if (vecexts & (1<<ISA_VSX))
|
||||
- vp = "-mvsx";
|
||||
+ /*else if (vecexts & (1<<ISA_VSX))
|
||||
+ vp = "-mvsx";*/
|
||||
else if (vecexts & (1<<ISA_AV))
|
||||
vp = "-maltivec";
|
||||
else if (vecexts & (1<<ISA_AVX))
|
|
@ -0,0 +1,24 @@
|
|||
Subject: xlf.command.not.found
|
||||
From: Michel Normand <normand@fr.ibm.com>
|
||||
|
||||
Work around the following error when building on ppc64le:
|
||||
"make[2]: xlf: Command not found"
|
||||
|
||||
Signed-off-by: Michel Normand <normand@fr.ibm.com>
|
||||
---
|
||||
ATLAS/CONFIG/src/atlcomp.txt | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
Index: atlas/ATLAS/CONFIG/src/atlcomp.txt
|
||||
===================================================================
|
||||
--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt
|
||||
+++ atlas/ATLAS/CONFIG/src/atlcomp.txt
|
||||
@@ -199,7 +199,7 @@ MACH=POWER6 OS=ALL LVL=1010 COMPS=f77
|
||||
MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
|
||||
'gfortran' '-mcpu=power5 -mtune=power5 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
|
||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
|
||||
- 'xlf' '-qtune=pwr7 -qarch=pwr7 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
||||
+ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops'
|
||||
MACH=POWER5 OS=ALL LVL=1010 COMPS=f77
|
||||
'xlf' '-qtune=pwr5 -qarch=pwr5 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
||||
MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dmc,smc,dkc,skc,xcc,gcc
|
Loading…
Reference in New Issue