Compare commits
19 Commits
Author | SHA1 | Date |
---|---|---|
Fedora Release Engineering | 3a167b07eb | |
Fedora Release Engineering | 5734674256 | |
Fedora Release Engineering | a3b3418e94 | |
Jakub Martisko | 1a1186f8d1 | |
Jakub Martisko | b5460c6f32 | |
Jakub Martisko | 9be268e969 | |
Jakub Martisko | 10df96cb29 | |
Fedora Release Engineering | 0aa9d04485 | |
Tom Stellard | 7eb291bdaa | |
Jakub Martisko | bb25731117 | |
Jakub Martisko | 9a83c6c2ac | |
Fedora Release Engineering | 03729bff03 | |
Jakub Martisko | 64fd4d5d97 | |
Jakub Martisko | 31b9a6429f | |
Fedora Release Engineering | eb55349d9e | |
Fedora Release Engineering | 0cceea8722 | |
Igor Gnatenko | 2eca604087 | |
Peter Robinson | e42eaa8726 | |
Fedora Release Engineering | 06105294d3 |
|
@ -10,3 +10,6 @@ K7323DNow.tgz
|
|||
/lapack-3.5.0.tgz
|
||||
/atlas3.10.2.tar.bz2
|
||||
/POWER864LEVSXp4.tar.bz2
|
||||
/IBMz1364VXZ.tar.bz2
|
||||
/IBMz1464VXZ2.tar.bz2
|
||||
/IBMz1564VXZ2.tar.bz2
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Tue, 19 Feb 2019 19:03:52 +0100
|
||||
Subject: [PATCH 1/8] Avoid c99 standard compiler
|
||||
|
||||
When probing for a usable GCC, the existing code already dropped path
|
||||
names that contained "c89" or "c90", because these compilers don't have
|
||||
the GCC extensions enabled. This patch also drops names with "c99" in
|
||||
them.
|
||||
---
|
||||
CONFIG/src/atlconf_misc.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c
|
||||
index 63cb1ef..fb62214 100644
|
||||
--- a/CONFIG/src/atlconf_misc.c
|
||||
+++ b/CONFIG/src/atlconf_misc.c
|
||||
@@ -824,7 +824,8 @@ int CompIsGcc(char *comp)
|
||||
int i;
|
||||
|
||||
cmpname = NameWithoutPath(comp);
|
||||
- if (strstr(cmpname, "c89") || strstr(cmpname, "c90"))
|
||||
+ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") ||
|
||||
+ strstr(cmpname, "c99"))
|
||||
{
|
||||
free(cmpname);
|
||||
return(0);
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Tue, 19 Feb 2019 19:20:19 +0100
|
||||
Subject: [PATCH 2/8] Fix -rpath-link command line options
|
||||
|
||||
The "-rpath-link" command line options were written in the wrong syntax,
|
||||
causing errors in the build. This is fixed.
|
||||
---
|
||||
makes/Make.lib | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/makes/Make.lib b/makes/Make.lib
|
||||
index 4ceff02..b322a32 100644
|
||||
--- a/makes/Make.lib
|
||||
+++ b/makes/Make.lib
|
||||
@@ -47,11 +47,11 @@ cshared : fat_cshared
|
||||
#
|
||||
LDTRY_WIN:
|
||||
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
|
||||
- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \
|
||||
+ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \
|
||||
--whole-archive $(libas) --no-whole-archive $(LIBS)
|
||||
GCCTRY_WIN:
|
||||
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
|
||||
- -Wl,"-rpath-link $(LIBINSTdir)" \
|
||||
+ -Wl,"-rpath-link=$(LIBINSTdir)" \
|
||||
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
|
||||
GCCTRY_norp_WIN:
|
||||
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
|
||||
@@ -113,7 +113,7 @@ TRYALL_WIN :
|
||||
#
|
||||
LDTRY:
|
||||
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
|
||||
- -rpath-link $(LIBINSTdir) \
|
||||
+ -rpath-link=$(LIBINSTdir) \
|
||||
--whole-archive $(libas) --no-whole-archive $(LIBS)
|
||||
GCCTRY:
|
||||
$(GOODGCC) -shared -o $(outso).$(so_ver) \
|
|
@ -0,0 +1,55 @@
|
|||
From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 5 Dec 2018 18:59:15 +0100
|
||||
Subject: [PATCH 3/8] Fix SIMD support on IBM z13
|
||||
|
||||
The header file atlas_simd.h contained a syntax error and a few functional
|
||||
errors that affected IBM z13. It prevented any SIMD kernels from being
|
||||
compiled successfully for that platform. This is fixed. The macro
|
||||
vec_madd is avoided, because some GCC versions don't implement it
|
||||
correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used
|
||||
instead.
|
||||
---
|
||||
include/atlas_simd.h | 10 +++++-----
|
||||
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
|
||||
index baee6b1..68daf79 100644
|
||||
--- a/include/atlas_simd.h
|
||||
+++ b/include/atlas_simd.h
|
||||
@@ -69,7 +69,7 @@
|
||||
#define ATL_FRCGNUVEC
|
||||
#endif
|
||||
#elif defined(ATL_VXZ)
|
||||
- #if ATL_VLEN != 2;
|
||||
+ #if ATL_VLEN != 2
|
||||
#define ATL_FRCGNUVEC
|
||||
#endif
|
||||
#elif defined(ATL_NEON)
|
||||
@@ -390,19 +390,19 @@
|
||||
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
|
||||
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
|
||||
#endif
|
||||
- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
|
||||
+ #define ATL_vzero(v_) v_ = vec_splats((double)0.0)
|
||||
#define ATL_vcopy(d_, s_) d_ = s_
|
||||
- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
|
||||
+ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_)))
|
||||
#define ATL_vuld(v_, p_) ATL_vld(v_, p_)
|
||||
#define ATL_vust(p_, v_) ATL_vst(p_, v_)
|
||||
#define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
|
||||
#define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
|
||||
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
|
||||
- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_)
|
||||
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
|
||||
#define ATL_vvrsum1(s0_) \
|
||||
{ ATL_VTYPE t_;\
|
||||
t_ = vec_splat(s0_, 1); \
|
||||
- s0 += t_; \
|
||||
+ s0_ += t_; \
|
||||
}
|
||||
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 12 Dec 2018 19:44:32 +0100
|
||||
Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible
|
||||
|
||||
The probing of the L1 data cache size is sometimes not reliable. This can
|
||||
cause the tuning to yield varying, sub-optimal results. But on Linux the
|
||||
L1 data cache size can usually be retrieved with sysconf instead, which is
|
||||
faster and more reliable. Do this whenever possible.
|
||||
---
|
||||
tune/sysinfo/L1CacheSize.c | 12 +++++++++++-
|
||||
1 file changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c
|
||||
index e62a273..dffa76e 100644
|
||||
--- a/tune/sysinfo/L1CacheSize.c
|
||||
+++ b/tune/sysinfo/L1CacheSize.c
|
||||
@@ -30,6 +30,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
|
||||
#define REPS 4096
|
||||
|
||||
@@ -276,7 +277,16 @@ int main(int nargs, char *args[])
|
||||
exit(-1);
|
||||
}
|
||||
if (nargs > 1) MaxSize = atoi(args[1]);
|
||||
- L1Size = GetL1Size(MaxSize, 1.08);
|
||||
+
|
||||
+#ifdef _SC_LEVEL1_DCACHE_SIZE
|
||||
+ {
|
||||
+ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE);
|
||||
+ L1Size = res > 0 ? (int) (res / 1024) : 0;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ if (!L1Size)
|
||||
+ L1Size = GetL1Size(MaxSize, 1.08);
|
||||
if (!L1Size)
|
||||
L1Size = GetL1Size(MaxSize, 1.08);
|
||||
if (!L1Size)
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 12 Dec 2018 20:06:27 +0100
|
||||
Subject: [PATCH 5/8] Optimizations for IBM z13
|
||||
|
||||
Perform some optimizations for IBM z13:
|
||||
- Compile with -O2 instead of -O.
|
||||
- Streamline vector loads/stores.
|
||||
- Define the vvrsum2 macro.
|
||||
|
||||
Also, use the compile option -march=z13 instead of -march=native.
|
||||
---
|
||||
CONFIG/src/atlcomp.txt | 8 +++-----
|
||||
include/atlas_simd.h | 11 +++++------
|
||||
2 files changed, 8 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
|
||||
index aa31604..2ac71cf 100644
|
||||
--- a/CONFIG/src/atlcomp.txt
|
||||
+++ b/CONFIG/src/atlcomp.txt
|
||||
@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77
|
||||
'gfortran' '-O3 -funroll-loops'
|
||||
MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
'gcc' '-O3 -funroll-loops'
|
||||
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc
|
||||
- 'gcc' '-march=native -O -mvx -mzvector'
|
||||
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc
|
||||
- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2'
|
||||
+MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
+ 'gcc' '-march=z13 -mtune=z13 -O2'
|
||||
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
|
||||
- 'gfortran' '-march=native -O -mvx -mzvector'
|
||||
+ 'gfortran' '-march=z13 -mtune=z13 -O2'
|
||||
#
|
||||
# Windows defaults ; need to make SSE/SSE2 arch dep.
|
||||
#
|
||||
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
|
||||
index 68daf79..f171933 100644
|
||||
--- a/include/atlas_simd.h
|
||||
+++ b/include/atlas_simd.h
|
||||
@@ -384,8 +384,8 @@
|
||||
#endif
|
||||
#define ATL_VTYPE vector double
|
||||
#if (defined(DREAL) || defined(DCPLX))
|
||||
- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; }
|
||||
- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];}
|
||||
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
|
||||
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
|
||||
#else
|
||||
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
|
||||
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
|
||||
@@ -400,10 +400,9 @@
|
||||
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
|
||||
#define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
|
||||
#define ATL_vvrsum1(s0_) \
|
||||
- { ATL_VTYPE t_;\
|
||||
- t_ = vec_splat(s0_, 1); \
|
||||
- s0_ += t_; \
|
||||
- }
|
||||
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
|
||||
+ #define ATL_vvrsum2(s0_, s1_) \
|
||||
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
|
||||
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX))
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -0,0 +1,276 @@
|
|||
From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 25 Mar 2020 20:11:19 +0100
|
||||
Subject: [PATCH 6/8] Add IBM z14 support
|
||||
|
||||
Add general support for IBM z14. Also detect and handle the vector
|
||||
enhancements facility 1, which specifically adds single-precision FP
|
||||
arithmetic for vectors.
|
||||
---
|
||||
CONFIG/include/atlconf.h | 14 ++++----
|
||||
CONFIG/src/Makefile | 6 ++++
|
||||
CONFIG/src/atlcomp.txt | 4 +++
|
||||
CONFIG/src/backend/Make.ext | 4 ++-
|
||||
CONFIG/src/backend/archinfo_linux.c | 3 +-
|
||||
CONFIG/src/backend/probe_vxz2.c | 12 +++++++
|
||||
CONFIG/src/probe_comp.c | 3 +-
|
||||
include/atlas_prefetch.h | 3 +-
|
||||
include/atlas_simd.h | 53 +++++++++++++++++++++++++++++
|
||||
9 files changed, 91 insertions(+), 11 deletions(-)
|
||||
create mode 100644 CONFIG/src/backend/probe_vxz2.c
|
||||
|
||||
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
|
||||
index e51d56d..3828fdb 100644
|
||||
--- a/CONFIG/include/atlconf.h
|
||||
+++ b/CONFIG/include/atlconf.h
|
||||
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||
* Corei3EP: v3 Haswell, E5-26XX
|
||||
* Corei4: skylake
|
||||
*/
|
||||
-#define NMACH 62
|
||||
+#define NMACH 63
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
|
||||
"POWER6", "POWER7", "POWER8", "POWERe6500",
|
||||
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13",
|
||||
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
|
||||
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
|
||||
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
|
||||
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
|
||||
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
|
||||
"ARM64xgene1", "ARM64a53", "ARM64a57"};
|
||||
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
|
||||
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */
|
||||
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
|
||||
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
|
||||
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
|
||||
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
#define MachIsARM64(mach_) \
|
||||
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
|
||||
#define MachIsS390(mach_) \
|
||||
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 )
|
||||
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
|
||||
|
||||
|
||||
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
|
||||
@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase};
|
||||
enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort};
|
||||
enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr};
|
||||
|
||||
-#define NISA 15
|
||||
+#define NISA 16
|
||||
static char *ISAXNAM[NISA] =
|
||||
- {"", "VSX", "VXZ", "AltiVec",
|
||||
+ {"", "VSX", "VXZ2", "VXZ", "AltiVec",
|
||||
"AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow",
|
||||
"FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"};
|
||||
enum ISAEXT
|
||||
- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV,
|
||||
+ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV,
|
||||
ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow,
|
||||
ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC};
|
||||
|
||||
diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile
|
||||
index 212b9d7..782a4cf 100644
|
||||
--- a/CONFIG/src/Makefile
|
||||
+++ b/CONFIG/src/Makefile
|
||||
@@ -158,6 +158,12 @@ IRun_NEON :
|
||||
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \
|
||||
redir=config0.out
|
||||
- cat config0.out
|
||||
+IRun_VXZ2 :
|
||||
+ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \
|
||||
+ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c
|
||||
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \
|
||||
+ redir=config0.out
|
||||
+ - cat config0.out
|
||||
IRun_VXZ :
|
||||
$(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \
|
||||
$(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c
|
||||
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
|
||||
index 2ac71cf..2cfacc2 100644
|
||||
--- a/CONFIG/src/atlcomp.txt
|
||||
+++ b/CONFIG/src/atlcomp.txt
|
||||
@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
'gcc' '-march=z13 -mtune=z13 -O2'
|
||||
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
|
||||
'gfortran' '-march=z13 -mtune=z13 -O2'
|
||||
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
+ 'gcc' '-march=z14 -mtune=z14 -O2'
|
||||
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
|
||||
+ 'gfortran' '-march=z14 -mtune=z14 -O2'
|
||||
#
|
||||
# Windows defaults ; need to make SSE/SSE2 arch dep.
|
||||
#
|
||||
diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext
|
||||
index 4743353..794babf 100644
|
||||
--- a/CONFIG/src/backend/Make.ext
|
||||
+++ b/CONFIG/src/backend/Make.ext
|
||||
@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \
|
||||
probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \
|
||||
probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \
|
||||
probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \
|
||||
- probe_vxz.c
|
||||
+ probe_vxz2.c probe_vxz.c
|
||||
|
||||
all : $(files)
|
||||
|
||||
@@ -107,6 +107,8 @@ flibchkF.f : $(basf)
|
||||
$(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f
|
||||
probe_arm32_FPABI.c : $(basf)
|
||||
$(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI
|
||||
+probe_vxz2.c : $(basf)
|
||||
+ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2
|
||||
probe_vxz.c : $(basf)
|
||||
$(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz
|
||||
probe_aff_SETAFFNP.c : $(basf)
|
||||
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
|
||||
index cdcee92..ed6f476 100644
|
||||
--- a/CONFIG/src/backend/archinfo_linux.c
|
||||
+++ b/CONFIG/src/backend/archinfo_linux.c
|
||||
@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch()
|
||||
else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196;
|
||||
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
|
||||
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
|
||||
- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */
|
||||
+ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
|
||||
+ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
|
||||
free(res);
|
||||
}
|
||||
break;
|
||||
diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c
|
||||
new file mode 100644
|
||||
index 0000000..a69d92d
|
||||
--- /dev/null
|
||||
+++ b/CONFIG/src/backend/probe_vxz2.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+#include <vecintrin.h>
|
||||
+void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y
|
||||
+{
|
||||
+ vector float vx, vy;
|
||||
+ vx = (vector float) {x[0], x[1], x[2], x[3]};
|
||||
+ vy = (vector float) {y[0], y[1], y[2], y[3]};
|
||||
+ vy += vx;
|
||||
+ z[0] = vy[0];
|
||||
+ z[1] = vy[1];
|
||||
+ z[2] = vy[2];
|
||||
+ z[3] = vy[3];
|
||||
+}
|
||||
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
|
||||
index 1652e24..857ea82 100644
|
||||
--- a/CONFIG/src/probe_comp.c
|
||||
+++ b/CONFIG/src/probe_comp.c
|
||||
@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb,
|
||||
vp = "-mavx2 -mfma";
|
||||
else if (vecexts & (1<<ISA_VSX))
|
||||
vp = "-mvsx";
|
||||
- else if (vecexts & (1<<ISA_VXZ))
|
||||
+ else if ((vecexts & (1<<ISA_VXZ)) || (vecexts & (1<<ISA_VXZ2)))
|
||||
vp = "-mvx -mzvector";
|
||||
else if (vecexts & (1<<ISA_AV))
|
||||
vp = "-maltivec";
|
||||
@@ -1207,6 +1207,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
|
||||
{
|
||||
case IbmZ12:
|
||||
case IbmZ13:
|
||||
+ case IbmZ14:
|
||||
case IntCorei3:
|
||||
case IntCorei4:
|
||||
case IntCorei2:
|
||||
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
|
||||
index e7988a7..fa426ac 100644
|
||||
--- a/include/atlas_prefetch.h
|
||||
+++ b/include/atlas_prefetch.h
|
||||
@@ -155,7 +155,8 @@
|
||||
#define ATL_L1LS 32
|
||||
#define ATL_L2LS 64
|
||||
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
|
||||
- defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13)
|
||||
+ defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
|
||||
+ defined(ATL_ARCH_IbmZ14)
|
||||
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
|
||||
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
|
||||
#define ATL_GOT_L1PREFETCH
|
||||
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
|
||||
index f171933..eb75577 100644
|
||||
--- a/include/atlas_simd.h
|
||||
+++ b/include/atlas_simd.h
|
||||
@@ -68,6 +68,11 @@
|
||||
((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
|
||||
#define ATL_FRCGNUVEC
|
||||
#endif
|
||||
+ #elif defined(ATL_VXZ2)
|
||||
+ #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \
|
||||
+ ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
|
||||
+ #define ATL_FRCGNUVEC
|
||||
+ #endif
|
||||
#elif defined(ATL_VXZ)
|
||||
#if ATL_VLEN != 2
|
||||
#define ATL_FRCGNUVEC
|
||||
@@ -113,6 +118,12 @@
|
||||
#else
|
||||
#define ATL_VLEN 2
|
||||
#endif
|
||||
+ #elif defined(ATL_VXZ2)
|
||||
+ #if defined(SREAL) || defined(SCPLX)
|
||||
+ #define ATL_VLEN 4
|
||||
+ #else
|
||||
+ #define ATL_VLEN 2
|
||||
+ #endif
|
||||
#elif defined(ATL_VXZ)
|
||||
#define ATL_VLEN 2
|
||||
#elif defined(ATL_NEON)
|
||||
@@ -376,6 +387,48 @@
|
||||
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
#endif
|
||||
+#elif defined(ATL_VXZ2)
|
||||
+ #include <vecintrin.h>
|
||||
+
|
||||
+ #define ATL_VPERMI(s_, t_, i_) \
|
||||
+ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_))
|
||||
+
|
||||
+ #if defined(SREAL) || defined(SCPLX)
|
||||
+ #define ATL_VTYPE vector float
|
||||
+ #if ATL_VLEN != 4
|
||||
+ #error "VSXZ2 supports only VLEN = 4 for floats!"
|
||||
+ #endif
|
||||
+ //#define ATL_vvrsum4(s0_, s1_, s2_, s3_) \
|
||||
+ //{ ATL_VTYPE t0_, t1_; \
|
||||
+ // t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \
|
||||
+ // t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \
|
||||
+ // s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \
|
||||
+ //}
|
||||
+ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2)
|
||||
+ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3)
|
||||
+ #else /* double precision */
|
||||
+ #define ATL_VTYPE vector double
|
||||
+ #if ATL_VLEN != 2
|
||||
+ #error "VSXZ2 supports only VLEN = 2 for doubles!"
|
||||
+ #endif
|
||||
+ #define ATL_vvrsum1(s0_) \
|
||||
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
|
||||
+ #define ATL_vvrsum2(s0_, s1_) \
|
||||
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
|
||||
+ #endif
|
||||
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
|
||||
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
|
||||
+ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
|
||||
+ #define ATL_vcopy(d_, s_) d_ = s_
|
||||
+ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
|
||||
+ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_))
|
||||
+ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_))
|
||||
+ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
|
||||
+ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
|
||||
+ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
|
||||
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
|
||||
+ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
|
||||
+ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
|
||||
#elif defined(ATL_VXZ)
|
||||
#include <vecintrin.h>
|
||||
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -0,0 +1,265 @@
|
|||
From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 29 May 2019 17:51:34 +0200
|
||||
Subject: [PATCH 7/8] Enable "cross-compile"
|
||||
|
||||
This adds support for building ATLAS without running any target code. In
|
||||
order for this to work, the archdefs must contain some additional files
|
||||
that would otherwise be built during various tuning steps; see the new
|
||||
targets extra_get and extra_put in "CONFIG/ARCHS/Makefile".
|
||||
|
||||
Even if the archdefs contain these additional files, cross compilation
|
||||
is *not* automatically enabled. To activate it and disable tuning at
|
||||
build time, add the option "-Si archdef 2" when running "configure".
|
||||
---
|
||||
CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++
|
||||
bin/atlas_install.c | 2 ++
|
||||
makes/Make.aux | 10 +++++-----
|
||||
makes/Make.bin | 22 ++++++++++++++++++++++
|
||||
makes/Make.l3tune | 6 ++++++
|
||||
makes/Make.sysinfo | 8 +++++++-
|
||||
6 files changed, 66 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile
|
||||
index 321e05c..e61b5a0 100644
|
||||
--- a/CONFIG/ARCHS/Makefile
|
||||
+++ b/CONFIG/ARCHS/Makefile
|
||||
@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt
|
||||
- cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/.
|
||||
rm -f xnegflt
|
||||
archput : sys_put kern_put gemm_put la_put
|
||||
+
|
||||
+ifdef ATL_NOTUNE
|
||||
+
|
||||
+# To avoid tuning, some extra files are needed.
|
||||
+
|
||||
+extra_get :
|
||||
+ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/
|
||||
+ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/
|
||||
+
|
||||
+extra_put :
|
||||
+ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/.
|
||||
+ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/.
|
||||
+
|
||||
+ArchNew : extra_get
|
||||
+
|
||||
+archput : extra_put
|
||||
+
|
||||
+endif
|
||||
diff --git a/bin/atlas_install.c b/bin/atlas_install.c
|
||||
index de3eb3a..3c811e6 100644
|
||||
--- a/bin/atlas_install.c
|
||||
+++ b/bin/atlas_install.c
|
||||
@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA)
|
||||
ATL_Cassert(system("make IBozoL1.grd\n")==0,
|
||||
"USING BOZO L1 DEFAULTS", NULL);
|
||||
}
|
||||
+ if (ARCHDEF >= 2)
|
||||
+ setenv("ATL_NOTUNE", "1", 1);
|
||||
if (ARCHDEF)
|
||||
DefInstall = !system("make IArchDef.grd\n");
|
||||
|
||||
diff --git a/makes/Make.aux b/makes/Make.aux
|
||||
index 1f769c8..c793028 100644
|
||||
--- a/makes/Make.aux
|
||||
+++ b/makes/Make.aux
|
||||
@@ -113,23 +113,23 @@ clean :
|
||||
|
||||
$(ATLFWAIT) :
|
||||
cd $(BINdir) ; $(MAKE) xatlas_waitfile
|
||||
-$(INCAdir)/atlas_type.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_type.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_type.h
|
||||
sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h
|
||||
dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h
|
||||
cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h
|
||||
zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h
|
||||
-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT)
|
||||
+$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT)
|
||||
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h
|
||||
$(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h
|
||||
|
||||
diff --git a/makes/Make.bin b/makes/Make.bin
|
||||
index 1035cb9..acad578 100644
|
||||
--- a/makes/Make.bin
|
||||
+++ b/makes/Make.bin
|
||||
@@ -163,7 +163,9 @@ IRunMADef :
|
||||
cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre)
|
||||
|
||||
IRunMMDef :
|
||||
+ifndef ATL_NOTUNE
|
||||
cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre)
|
||||
+endif
|
||||
cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2
|
||||
cd $(MMTdir) ; $(MAKE) install pre=$(pre)
|
||||
IKillL1 : force_build
|
||||
@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \
|
||||
cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/.
|
||||
|
||||
$(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(R1Tdir) ; $(MAKE) $(pre)r2install
|
||||
+else
|
||||
cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre)
|
||||
+endif
|
||||
$(R1Tdir)/res/$(pre)R1K.sum : force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(R1Tdir) ; $(MAKE) $(pre)r1install
|
||||
+else
|
||||
cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre)
|
||||
+endif
|
||||
INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum
|
||||
cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/.
|
||||
INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \
|
||||
$(R1Tdir)/res/$(pre)R2K.sum
|
||||
cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/.
|
||||
+ifndef ATL_NOTUNE
|
||||
cd $(R1Tdir) ; $(MAKE) $(pre)nxtune
|
||||
+else
|
||||
+ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib
|
||||
+endif
|
||||
|
||||
$(MVTdir)/res/$(pre)MVNK.sum : force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall
|
||||
+else
|
||||
cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre)
|
||||
+endif
|
||||
INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum
|
||||
cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/.
|
||||
$(MVTdir)/res/$(pre)MVTK.sum : force_build
|
||||
+ifdef ATL_NOTUNE
|
||||
+ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall
|
||||
+else
|
||||
cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre)
|
||||
+endif
|
||||
INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum
|
||||
cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/.
|
||||
|
||||
diff --git a/makes/Make.l3tune b/makes/Make.l3tune
|
||||
index eaf7d7d..cd7f5f1 100644
|
||||
--- a/makes/Make.l3tune
|
||||
+++ b/makes/Make.l3tune
|
||||
@@ -118,6 +118,7 @@ res/atlas_strsmXover.h :
|
||||
cp $(strsmXover) res/.
|
||||
|
||||
stsmfc :
|
||||
+ifndef ATL_NOTUNE
|
||||
rm -f $(strsmXover)
|
||||
cd $(L3Bdir) ; $(MAKE) slib
|
||||
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \
|
||||
@@ -128,6 +129,7 @@ stsmfc :
|
||||
tran=NoTranspose_ diag=$(diag)
|
||||
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \
|
||||
tran=Transpose_ diag=$(diag)
|
||||
+endif
|
||||
cd $(L3Bdir) ; $(MAKE) slib
|
||||
dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h
|
||||
|
||||
@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h :
|
||||
cp $(dtrsmXover) res/.
|
||||
|
||||
dtsmfc :
|
||||
+ifndef ATL_NOTUNE
|
||||
rm -f $(dtrsmXover)
|
||||
cd $(L3Bdir) ; $(MAKE) dlib
|
||||
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \
|
||||
@@ -148,6 +151,7 @@ dtsmfc :
|
||||
tran=NoTranspose_ diag=$(diag)
|
||||
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \
|
||||
tran=Transpose_ diag=$(diag)
|
||||
+endif
|
||||
cd $(L3Bdir) ; $(MAKE) dlib
|
||||
qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h
|
||||
|
||||
@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h :
|
||||
cp $(qtrsmXover) res/.
|
||||
|
||||
qtsmfc :
|
||||
+ifndef ATL_NOTUNE
|
||||
rm -f $(qtrsmXover)
|
||||
cd $(L3Bdir) ; $(MAKE) qlib
|
||||
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \
|
||||
@@ -168,6 +173,7 @@ qtsmfc :
|
||||
tran=NoTranspose_ diag=$(diag)
|
||||
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \
|
||||
tran=Transpose_ diag=$(diag)
|
||||
+endif
|
||||
cd $(L3Bdir) ; $(MAKE) qlib
|
||||
|
||||
$(pre)tsmfc.o : force_build
|
||||
diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo
|
||||
index 2b7dfdc..8e5dab2 100644
|
||||
--- a/makes/Make.sysinfo
|
||||
+++ b/makes/Make.sysinfo
|
||||
@@ -5,6 +5,7 @@ maxlat=6
|
||||
mflop=200
|
||||
flags=
|
||||
|
||||
+ifndef ATL_NOTUNE
|
||||
sTestFlags : force_build
|
||||
$(MAKE) srbob `cat res/sBEST` pre='s' type=float
|
||||
|
||||
@@ -85,12 +86,14 @@ RunLamch : xemit_lamch
|
||||
cp res/atlas_?lamch.h $(INCAdir)/.
|
||||
RunTyp: xemit_typ
|
||||
$(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h
|
||||
+endif
|
||||
|
||||
xemit_buildinfo : emit_buildinfo.o
|
||||
$(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o
|
||||
xsyssum : GetSysSum.o
|
||||
$(XCC) $(XCCFLAGS) -o $@ GetSysSum.o
|
||||
|
||||
+ifndef ATL_NOTUNE
|
||||
xL1 : time.o L1CacheSize.o
|
||||
$(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o
|
||||
|
||||
@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c
|
||||
$(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c
|
||||
xmasrch : $(mySRCdir)/masrch.c
|
||||
$(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c
|
||||
+endif
|
||||
|
||||
ATL_cputime.c :
|
||||
cp $(mySRCdir)/ATL_cputime.c .
|
||||
@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c
|
||||
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c
|
||||
GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c
|
||||
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c
|
||||
+
|
||||
+ifndef ATL_NOTUNE
|
||||
time.o : $(mySRCdir)/time.c
|
||||
$(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c
|
||||
emit_lamch.o : $(mySRCdir)/emit_lamch.c
|
||||
@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c
|
||||
$(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c
|
||||
tlb.o : $(mySRCdir)/tlb.c
|
||||
$(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c
|
||||
-
|
||||
+endif
|
||||
|
||||
|
||||
force_build :
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Thu, 26 Mar 2020 17:14:49 +0100
|
||||
Subject: [PATCH 8/8] Add IBM z15 support
|
||||
|
||||
Add support for specifying "IBMz15" as target architecture.
|
||||
---
|
||||
CONFIG/include/atlconf.h | 8 ++++----
|
||||
CONFIG/src/atlcomp.txt | 4 ++++
|
||||
CONFIG/src/backend/archinfo_linux.c | 1 +
|
||||
CONFIG/src/probe_comp.c | 1 +
|
||||
include/atlas_prefetch.h | 2 +-
|
||||
5 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
|
||||
index 3828fdb..382601f 100644
|
||||
--- a/CONFIG/include/atlconf.h
|
||||
+++ b/CONFIG/include/atlconf.h
|
||||
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||
* Corei3EP: v3 Haswell, E5-26XX
|
||||
* Corei4: skylake
|
||||
*/
|
||||
-#define NMACH 63
|
||||
+#define NMACH 64
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
|
||||
"POWER6", "POWER7", "POWER8", "POWERe6500",
|
||||
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
|
||||
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15",
|
||||
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
|
||||
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
|
||||
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
|
||||
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
|
||||
"ARM64xgene1", "ARM64a53", "ARM64a57"};
|
||||
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
|
||||
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
|
||||
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15,
|
||||
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
|
||||
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
|
||||
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
|
||||
#define MachIsARM64(mach_) \
|
||||
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
|
||||
#define MachIsS390(mach_) \
|
||||
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
|
||||
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 )
|
||||
|
||||
|
||||
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
|
||||
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
|
||||
index 2cfacc2..acb2c83 100644
|
||||
--- a/CONFIG/src/atlcomp.txt
|
||||
+++ b/CONFIG/src/atlcomp.txt
|
||||
@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
'gcc' '-march=z14 -mtune=z14 -O2'
|
||||
MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
|
||||
'gfortran' '-march=z14 -mtune=z14 -O2'
|
||||
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
|
||||
+ 'gcc' '-march=arch13 -mtune=arch13 -O2'
|
||||
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77
|
||||
+ 'gfortran' '-march=arch13 -mtune=arch13 -O2'
|
||||
#
|
||||
# Windows defaults ; need to make SSE/SSE2 arch dep.
|
||||
#
|
||||
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
|
||||
index ed6f476..934a005 100644
|
||||
--- a/CONFIG/src/backend/archinfo_linux.c
|
||||
+++ b/CONFIG/src/backend/archinfo_linux.c
|
||||
@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch()
|
||||
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
|
||||
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
|
||||
else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
|
||||
+ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15;
|
||||
else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
|
||||
free(res);
|
||||
}
|
||||
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
|
||||
index 857ea82..88bb25e 100644
|
||||
--- a/CONFIG/src/probe_comp.c
|
||||
+++ b/CONFIG/src/probe_comp.c
|
||||
@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
|
||||
case IbmZ12:
|
||||
case IbmZ13:
|
||||
case IbmZ14:
|
||||
+ case IbmZ15:
|
||||
case IntCorei3:
|
||||
case IntCorei4:
|
||||
case IntCorei2:
|
||||
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
|
||||
index fa426ac..583f19d 100644
|
||||
--- a/include/atlas_prefetch.h
|
||||
+++ b/include/atlas_prefetch.h
|
||||
@@ -156,7 +156,7 @@
|
||||
#define ATL_L2LS 64
|
||||
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
|
||||
defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
|
||||
- defined(ATL_ARCH_IbmZ14)
|
||||
+ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15)
|
||||
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
|
||||
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
|
||||
#define ATL_GOT_L1PREFETCH
|
||||
--
|
||||
2.23.0
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
Author: Mark Salter <msalter@redhat.com>
|
||||
|
||||
Index: ATLAS/CONFIG/include/atlconf.h
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/include/atlconf.h
|
||||
+++ ATLAS/CONFIG/include/atlconf.h
|
||||
@@ -16,9 +16,9 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS
|
||||
((OS_) == OSWin64) )
|
||||
|
||||
enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||
- AFARM, AFS390};
|
||||
+ AFARM, AFS390, AFAARCH64};
|
||||
|
||||
-#define NMACH 52
|
||||
+#define NMACH 53
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5",
|
||||
"POWER6", "POWER7", "POWERe6500", "IBMz9", "IBMz10", "IBMz196",
|
||||
@@ -29,7 +29,7 @@ static char *machnam[NMACH] =
|
||||
"Efficeon", "K7", "HAMMER", "AMD64K10h", "AMDLLANO", "AMDDOZER","AMDDRIVER",
|
||||
"UNKNOWNx86", "IA64Itan", "IA64Itan2",
|
||||
"USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS",
|
||||
- "MIPSR1xK", "MIPSICE9", "ARMv7"};
|
||||
+ "MIPSR1xK", "MIPSICE9", "ARMv7", "AARCH64"};
|
||||
enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5,
|
||||
IbmPwr6, IbmPwr7, Pwre6500,
|
||||
IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */
|
||||
@@ -42,7 +42,8 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw
|
||||
SunUSI, SunUSII, SunUSIII, SunUSIV, SunUST1, SunUST2, SunUSX,
|
||||
MIPSR1xK, /* includes R10K, R12K, R14K, R16K */
|
||||
MIPSICE9, /* SiCortex ICE9 -- like MIPS5K */
|
||||
- ARMv7 /* includes Cortex A8, A9 */
|
||||
+ ARMv7, /* includes Cortex A8, A9 */
|
||||
+ AARCH64
|
||||
};
|
||||
#define MachIsX86(mach_) \
|
||||
( (mach_) >= x86x87 && (mach_) <= x86X )
|
||||
@@ -63,6 +64,8 @@ enum MACHTYPE {MACHOther, IbmPwr3, IbmPw
|
||||
( (mach_) == ARMv7 )
|
||||
#define MachIsS390(mach_) \
|
||||
( (mach_) >= IbmZ9 && (mach_) <= IbmZ196 )
|
||||
+#define MachIsAARCH64(mach_) \
|
||||
+ ( (mach_) == AARCH64 )
|
||||
|
||||
|
||||
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
|
||||
@@ -84,13 +87,13 @@ enum ISAEXT
|
||||
{ISA_None=0, ISA_VSX, ISA_AV, ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX,
|
||||
ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow, ISA_NEON};
|
||||
|
||||
-#define NASMD 9
|
||||
+#define NASMD 10
|
||||
enum ASMDIA
|
||||
{ASM_None=0, gas_x86_32, gas_x86_64, gas_sparc, gas_ppc, gas_parisc,
|
||||
- gas_mips, gas_arm, gas_s390};
|
||||
+ gas_mips, gas_arm, gas_s390, gas_aarch64};
|
||||
static char *ASMNAM[NASMD] =
|
||||
{"", "GAS_x8632", "GAS_x8664", "GAS_SPARC", "GAS_PPC", "GAS_PARISC",
|
||||
- "GAS_MIPS", "GAS_ARM", "GAS_S390"};
|
||||
+ "GAS_MIPS", "GAS_ARM", "GAS_S390", "GAS_AARCH64"};
|
||||
|
||||
/*
|
||||
* Used for archinfo probes (can pack in bitfield)
|
||||
Index: ATLAS/CONFIG/src/Makefile
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/Makefile
|
||||
+++ ATLAS/CONFIG/src/Makefile
|
||||
@@ -260,6 +260,11 @@ IRun_BINDP :
|
||||
redir=config0.out
|
||||
- cat config0.out
|
||||
|
||||
+IRun_GAS_AARCH64 :
|
||||
+ $(CC) $(CCFLAGS) -o xprobe_gas_aarch64 $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_aarch64.S
|
||||
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_gas_aarch64 args="$(args)" \
|
||||
+ redir=config0.out
|
||||
+ - cat config0.out
|
||||
IRun_GAS_S390 :
|
||||
$(CC) $(CCFLAGS) -o xprobe_gas_s390 $(SRCdir)/backend/probe_this_asm.c $(SRCdir)/backend/probe_gas_s390.S
|
||||
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_gas_s390 args="$(args)" \
|
||||
Index: ATLAS/CONFIG/src/SpewMakeInc.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/SpewMakeInc.c
|
||||
+++ ATLAS/CONFIG/src/SpewMakeInc.c
|
||||
@@ -391,6 +391,8 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu
|
||||
|
||||
if (MachIsIA64(arch))
|
||||
return(sp);
|
||||
+ if (MachIsAARCH64(arch))
|
||||
+ return(sp);
|
||||
if (MachIsMIPS(arch))
|
||||
return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32");
|
||||
if (MachIsS390(arch))
|
||||
Index: ATLAS/CONFIG/src/atlcomp.txt
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/atlcomp.txt
|
||||
+++ ATLAS/CONFIG/src/atlcomp.txt
|
||||
@@ -267,6 +267,17 @@ MACH=ARMv7 OS=ALL LVL=1000 COMPS=dmc,dkc
|
||||
MACH=ARMv7 OS=ALL LVL=1000 COMPS=f77
|
||||
'gfortran' '-mcpu=cortex-a8 -mfpu=vfpv3 -mfloat-abi=softfp -O'
|
||||
#
|
||||
+# AArch64 defaults
|
||||
+#
|
||||
+MACH=AARCH64 OS=ALL LVL=1000 COMPS=xcc
|
||||
+ 'gcc' '-O2'
|
||||
+MACH=AARCH64 OS=ALL LVL=1000 COMPS=smc,skc,gcc,icc
|
||||
+ 'gcc' '-O2'
|
||||
+MACH=AARCH64 OS=ALL LVL=1000 COMPS=dmc,dkc
|
||||
+ 'gcc' '-O2'
|
||||
+MACH=AARCH64 OS=ALL LVL=1000 COMPS=f77
|
||||
+ 'gfortran' '-O'
|
||||
+#
|
||||
# Generic defaults
|
||||
#
|
||||
MACH=ALL OS=ALL LVL=5 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||
Index: ATLAS/CONFIG/src/atlconf_misc.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/atlconf_misc.c
|
||||
+++ ATLAS/CONFIG/src/atlconf_misc.c
|
||||
@@ -563,6 +563,7 @@ enum ARCHFAM ProbeArchFam(char *targ)
|
||||
else if (strstr(res, "ia64")) fam = AFIA64;
|
||||
else if (strstr(res, "mips")) fam = AFMIPS;
|
||||
else if (strstr(res, "arm")) fam = AFARM;
|
||||
+ else if (strstr(res, "aarch64")) fam = AFAARCH64;
|
||||
else if (strstr(res, "s390")) fam = AFS390;
|
||||
else if ( strstr(res, "i686") || strstr(res, "i586") ||
|
||||
strstr(res, "i486") || strstr(res, "i386") ||
|
||||
@@ -588,6 +589,7 @@ enum ARCHFAM ProbeArchFam(char *targ)
|
||||
strstr(res, "x86_64") ) fam = AFX86;
|
||||
else if (strstr(res, "mips")) fam = AFMIPS;
|
||||
else if (strstr(res, "arm")) fam = AFARM;
|
||||
+ else if (strstr(res, "aarch64")) fam = AFAARCH64;
|
||||
else if (strstr(res, "s390")) fam = AFS390;
|
||||
free(res);
|
||||
}
|
||||
Index: ATLAS/CONFIG/src/backend/Make.ext
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/backend/Make.ext
|
||||
+++ ATLAS/CONFIG/src/backend/Make.ext
|
||||
@@ -57,6 +57,8 @@ probe_gas_arm.S : $(basf)
|
||||
$(extC) -b $(basf) -o probe_gas_arm.S rout=probe_gas_arm.S
|
||||
probe_gas_s390.S : $(basf)
|
||||
$(extC) -b $(basf) -o probe_gas_s390.S rout=probe_gas_s390.S
|
||||
+probe_gas_aarch64.S : $(basf)
|
||||
+ $(extC) -b $(basf) -o probe_gas_aarch64.S rout=probe_gas_aarch64.S
|
||||
probe_AVXMAC.S : $(basf)
|
||||
$(extC) -b $(basf) -o probe_AVXMAC.S rout=probe_AVXMAC.S
|
||||
probe_AVXFMA4.S : $(basf)
|
||||
Index: ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c
|
||||
+++ ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||
@@ -267,6 +267,14 @@ enum MACHTYPE ProbeArch()
|
||||
free(res);
|
||||
}
|
||||
break;
|
||||
+ case AFAARCH64:
|
||||
+ res = atlsys_1L(NULL, "fgrep 'Processor' /proc/cpuinfo", 0, 0);
|
||||
+ if (res)
|
||||
+ {
|
||||
+ if (strstr(res, "AArch64")) mach = AARCH64;
|
||||
+ free(res);
|
||||
+ }
|
||||
+ break;
|
||||
default:
|
||||
#if 0
|
||||
if (!CmndOneLine(NULL, "fgrep 'cpu family' /proc/cpuinfo", res))
|
||||
Index: ATLAS/CONFIG/src/backend/probe_gas_aarch64.S
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ ATLAS/CONFIG/src/backend/probe_gas_aarch64.S
|
||||
@@ -0,0 +1,14 @@
|
||||
+#define ATL_GAS_AARCH64
|
||||
+#include "atlas_asm.h"
|
||||
+#
|
||||
+# Linux AArch64 assembler for:
|
||||
+# int asm_probe(int i)
|
||||
+# RETURNS: i*3
|
||||
+#
|
||||
+.text
|
||||
+.globl ATL_asmdecor(asm_probe)
|
||||
+.type ATL_asmdecor(asm_probe), %function
|
||||
+ATL_asmdecor(asm_probe):
|
||||
+ add w0, w0, w0, LSL #1
|
||||
+ ret
|
||||
+.size ATL_asmdecor(asm_probe),.-ATL_asmdecor(asm_probe)
|
||||
Index: ATLAS/CONFIG/src/probe_comp.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/probe_comp.c
|
||||
+++ ATLAS/CONFIG/src/probe_comp.c
|
||||
@@ -582,7 +582,7 @@ char *GetPtrbitsFlag(enum OSTYPE OS, enu
|
||||
char *sp = "";
|
||||
int i, j, k;
|
||||
|
||||
- if (MachIsIA64(arch))
|
||||
+ if (MachIsIA64(arch) || MachIsAARCH64(arch))
|
||||
return(sp);
|
||||
if (MachIsMIPS(arch))
|
||||
return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32");
|
||||
Index: ATLAS/include/atlas_genparse.h
|
||||
===================================================================
|
||||
--- ATLAS.orig/include/atlas_genparse.h
|
||||
+++ ATLAS/include/atlas_genparse.h
|
||||
@@ -6,13 +6,13 @@
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
-#define NASMD 9
|
||||
+#define NASMD 10
|
||||
enum ASMDIA
|
||||
{ASM_None=0, gas_x86_32, gas_x86_64, gas_sparc, gas_ppc, gas_parisc,
|
||||
- gas_mips, gas_arm, gas_s390};
|
||||
+ gas_mips, gas_arm, gas_s390, gas_aarch64};
|
||||
static char *ASMNAM[NASMD] =
|
||||
{"", "GAS_x8632", "GAS_x8664", "GAS_SPARC", "GAS_PPC", "GAS_PARISC",
|
||||
- "GAS_MIPS", "GAS_ARM", "GAS_S390"};
|
||||
+ "GAS_MIPS", "GAS_ARM", "GAS_S390", "GAS_AARCH64"};
|
||||
/*
|
||||
* Basic data structure for forming queues with some minimal info
|
||||
*/
|
|
@ -1,17 +0,0 @@
|
|||
diff -up wrk/src/threads/ATL_thread_start.c.wrk wrk/src/threads/ATL_thread_start.c
|
||||
--- wrk/src/threads/ATL_thread_start.c.wrk 2013-09-23 13:46:51.881085276 +0200
|
||||
+++ wrk/src/threads/ATL_thread_start.c 2013-09-24 16:13:59.021065418 +0200
|
||||
@@ -101,9 +101,10 @@ int ATL_thread_start(ATL_thread_t *thr,
|
||||
ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED));
|
||||
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* no chk, OK to fail */
|
||||
#ifdef ATL_PAFF_SETAFFNP
|
||||
- CPU_ZERO(&cpuset);
|
||||
- CPU_SET(affID, &cpuset);
|
||||
- ATL_assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset));
|
||||
+ //affinity crashes a machine with fewer processors than the builder
|
||||
+ //CPU_ZERO(&cpuset);
|
||||
+ //CPU_SET(affID, &cpuset);
|
||||
+ //ATL_assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset));
|
||||
#elif defined(ATL_PAFF_SETPROCNP)
|
||||
ATL_assert(!pthread_attr_setprocessor_np(&attr, (pthread_spu_t)affID,
|
||||
PTHREAD_BIND_FORCED_NP));
|
|
@ -1,50 +0,0 @@
|
|||
--- CONFIG/include/atlconf.h 2011-05-14 13:33:24.000000000 -0400
|
||||
+++ CONFIG/include/atlconf.h.new 2011-08-30 14:25:41.427136391 -0400
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS};
|
||||
|
||||
-#define NMACH 37
|
||||
+#define NMACH 38
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5",
|
||||
"POWER6", "POWER7",
|
||||
@@ -25,7 +25,7 @@
|
||||
"Efficeon", "K7", "HAMMER", "AMD64K10h", "UNKNOWNx86",
|
||||
"IA64Itan", "IA64Itan2",
|
||||
"USI", "USII", "USIII", "USIV", "UST2", "UnknownUS",
|
||||
- "MIPSR1xK", "MIPSICE9"};
|
||||
+ "MIPSR1xK", "MIPSICE9", "ARM"};
|
||||
enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5,
|
||||
IbmPwr6, IbmPwr7,
|
||||
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||
@@ -34,7 +34,8 @@
|
||||
IA64Itan, IA64Itan2,
|
||||
SunUSI, SunUSII, SunUSIII, SunUSIV, SunUST2, SunUSX,
|
||||
MIPSR1xK, /* includes R10K, R12K, R14K, R16K */
|
||||
- MIPSICE9 /* SiCortex ICE9 -- like MIPS5K */
|
||||
+ MIPSICE9, /* SiCortex ICE9 -- like MIPS5K */
|
||||
+ ARM
|
||||
};
|
||||
#define MachIsX86(mach_) \
|
||||
( (mach_) >= IntP5 && (mach_) <= x86X )
|
||||
@@ -51,6 +52,8 @@
|
||||
#endif
|
||||
#define MachIsPPC(mach_) \
|
||||
( (mach_) >= PPCG4 && (mach_) <= PPCG5 )
|
||||
+#define MachIsARM(mach_) \
|
||||
+ ( (mach_) == ARM )
|
||||
|
||||
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
|
||||
static char *f2c_intstr[5] =
|
||||
--- CONFIG/src/probe_comp.c 2011-05-14 13:33:24.000000000 -0400
|
||||
+++ CONFIG/src/probe_comp.c.new 2011-08-30 14:28:31.103015151 -0400
|
||||
@@ -507,6 +507,8 @@
|
||||
|
||||
if (MachIsIA64(arch))
|
||||
return(sp);
|
||||
+ if (MachIsARM(arch))
|
||||
+ return(sp);
|
||||
if (MachIsMIPS(arch))
|
||||
return((ptrbits == 64) ? "-mabi=64" : "-mabi=n32");
|
||||
if (!CompIsGcc(comp))
|
|
@ -1,111 +0,0 @@
|
|||
--- makes/Make.lib 2008-06-06 20:57:46.000000000 -0400
|
||||
+++ makes/Make.lib.new 2008-12-21 15:36:21.000000000 -0500
|
||||
@@ -3,6 +3,8 @@
|
||||
#
|
||||
# override with libatlas.so only when atlas is built to one lib
|
||||
#
|
||||
+so_ver_major=3
|
||||
+so_ver = $(so_ver_major).0
|
||||
DYNlibs = liblapack.so libf77blas.so libcblas.so libatlas.so
|
||||
PTDYNlibs = liblapack.so libptf77blas.so libptcblas.so libatlas.so
|
||||
CDYNlibs = liblapack.so libcblas.so libatlas.so
|
||||
@@ -32,38 +34,78 @@
|
||||
mv $(tarnam).tar.gz $(tarnam).tgz
|
||||
|
||||
ptshared :
|
||||
- - rm -f libatlas.so liblapack.so
|
||||
- $(MAKE) libatlas.so liblapack.so libptf77blas.so libf77blas.so \
|
||||
- libptcblas.so libcblas.so liblapack.so
|
||||
+ - rm -f libatlas.so* liblapack.so* \
|
||||
+ lib*blas.so* libclapack.so*
|
||||
+ $(MAKE) libatlas.so libcblas.so libptf77blas.so libf77blas.so \
|
||||
+ libptcblas.so liblapack.so libclapack.so
|
||||
shared :
|
||||
- - rm -f libatlas.so liblapack.so
|
||||
- $(MAKE) libatlas.so liblapack.so libf77blas.so libcblas.so liblapack.so
|
||||
+ - rm -f libatlas.so liblapack.so \
|
||||
+ libatlas.so.$(so_ver) liblapack.so.$(so_ver)
|
||||
+ $(MAKE) libatlas.so libcblas.so libf77blas.so liblapack.so libclapack.so
|
||||
cptshared :
|
||||
- - rm -f libatlas.so libclapack.so
|
||||
- $(MAKE) libatlas.so libclapack.so libptcblas.so libcblas.so
|
||||
+ - rm -f libatlas.so libclapack.so \
|
||||
+ libatlas.so.$(so_ver) libclapack.so.$(so_ver)
|
||||
+ $(MAKE) libatlas.so libptcblas.so libcblas.so libclapack.so
|
||||
cshared :
|
||||
- - rm -f libatlas.so libclapack.so
|
||||
- $(MAKE) libatlas.so libclapack.so libcblas.so
|
||||
+ - rm -f libatlas.so libclapack.so \
|
||||
+ libatlas.so.$(so_ver) libclapack.so.$(so_ver)
|
||||
+ $(MAKE) libatlas.so libcblas.so libclapack.so
|
||||
|
||||
libatlas.so : libatlas.a
|
||||
- ld $(LDFLAGS) -shared -soname libatlas.so -o libatlas.so \
|
||||
- --whole-archive libatlas.a --no-whole-archive -lc $(LIBS)
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../libatlas.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=libatlas.so.$(so_ver_major) \
|
||||
+ -o libatlas.so.$(so_ver) static_libs/*.o -lc $(LIBS)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./libatlas.so.$(so_ver) libatlas.so.$(so_ver_major)
|
||||
+ ln -s ./libatlas.so.$(so_ver) libatlas.so
|
||||
liblapack.so : liblapack.a
|
||||
- ld $(LDFLAGS) -shared -soname $@ -o $@ --whole-archive \
|
||||
- liblapack.a --no-whole-archive $(F77SYSLIB)
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../liblapack.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=liblapack.so.$(so_ver_major) \
|
||||
+ -o liblapack.so.$(so_ver) static_libs/*.o $(F77SYSLIB) \
|
||||
+ libf77blas.so.$(so_ver_major) libcblas.so.$(so_ver_major)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./liblapack.so.$(so_ver) liblapack.so.$(so_ver_major)
|
||||
+ ln -s ./liblapack.so.$(so_ver) liblapack.so
|
||||
libclapack.so : libclapack.a
|
||||
- ld $(LDFLAGS) -shared -soname liblapack.so -o liblapack.so \
|
||||
- --whole-archive libclapack.a
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../libclapack.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=libclapack.so.$(so_ver_major) \
|
||||
+ -o libclapack.so.$(so_ver) static_libs/*.o \
|
||||
+ libcblas.so.$(so_ver_major) -lc $(LIBS)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./libclapack.so.$(so_ver) libclapack.so.$(so_ver_major)
|
||||
+ ln -s ./libclapack.so.$(so_ver) libclapack.so
|
||||
libptf77blas.so : libptf77blas.a
|
||||
- ld $(LDFLAGS) -shared -soname $@ -o $@ --whole-archive libptf77blas.a \
|
||||
- --no-whole-archive $(F77SYSLIB)
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../libptf77blas.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=libptf77blas.so.$(so_ver_major) \
|
||||
+ -o libptf77blas.so.$(so_ver) static_libs/*.o \
|
||||
+ libatlas.so.$(so_ver) $(F77SYSLIB)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./libptf77blas.so.$(so_ver) libptf77blas.so.$(so_ver_major)
|
||||
+ ln -s ./libptf77blas.so.$(so_ver) libptf77blas.so
|
||||
libf77blas.so : libf77blas.a
|
||||
- ld $(LDFLAGS) -shared -soname $@ -o $@ --whole-archive libf77blas.a \
|
||||
- --no-whole-archive $(F77SYSLIB)
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../libf77blas.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=libf77blas.so.$(so_ver_major) \
|
||||
+ -o libf77blas.so.$(so_ver) static_libs/*.o \
|
||||
+ libatlas.so.$(so_ver) $(F77SYSLIB)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./libf77blas.so.$(so_ver) libf77blas.so.$(so_ver_major)
|
||||
+ ln -s ./libf77blas.so.$(so_ver) libf77blas.so
|
||||
libptcblas.so : libptcblas.a
|
||||
- ld $(LDFLAGS) -shared -soname $@ -o $@ --whole-archive libptcblas.a
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../libptcblas.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=libptcblas.so.$(so_ver_major) \
|
||||
+ -o libptcblas.so.$(so_ver) static_libs/*.o \
|
||||
+ libatlas.so.$(so_ver) -lc $(LIBS)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./libptcblas.so.$(so_ver) libptcblas.so.$(so_ver_major)
|
||||
+ ln -s ./libptcblas.so.$(so_ver) libptcblas.so
|
||||
libcblas.so : libcblas.a
|
||||
- ld $(LDFLAGS) -shared -soname $@ -o $@ --whole-archive libcblas.a
|
||||
+ mkdir static_libs ; cd static_libs ; ar x ../libcblas.a ; cd ../
|
||||
+ gcc $(LDFLAGS) -shared -Wl,-soname=libcblas.so.$(so_ver_major) \
|
||||
+ -o libcblas.so.$(so_ver) static_libs/*.o \
|
||||
+ libatlas.so.$(so_ver) -lc $(LIBS)
|
||||
+ rm -rf static_libs
|
||||
+ ln -s ./libcblas.so.$(so_ver) libcblas.so.$(so_ver_major)
|
||||
+ ln -s ./libcblas.so.$(so_ver) libcblas.so
|
||||
#
|
||||
# Builds one shared lib from all ATLAS files
|
||||
#
|
|
@ -0,0 +1,49 @@
|
|||
From 9a3e640a517926c47b5655ba0033d4f56df4a66e Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Martisko <jamartis@redhat.com>
|
||||
Date: Wed, 22 Jan 2020 14:24:46 +0100
|
||||
Subject: [PATCH] test
|
||||
|
||||
---
|
||||
interfaces/blas/C/testing/c_dblat1.f | 4 ++--
|
||||
interfaces/blas/C/testing/c_sblat1.f | 4 ++--
|
||||
2 files changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/interfaces/blas/C/testing/c_dblat1.f b/interfaces/blas/C/testing/c_dblat1.f
|
||||
index 55ea989..7269601 100644
|
||||
--- a/interfaces/blas/C/testing/c_dblat1.f
|
||||
+++ b/interfaces/blas/C/testing/c_dblat1.f
|
||||
@@ -247,11 +247,11 @@
|
||||
IF (ICASE.EQ.7) THEN
|
||||
* .. DNRM2TEST ..
|
||||
STEMP(1) = DTRUE1(NP1)
|
||||
- CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
+ CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.8) THEN
|
||||
* .. DASUMTEST ..
|
||||
STEMP(1) = DTRUE3(NP1)
|
||||
- CALL STEST1(DASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
+ CALL STEST1(DASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.9) THEN
|
||||
* .. DSCALTEST ..
|
||||
CALL DSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)
|
||||
diff --git a/interfaces/blas/C/testing/c_sblat1.f b/interfaces/blas/C/testing/c_sblat1.f
|
||||
index 1fc6dce..b97ed0b 100644
|
||||
--- a/interfaces/blas/C/testing/c_sblat1.f
|
||||
+++ b/interfaces/blas/C/testing/c_sblat1.f
|
||||
@@ -247,11 +247,11 @@
|
||||
IF (ICASE.EQ.7) THEN
|
||||
* .. SNRM2TEST ..
|
||||
STEMP(1) = DTRUE1(NP1)
|
||||
- CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
+ CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.8) THEN
|
||||
* .. SASUMTEST ..
|
||||
STEMP(1) = DTRUE3(NP1)
|
||||
- CALL STEST1(SASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
+ CALL STEST1(SASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.9) THEN
|
||||
* .. SSCALTEST ..
|
||||
CALL SSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)
|
||||
--
|
||||
2.24.1
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
diff --git a/src/testing/ATL_f77getri.c b/src/testing/ATL_f77getri.c
|
||||
index 2cc576c..7ff8eba 100644
|
||||
--- a/src/testing/ATL_f77getri.c
|
||||
+++ b/src/testing/ATL_f77getri.c
|
||||
@@ -97,7 +97,6 @@ int f77getri(const enum ATLAS_ORDER Order, const int N, TYPE *A, const int lda,
|
||||
#ifdef ATL_FunkyInts
|
||||
*lwork = F77lwork;
|
||||
for (i=0; i < MN; i++) ipiv[i] = F77ipiv[i] + 1;
|
||||
- free(F77ipiv);
|
||||
#else
|
||||
for (i=0; i < MN; i++) ipiv[i]++;
|
||||
#endif
|
|
@ -1,32 +0,0 @@
|
|||
Subject: atlas new archdef for ppc64le
|
||||
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
Date: Sun, 13 Jun 2014 18:02:47 +0200
|
||||
|
||||
Need to define different archdef names
|
||||
for ppc64 (that is Big Endian) and ppc64le (that is Little Endian).
|
||||
This is already done upstream in atlas 3.11.30 with issue
|
||||
https://sourceforge.net/p/math-atlas/patches/66/
|
||||
|
||||
Required at least as long as I need the bypass of
|
||||
atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
|
||||
|
||||
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
---
|
||||
CONFIG/src/SpewMakeInc.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
Index: ATLAS/CONFIG/src/SpewMakeInc.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/SpewMakeInc.c
|
||||
+++ ATLAS/CONFIG/src/SpewMakeInc.c
|
||||
@@ -542,6 +542,10 @@ int main(int nargs, char **args)
|
||||
fprintf(fpout, "# -------------------------------------------------\n");
|
||||
fprintf(fpout, " ARCH = %s", machnam[mach]);
|
||||
fprintf(fpout, "%d", ptrbits);
|
||||
+ /* for ppc64le archi add 'LE' characters */
|
||||
+ #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
+ fprintf(fpout, "%s", "LE");
|
||||
+ #endif
|
||||
if (ISAX)
|
||||
fprintf(fpout, "%s", ISAXNAM[ISAX]);
|
||||
if (!USEIEEE)
|
|
@ -1,95 +0,0 @@
|
|||
diff -uNr ATLAS.orig/CONFIG/src/backend/archinfo_linux.c ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||
--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c 2010-02-01 23:28:58.000000000 +0000
|
||||
+++ ATLAS/CONFIG/src/backend/archinfo_linux.c 2010-02-02 22:38:31.000000000 +0000
|
||||
@@ -145,14 +145,12 @@
|
||||
* Add these back if we get machine access and can test
|
||||
*/
|
||||
case AFSPARC: /* don't know here anymore */
|
||||
- #if 0
|
||||
if ( !CmndOneLine(NULL, "fgrep cpu /proc/cpuinfo", res) )
|
||||
{
|
||||
- if (strstr(res, "UltraSparc II")) mach = SunUS2;
|
||||
- else if (strstr(res, "UltraSparc I")) mach = SunUS1;
|
||||
- else if (strstr(res, "UltraSparc")) mach = SunUSX;
|
||||
+ if (strstr(res, "UltraSparc II")) mach = SunUSII;
|
||||
+ else if (strstr(res, "UltraSparc I")) mach = SunUSI;
|
||||
+ else if (strstr(res, "UltraSparc")) mach = SunUSII;
|
||||
}
|
||||
- #endif
|
||||
break;
|
||||
case AFALPHA:
|
||||
#if 0
|
||||
@@ -196,6 +194,11 @@
|
||||
reslns = CmndResults(NULL, "grep '^processor' /proc/cpuinfo");
|
||||
if (reslns) ncpu = fNumLines(reslns);
|
||||
}
|
||||
+ if (__sparc__)
|
||||
+ {
|
||||
+ reslns = CmndResults(NULL, "grep '^CPU.*online' /proc/cpuinfo");
|
||||
+ if (reslns) ncpu = fNumLines(reslns);
|
||||
+ }
|
||||
return(ncpu);
|
||||
}
|
||||
|
||||
diff -uNr ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c 2010-02-01 23:29:23.000000000 +0000
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2_US.c 2010-02-01 23:30:07.000000000 +0000
|
||||
@@ -95,6 +95,11 @@
|
||||
#define incBm %g3
|
||||
#define incBn %g4
|
||||
|
||||
+#if defined(__sparc__) && defined(__arch64__)
|
||||
+ .register %g2, #scratch
|
||||
+ .register %g3, #scratch
|
||||
+#endif
|
||||
+
|
||||
#ifdef DCPLX
|
||||
#define incCm 64
|
||||
#define CSH 4
|
||||
diff -uNr ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c 2010-02-01 23:29:24.000000000 +0000
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x8_US.c 2010-02-01 23:30:08.000000000 +0000
|
||||
@@ -135,6 +135,11 @@
|
||||
#define pfB %i2 /* aliased with ldab */
|
||||
#endif
|
||||
|
||||
+#if defined(__sparc__) && defined(__arch64__)
|
||||
+ .register %g2, #scratch
|
||||
+ .register %g3, #scratch
|
||||
+#endif
|
||||
+
|
||||
#ifdef DCPLX
|
||||
#define CMUL(arg_) ((arg_)*2)
|
||||
#define incCm 64
|
||||
diff -uNr ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c 2010-02-01 23:29:25.000000000 +0000
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x2_US.c 2010-02-01 23:30:09.000000000 +0000
|
||||
@@ -93,6 +93,12 @@
|
||||
#define rC23 %f30
|
||||
#define rC33 %f31
|
||||
#define FSIZE 64
|
||||
+
|
||||
+#if defined(__sparc__) && defined(__arch64__)
|
||||
+ .register %g2, #scratch
|
||||
+ .register %g3, #scratch
|
||||
+#endif
|
||||
+
|
||||
#ifdef SCPLX
|
||||
#define CSH 3
|
||||
#define CMUL(arg_) ((arg_)*2)
|
||||
diff -uNr ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c 2010-02-01 23:29:25.000000000 +0000
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x72_US.c 2010-02-01 23:30:09.000000000 +0000
|
||||
@@ -108,6 +108,12 @@
|
||||
#define rC23 %f30
|
||||
#define rC33 %f31
|
||||
#define FSIZE 64
|
||||
+
|
||||
+#if defined(__sparc__) && defined(__arch64__)
|
||||
+ .register %g2, #scratch
|
||||
+ .register %g3, #scratch
|
||||
+#endif
|
||||
+
|
||||
#ifdef SCPLX
|
||||
#define CSH 3
|
||||
#define CMUL(arg_) ((arg_)*2)
|
|
@ -1,131 +0,0 @@
|
|||
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
Subject: atlas.3.10.2 add power8 cpu
|
||||
Date: Thu, 18 Sep 2014 15:13:24 +0200
|
||||
|
||||
atlas.3.10.2 add Power8 cpu
|
||||
tracked upstream by issue 67
|
||||
https://sourceforge.net/p/math-atlas/patches/67/
|
||||
|
||||
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
---
|
||||
CONFIG/ARCHS/Make.ext | 7 +++++++
|
||||
CONFIG/include/atlconf.h | 6 +++---
|
||||
CONFIG/src/atlcomp.txt | 6 ++++++
|
||||
CONFIG/src/backend/archinfo_aix.c | 2 ++
|
||||
CONFIG/src/backend/archinfo_linux.c | 1 +
|
||||
include/atlas_pca.h | 2 +-
|
||||
6 files changed, 20 insertions(+), 4 deletions(-)
|
||||
|
||||
Index: ATLAS/CONFIG/ARCHS/Make.ext
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/ARCHS/Make.ext
|
||||
+++ ATLAS/CONFIG/ARCHS/Make.ext
|
||||
@@ -33,6 +33,7 @@ files = AMD64K10h32SSE3.tar.bz2 AMD64K10
|
||||
MIPSR1xK64.tar.bz2 Makefile P432SSE2.tar.bz2 P4E32SSE3.tar.bz2 \
|
||||
P4E64SSE3.tar.bz2 PIII32SSE1.tar.bz2 POWER432.tar.bz2 \
|
||||
POWER464.tar.bz2 POWER564.tar.bz2 POWER764VSX.tar.bz2 \
|
||||
+ POWER864VSX.tar.bz2 \
|
||||
PPCG432AltiVec.tar.bz2 PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 \
|
||||
PPRO32.tar.bz2 USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 \
|
||||
USIV64.tar.bz2 UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \
|
||||
@@ -308,6 +309,12 @@ POWER764VSX.tar.bz2 : $(basdr)/POWER764V
|
||||
/tmp/POWER764VSX.tar POWER764VSX
|
||||
bzip2 /tmp/POWER764VSX.tar
|
||||
mv /tmp/POWER764VSX.tar.bz2 ./.
|
||||
+POWER864VSX.tar.bz2 : $(basdr)/POWER864VSX
|
||||
+ - rm -f /tmp/POWER864VSX.tar /tmp/POWER864VSX.tar.bz2
|
||||
+ cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
|
||||
+ /tmp/POWER864VSX.tar POWER864VSX
|
||||
+ bzip2 /tmp/POWER864VSX.tar
|
||||
+ mv /tmp/POWER864VSX.tar.bz2 ./.
|
||||
IBMz1032.tar.bz2 : $(basdr)/IBMz1032
|
||||
- rm -f /tmp/IBMz1032.tar /tmp/IBMz1032.tar.bz2
|
||||
cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
|
||||
Index: ATLAS/CONFIG/include/atlconf.h
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/include/atlconf.h
|
||||
+++ ATLAS/CONFIG/include/atlconf.h
|
||||
@@ -18,10 +18,10 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS
|
||||
enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
|
||||
AFARM, AFS390};
|
||||
|
||||
-#define NMACH 52
|
||||
+#define NMACH 53
|
||||
static char *machnam[NMACH] =
|
||||
{"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5",
|
||||
- "POWER6", "POWER7", "POWERe6500", "IBMz9", "IBMz10", "IBMz196",
|
||||
+ "POWER6", "POWER7", "POWER8", "POWERe6500", "IBMz9", "IBMz10", "IBMz196",
|
||||
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
|
||||
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
|
||||
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
|
||||
@@ -31,7 +31,7 @@ static char *machnam[NMACH] =
|
||||
"USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS",
|
||||
"MIPSR1xK", "MIPSICE9", "ARMv7"};
|
||||
enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5,
|
||||
- IbmPwr6, IbmPwr7, Pwre6500,
|
||||
+ IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
|
||||
IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */
|
||||
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
|
||||
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
|
||||
Index: ATLAS/CONFIG/src/atlcomp.txt
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/atlcomp.txt
|
||||
+++ ATLAS/CONFIG/src/atlcomp.txt
|
||||
@@ -190,6 +190,10 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
|
||||
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2'
|
||||
MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
|
||||
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
|
||||
+MACH=POWER8 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||
+ 'gcc' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
|
||||
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
|
||||
+ 'gfortran' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
|
||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
|
||||
'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
|
||||
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
|
||||
@@ -210,6 +214,8 @@ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dm
|
||||
'gcc' '-mcpu=power4 -mtune=power4 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
|
||||
MACH=POWER4 OS=ALL LVL=1010 COMPS=f77
|
||||
'xlf' '-qtune=pwr4 -qarch=pwr4 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
||||
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
|
||||
+ 'xlf' '-qtune=pwr8 -qarch=pwr8 -O3 -qmaxmem=-1 -qfloat=hsflt'
|
||||
#
|
||||
# IBM System z or zEnterprise.
|
||||
# These compiler flags given by IBM; -O3 -funroll-loops are chosen because
|
||||
Index: ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c
|
||||
+++ ATLAS/CONFIG/src/backend/archinfo_linux.c
|
||||
@@ -77,6 +77,7 @@ enum MACHTYPE ProbeArch()
|
||||
else if (strstr(res, "7455")) mach = PPCG4;
|
||||
else if (strstr(res, "PPC970FX")) mach = PPCG5;
|
||||
else if (strstr(res, "PPC970MP")) mach = PPCG5;
|
||||
+ else if (strstr(res, "POWER8")) mach = IbmPwr8;
|
||||
else if (strstr(res, "POWER7")) mach = IbmPwr7;
|
||||
else if (strstr(res, "POWER6")) mach = IbmPwr6;
|
||||
else if (strstr(res, "POWER5")) mach = IbmPwr5;
|
||||
Index: ATLAS/include/atlas_pca.h
|
||||
===================================================================
|
||||
--- ATLAS.orig/include/atlas_pca.h
|
||||
+++ ATLAS/include/atlas_pca.h
|
||||
@@ -26,7 +26,7 @@
|
||||
#endif
|
||||
#elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
|
||||
defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
|
||||
- defined(ATL_ARCH_POWER7)
|
||||
+ defined(ATL_ARCH_POWER7) || defined(ATL_ARCH_POWER8)
|
||||
#ifdef __GNUC__
|
||||
#define ATL_membarrier __asm__ __volatile__ ("dcs")
|
||||
/* #define ATL_USEPCA 1 */
|
||||
Index: ATLAS/CONFIG/src/backend/archinfo_aix.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/CONFIG/src/backend/archinfo_aix.c
|
||||
+++ ATLAS/CONFIG/src/backend/archinfo_aix.c
|
||||
@@ -67,6 +67,8 @@ enum MACHTYPE ProbeArch()
|
||||
{
|
||||
if (strstr(res, "PowerPC_POWER5"))
|
||||
mach = IbmPwr5;
|
||||
+ else if (strstr(res, "PowerPC_POWER8"))
|
||||
+ mach = IbmPwr8;
|
||||
else if (strstr(res, "PowerPC_POWER7"))
|
||||
mach = IbmPwr7;
|
||||
else if (strstr(res, "PowerPC_POWER6"))
|
|
@ -1,220 +0,0 @@
|
|||
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
Subject: atlas.3.10.2 ppc64le abiv2 patch
|
||||
Date: Mon, 28 Jul 2014 04:29:05 -0400
|
||||
|
||||
atlas.3.10.2 abiv2 step2 complete the changes already present in atlas 3.10.2
|
||||
* still some files with opd ABI V1 to be disabled for ABI V2
|
||||
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||
|
||||
atlas.3.10.2 ppc64le abiv2 step3
|
||||
* change offsets of parameters read from stack to avoid some segfaults.
|
||||
(value changes 120 => 104 and 128 => 112 identified by gdb investigation)
|
||||
|
||||
Despite this step3 patch there are two remaining problems for the ppc64le architecture:
|
||||
* TODO: still have seg-faults in console during build/check
|
||||
but this is not critical (without make check) and the rpms are generated on Fedora.
|
||||
unable to investigate because of problem tracked by issue 950
|
||||
https://sourceforge.net/p/math-atlas/support-requests/950/
|
||||
|
||||
* TODO: make check failure because xsslvtst execution failure
|
||||
related to vector assembly code that assumes big-endian env
|
||||
as written in ATL_cmm4x4x128_av.c and ATL_smm4x4x128_av.c.
|
||||
Would need significant work to support little-endian as per
|
||||
endianness comments of all PowerPC vector instructions in:
|
||||
https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D/$file/vector_simd_pem.ppc.2005AUG23.pdf
|
||||
|
||||
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
---
|
||||
tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 7 +++++++
|
||||
tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 7 +++++++
|
||||
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 9 ++++++++-
|
||||
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 20 ++++++++++++++++++--
|
||||
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 23 ++++++++++++++++++++++-
|
||||
5 files changed, 62 insertions(+), 4 deletions(-)
|
||||
|
||||
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
||||
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
|
||||
.globl Mjoin(_,ATL_USERMM)
|
||||
Mjoin(_,ATL_USERMM):
|
||||
#else
|
||||
- #if defined(ATL_USE64BITS)
|
||||
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||
/*
|
||||
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||
*/
|
||||
@@ -324,8 +324,15 @@ ATL_USERMM:
|
||||
#endif
|
||||
|
||||
#ifdef ATL_USE64BITS
|
||||
+#if _CALL_ELF == 2
|
||||
+/* ABIv2 */
|
||||
+ ld pC0, 104(r1)
|
||||
+ ld incCn, 112(r1)
|
||||
+#else
|
||||
+/* ABIv1 */
|
||||
ld pC0, 120(r1)
|
||||
ld incCn, 128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
||||
lwz pC0, 68(r1)
|
||||
lwz incCn, 72(r1)
|
||||
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
||||
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
|
||||
const TYPE beta, TYPE *C, const int ldc)
|
||||
(r10) 8(r1)
|
||||
*******************************************************************************
|
||||
-64 bit ABIs:
|
||||
+64 bit ABIv1s:
|
||||
r3 r4 r5 r6/f1
|
||||
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
||||
r7 r8 r9 r10
|
||||
const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
||||
f2 120(r1) 128(r1)
|
||||
const TYPE beta, TYPE *C, const int ldc)
|
||||
+
|
||||
+64 bit ABIv2s:
|
||||
+ r3 r4 r5 r6/f1
|
||||
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
||||
+ r7 r8 r9 r10
|
||||
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
||||
+ f2 104(r1) 112(r1)
|
||||
+ const TYPE beta, TYPE *C, const int ldc)
|
||||
#endif
|
||||
#ifdef ATL_AS_AIX_PPC
|
||||
.csect .text[PR]
|
||||
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
|
||||
.globl Mjoin(_,ATL_USERMM)
|
||||
Mjoin(_,ATL_USERMM):
|
||||
#else
|
||||
- #if defined(ATL_USE64BITS)
|
||||
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||
/*
|
||||
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||
*/
|
||||
@@ -257,9 +265,17 @@ ATL_USERMM:
|
||||
#endif
|
||||
#endif
|
||||
|
||||
+
|
||||
#if defined (ATL_USE64BITS)
|
||||
+#if _CALL_ELF == 2
|
||||
+/* ABIv2 */
|
||||
+ ld pC0, 104(r1)
|
||||
+ ld incCn, 112(r1)
|
||||
+#else
|
||||
+/* ABIv1 */
|
||||
ld pC0, 120(r1)
|
||||
ld incCn, 128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
||||
lwz pC0, 68(r1)
|
||||
lwz incCn, 72(r1)
|
||||
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
||||
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
|
||||
.globl Mjoin(_,ATL_USERMM)
|
||||
Mjoin(_,ATL_USERMM):
|
||||
#else
|
||||
- #if defined(ATL_USE64BITS)
|
||||
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
||||
/*
|
||||
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
||||
*/
|
||||
@@ -221,8 +221,15 @@ ATL_USERMM:
|
||||
* kernel instead
|
||||
*/
|
||||
#if defined (ATL_USE64BITS)
|
||||
+#if _CALL_ELF == 2
|
||||
+/* ABIv2 */
|
||||
+ ld r10, 104(r1)
|
||||
+ ld r5, 112(r1)
|
||||
+#else
|
||||
+/* ABIv1 */
|
||||
ld r10, 120(r1)
|
||||
ld r5, 128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC)
|
||||
lwz r10, 60(r1)
|
||||
lwz r5, 64(r1)
|
||||
@@ -285,8 +292,15 @@ ATL_USERMM:
|
||||
eqv r0, r0, r0 /* all 1s */
|
||||
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
||||
#if defined (ATL_USE64BITS)
|
||||
+#if _CALL_ELF == 2
|
||||
+ /* ABIv2 */
|
||||
+ ld pC0, FSIZE+104(r1)
|
||||
+ ld ldc, FSIZE+112(r1)
|
||||
+#else
|
||||
+ /* ABIv1 */
|
||||
ld pC0, FSIZE+120(r1)
|
||||
ld ldc, FSIZE+128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC)
|
||||
lwz pC0, FSIZE+60(r1)
|
||||
lwz ldc, FSIZE+64(r1)
|
||||
@@ -4258,8 +4272,15 @@ UNALIGNED_C:
|
||||
eqv r0, r0, r0 /* all 1s */
|
||||
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
||||
#if defined (ATL_USE64BITS)
|
||||
+#if _CALL_ELF == 2
|
||||
+ /* ABIv2 */
|
||||
+ ld pC0, FSIZE+104(r1)
|
||||
+ ld ldc, FSIZE+112(r1)
|
||||
+#else
|
||||
+ /* ABIv1 */
|
||||
ld pC0, FSIZE+120(r1)
|
||||
ld ldc, FSIZE+128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC)
|
||||
lwz pC0, FSIZE+60(r1)
|
||||
lwz ldc, FSIZE+64(r1)
|
||||
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
||||
@@ -258,8 +258,15 @@ ATL_USERMM:
|
||||
eqv r0, r0, r0 /* all 1s */
|
||||
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
||||
#if defined (ATL_USE64BITS)
|
||||
+#if _CALL_ELF == 2
|
||||
+/* ABIv2 */
|
||||
+ ld pC0, FSIZE+104(r1)
|
||||
+ ld ldc, FSIZE+112(r1)
|
||||
+#else
|
||||
+/* ABIv1 */
|
||||
ld pC0, FSIZE+120(r1)
|
||||
ld ldc, FSIZE+128(r1)
|
||||
+#endif
|
||||
#elif defined(ATL_AS_OSX_PPC)
|
||||
lwz pC0, FSIZE+60(r1)
|
||||
lwz ldc, FSIZE+64(r1)
|
||||
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
||||
@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM):
|
||||
*/
|
||||
#ifdef ATL_GAS_LINUX_PPC
|
||||
#ifdef ATL_USE64BITS
|
||||
+ #if _CALL_ELF == 2
|
||||
+ /* ABIv2 */
|
||||
+ ld pC0, 104(r1)
|
||||
+ ld incCn, 112(r1)
|
||||
+ #else
|
||||
+ /* ABIv1 */
|
||||
ld pC0, 120(r1)
|
||||
ld incCn, 128(r1)
|
||||
+ #endif
|
||||
#else
|
||||
lwz incCn, FSIZE+8(r1)
|
||||
#endif
|
|
@ -1,151 +0,0 @@
|
|||
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
Subject: atlas.3.10.2 ppc64le do not use files with lvx
|
||||
Date: Tue, 12 Aug 2014 16:07:06 +0200
|
||||
|
||||
ppc64le do not use files with lvx
|
||||
This is a temporary patch as long as the related files
|
||||
are not ported yet to ppc64 little-endian.
|
||||
|
||||
Warning: patch to be applied only for ppc64le architecture
|
||||
and will also need atlas-new_archdef_for_ppc64le.patch
|
||||
|
||||
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
---
|
||||
tune/blas/gemm/CASES/ccases.flg | 6 +-----
|
||||
tune/blas/gemm/CASES/dcases.flg | 8 +-------
|
||||
tune/blas/gemm/CASES/dcases.vnb | 4 ----
|
||||
tune/blas/gemm/CASES/scases.flg | 9 +--------
|
||||
tune/blas/gemm/CASES/scases.vnb | 3 ---
|
||||
tune/blas/gemm/CASES/zcases.flg | 8 +-------
|
||||
6 files changed, 4 insertions(+), 34 deletions(-)
|
||||
|
||||
Index: ATLAS/tune/blas/gemm/CASES/ccases.flg
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/ccases.flg
|
||||
+++ ATLAS/tune/blas/gemm/CASES/ccases.flg
|
||||
@@ -1,5 +1,5 @@
|
||||
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||
-24
|
||||
+22
|
||||
304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
|
||||
@@ -48,13 +48,9 @@ gcc
|
||||
328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-fomit-frame-pointer -O2 -fno-tree-loop-optimize
|
||||
-329 192 4 4 4 1 16 4 4 4 ATL_cmm4x4x128_av.c "R. Clint Whaley" \
|
||||
-gcc
|
||||
--x assembler-with-cpp
|
||||
331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mips4
|
||||
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM"
|
||||
333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mfpu=vfpv3
|
||||
Index: ATLAS/tune/blas/gemm/CASES/scases.flg
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/scases.flg
|
||||
+++ ATLAS/tune/blas/gemm/CASES/scases.flg
|
||||
@@ -1,5 +1,5 @@
|
||||
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||
-25
|
||||
+22
|
||||
304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
|
||||
@@ -48,16 +48,9 @@ gcc
|
||||
328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-fomit-frame-pointer -O2 -fno-tree-loop-optimize
|
||||
-329 192 4 4 4 1 16 4 4 4 ATL_smm4x4x128_av.c "R. Clint Whaley" \
|
||||
-gcc
|
||||
--x assembler-with-cpp
|
||||
-330 200 92 92 92 1 16 92 92 92 ATL_smm4x4x128_av.c "R. Clint Whaley" \
|
||||
-gcc
|
||||
--x assembler-with-cpp
|
||||
331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mips4
|
||||
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM"
|
||||
333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mfpu=vfpv3
|
||||
Index: ATLAS/tune/blas/gemm/CASES/scases.vnb
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/scases.vnb
|
||||
+++ ATLAS/tune/blas/gemm/CASES/scases.vnb
|
||||
@@ -31,9 +31,6 @@
|
||||
# Defaults: TA='t', TB='n', SSE=0, X87=0, LDBOT=1, RTKU=0, AOUTER=0,
|
||||
# KBMAX=KU, KBMIN=KU, BETAN1=0, RTMN=1
|
||||
#
|
||||
-ID=1 ROUT='ATL_smm4x4x128_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=4 \
|
||||
- LDKB=1 LDBOT=1 KBMIN=4 KBMAX=128 ASM=GAS_PPC \
|
||||
- COMP='gcc' FLAGS='-x assembler-with-cpp'
|
||||
ID=2 ROUT='ATL_smm4x4x16_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=16 \
|
||||
LDKB=1 LDBOT=0 KBMIN=16 KBMAX=2048 ASM=GAS_SPARC \
|
||||
COMP='gcc' FLAGS='-x assembler-with-cpp'
|
||||
Index: ATLAS/tune/blas/gemm/CASES/dcases.flg
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.flg
|
||||
+++ ATLAS/tune/blas/gemm/CASES/dcases.flg
|
||||
@@ -1,5 +1,5 @@
|
||||
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||
-32
|
||||
+30
|
||||
306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
|
||||
@@ -79,12 +79,6 @@ gcc
|
||||
336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mips4
|
||||
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
|
||||
-gcc
|
||||
--x assembler-with-cpp
|
||||
-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \
|
||||
-gcc
|
||||
--O3 -mvsx
|
||||
339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mfpu=vfpv3
|
||||
Index: ATLAS/tune/blas/gemm/CASES/dcases.vnb
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.vnb
|
||||
+++ ATLAS/tune/blas/gemm/CASES/dcases.vnb
|
||||
@@ -53,10 +53,6 @@ ID=6 ROUT='ATL_dmm4x1x90_x87.c' AUTH='R
|
||||
ID=7 ROUT='ATL_dmm8x1x120_sse2.c' AUTH='R. Clint Whaley' \
|
||||
MU=8 NU=1 KU=1 KBMAX=512 ASM=GAS_x8664 BETAN1=1 \
|
||||
COMP='gcc' FLAGS='-m64 -x assembler-with-cpp'
|
||||
-ID=70 ROUT='ATL_dmm4x4x80_ppc.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
|
||||
- MU=4 NU=4 KU=1 KBMIN=1 KBMAX=80 ASM=GAS_PPC BETAN1=0 LDBOT=0 \
|
||||
- LDAB=0 LDISKB=1 RTN=1 RTM=1 RTK=0 \
|
||||
- COMP='gcc' FLAGS='-x assembler-with-cpp'
|
||||
ID=80 ROUT='ATL_dmm4x4x16r8_US.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
|
||||
MU=4 NU=4 KU=24 KBMIN=24 KBMAX=512 ASM=GAS_SPARC BETAN1=0 \
|
||||
LDAB=0 RTK=1 RTN=1 RTM=1 LDBOT=0 LDISKB=1 LDAB=1 \
|
||||
Index: ATLAS/tune/blas/gemm/CASES/zcases.flg
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/gemm/CASES/zcases.flg
|
||||
+++ ATLAS/tune/blas/gemm/CASES/zcases.flg
|
||||
@@ -1,5 +1,5 @@
|
||||
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
|
||||
-31
|
||||
+29
|
||||
306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
|
||||
@@ -76,12 +76,6 @@ gcc
|
||||
336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mips4
|
||||
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
|
||||
-gcc
|
||||
--x assembler-with-cpp
|
||||
-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \
|
||||
-gcc
|
||||
--O3 -mvsx
|
||||
339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
|
||||
gcc
|
||||
-x assembler-with-cpp -mfpu=vfpv3
|
505
atlas.spec
505
atlas.spec
|
@ -5,47 +5,58 @@ Version: 3.10.3
|
|||
%if "%{?enable_native_atlas}" != "0"
|
||||
%define dist .native
|
||||
%endif
|
||||
Release: 5%{?dist}
|
||||
Release: 19%{?dist}
|
||||
Summary: Automatically Tuned Linear Algebra Software
|
||||
|
||||
Group: System Environment/Libraries
|
||||
License: BSD
|
||||
URL: http://math-atlas.sourceforge.net/
|
||||
Source0: http://downloads.sourceforge.net/math-atlas/%{name}%{version}.tar.bz2
|
||||
Source1: PPRO32.tgz
|
||||
#Source2: K7323DNow.tgz
|
||||
Source3: README.dist
|
||||
#Source4: USII64.tgz
|
||||
#Source5: USII32.tgz
|
||||
#Source6: IBMz1032.tgz
|
||||
#Source7: IBMz1064.tgz
|
||||
#Source8: IBMz19632.tgz
|
||||
#Source9: IBMz19664.tgz
|
||||
Source2: README.dist
|
||||
#archdefs taken from debian:
|
||||
Source11: POWER332.tar.bz2
|
||||
Source12: IBMz932.tar.bz2
|
||||
Source13: IBMz964.tar.bz2
|
||||
Source3: POWER332.tar.bz2
|
||||
Source4: IBMz932.tar.bz2
|
||||
Source5: IBMz964.tar.bz2
|
||||
#upstream arm uses softfp abi, fedora arm uses hard
|
||||
Source14: ARMv732NEON.tar.bz2
|
||||
Source6: ARMv732NEON.tar.bz2
|
||||
#again, taken from debian
|
||||
Source15: IBMz1264.tar.bz2
|
||||
Source16: ARMa732.tar.bz2
|
||||
Source7: IBMz1264.tar.bz2
|
||||
Source8: ARMa732.tar.bz2
|
||||
|
||||
#Provided By IBM
|
||||
Source9: IBMz1364VXZ.tar.bz2
|
||||
Source10: IBMz1464VXZ2.tar.bz2
|
||||
Source11: IBMz1564VXZ2.tar.bz2
|
||||
|
||||
Patch2: atlas-fedora-arm.patch
|
||||
# Properly pass -melf_* to the linker with -Wl, fixes FTBFS bug 817552
|
||||
# https://sourceforge.net/tracker/?func=detail&atid=379484&aid=3555789&group_id=23725
|
||||
Patch3: atlas-melf.patch
|
||||
Patch4: atlas-throttling.patch
|
||||
Patch1: atlas-melf.patch
|
||||
Patch2: atlas-throttling.patch
|
||||
|
||||
#credits Lukas Slebodnik
|
||||
Patch5: atlas-shared_libraries.patch
|
||||
Patch3: atlas-shared_libraries.patch
|
||||
|
||||
Patch7: atlas-aarch64port.patch
|
||||
Patch8: atlas-genparse.patch
|
||||
Patch4: atlas-genparse.patch
|
||||
|
||||
# Unbundle LAPACK (BZ #1181369)
|
||||
Patch9: atlas.3.10.1-unbundle.patch
|
||||
Patch5: atlas.3.10.1-unbundle.patch
|
||||
Patch6: atlas-gcc10.patch
|
||||
|
||||
|
||||
#patches dealing with z{13,14,15}, provided by IBM
|
||||
Patch7: 0001-Avoid-c99-standard-compiler.patch
|
||||
Patch8: 0002-Fix-rpath-link-command-line-options.patch
|
||||
Patch9: 0003-Fix-SIMD-support-on-IBM-z13.patch
|
||||
Patch10: 0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch
|
||||
Patch11: 0005-Optimizations-for-IBM-z13.patch
|
||||
Patch12: 0006-Add-IBM-z14-support.patch
|
||||
Patch13: 0007-Enable-cross-compile.patch
|
||||
Patch14: 0008-Add-IBM-z15-support.patch
|
||||
|
||||
#Covscan
|
||||
Patch101: atlas-getri.patch
|
||||
|
||||
BuildRequires: make
|
||||
BuildRequires: gcc-gfortran, lapack-static, gcc
|
||||
|
||||
%ifarch x86_64
|
||||
|
@ -60,8 +71,8 @@ Obsoletes: atlas-sse3 < 3.10.3-1
|
|||
%endif
|
||||
|
||||
%ifarch s390 s390x
|
||||
#Obsoletes: atlas-z10 < 3.10
|
||||
#Obsoletes: atlas-z196 < 3.10
|
||||
Obsoletes: atlas-z10 < 3.10.3-11
|
||||
Obsoletes: atlas-z196 < 3.10.3-11
|
||||
%endif
|
||||
|
||||
|
||||
|
@ -83,11 +94,10 @@ see the documentation for information.
|
|||
|
||||
%package devel
|
||||
Summary: Development libraries for ATLAS
|
||||
Group: Development/Libraries
|
||||
Requires: %{name} = %{version}-%{release}
|
||||
Obsoletes: %name-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%ifarch x86_64
|
||||
Obsoletes: atlas-sse3-devel < 3.10.3-1
|
||||
|
@ -99,16 +109,21 @@ Obsoletes: atlas-sse-devel < 3.10.3-1
|
|||
Obsoletes: atlas-sse2-devel < 3.10.3-1
|
||||
Obsoletes: atlas-sse3-devel < 3.10.3-1
|
||||
%endif
|
||||
|
||||
%ifarch s390 s390x
|
||||
Obsoletes: atlas-z10-devel < 3.10.3-11
|
||||
Obsoletes: atlas-z196-devel < 3.10.3-11
|
||||
%endif
|
||||
|
||||
%description devel
|
||||
This package contains headers for development with ATLAS
|
||||
(Automatically Tuned Linear Algebra Software).
|
||||
|
||||
%package static
|
||||
Summary: Static libraries for ATLAS
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-devel = %{version}-%{release}
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%ifarch x86_64
|
||||
Obsoletes: atlas-sse3-static < 3.10.3-1
|
||||
|
@ -120,6 +135,12 @@ Obsoletes: atlas-sse-static < 3.10.3-1
|
|||
Obsoletes: atlas-sse2-static < 3.10.3-1
|
||||
Obsoletes: atlas-sse3-static < 3.10.3-1
|
||||
%endif
|
||||
|
||||
%ifarch s390 s390x
|
||||
Obsoletes: atlas-z10-static < 3.10.3-11
|
||||
Obsoletes: atlas-z196-static < 3.10.3-11
|
||||
%endif
|
||||
|
||||
%description static
|
||||
This package contains static version of ATLAS (Automatically Tuned
|
||||
Linear Algebra Software).
|
||||
|
@ -132,12 +153,9 @@ Linear Algebra Software).
|
|||
#
|
||||
%ifarch x86_64
|
||||
%define types base corei2
|
||||
#corei4
|
||||
# sse3
|
||||
|
||||
%package corei2-static
|
||||
Summary: ATLAS libraries for Corei2 (Ivy/Sandy bridge) CPUs
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description corei2-static
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
|
@ -146,7 +164,6 @@ CPUs. The base ATLAS builds for the x86_64 architecture are made for the hammer6
|
|||
|
||||
%package corei2
|
||||
Summary: ATLAS libraries for Corei2 (Ivy/Sandy bridge) CPUs
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description corei2
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
|
@ -155,11 +172,10 @@ CPUs. The base ATLAS builds for the x86_64 architecture are made for the hammer6
|
|||
|
||||
%package corei2-devel
|
||||
Summary: Development libraries for ATLAS for Corei2 (Ivy/Sandy bridge) CPUs
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-corei2 = %{version}-%{release}
|
||||
Obsoletes: %name-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%description corei2-devel
|
||||
This package contains shared and static versions of the ATLAS
|
||||
|
@ -169,113 +185,78 @@ optimizations for the corei2 (Ivy/Sandy bridge) CPUs.
|
|||
|
||||
%ifarch %{ix86}
|
||||
%define types base
|
||||
#corei1
|
||||
|
||||
#%package corei1
|
||||
#Summary: ATLAS libraries for Corei1 (Nehalem/Westmere) CPUs
|
||||
#Group: System Environment/Libraries
|
||||
|
||||
#%description corei1
|
||||
#This package contains ATLAS (Automatically Tuned Linear Algebra Software)
|
||||
#shared libraries compiled with optimizations for the Corei1 (Nehalem/Westmere) CPUs.
|
||||
#The base ATLAS builds for the ix86 architecture are made for PIII CPUs.
|
||||
|
||||
#%package corei1-devel
|
||||
#Summary: Development libraries for ATLAS for Corei1 (Nehalem/Westmere) CPUs
|
||||
#Group: Development/Libraries
|
||||
#Requires: %{name}-corei1 = %{version}-%{release}
|
||||
#Obsoletes: %name-header <= %version-%release
|
||||
#Requires(posttrans): chkconfig
|
||||
#Requires(postun): chkconfig
|
||||
|
||||
#%description corei1-devel
|
||||
#This package contains shared and static versions of the ATLAS
|
||||
#(Automatically Tuned Linear Algebra Software) libraries compiled with
|
||||
#optimizations for the corei1 (Nehalem/Westmere) CPUs.
|
||||
|
||||
#%package corei1-static
|
||||
#Summary: Static libraries for ATLAS for Corei1 (/Nehalem/Westmere) CPUs
|
||||
#Group: Development/Libraries
|
||||
#Requires: %{name}-corei1-devel = %{version}-%{release}
|
||||
#Requires(posttrans): chkconfig
|
||||
#Requires(postun): chkconfig
|
||||
|
||||
#%description corei1-static
|
||||
#This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
#Software) static libraries compiled with optimizations for the Corei1 (Nehalem/Westemere)
|
||||
#CPUs. The base ATLAS builds for the ix86 architecture are made for the PIII CPUs.
|
||||
|
||||
%endif
|
||||
|
||||
%ifarch s390 s390x
|
||||
%define types base z196 z10
|
||||
%define types base z14 z15
|
||||
|
||||
%package z196
|
||||
Summary: ATLAS libraries for z196
|
||||
%package z14
|
||||
Summary: ATLAS libraries for z14
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description z196
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
Software) libraries compiled with optimizations for the z196.
|
||||
%description z14
|
||||
This package contains ATLAS (Automatically Tuned Linear Algebra Software)
|
||||
shared libraries compiled with optimizations for the z14 CPUs.
|
||||
|
||||
%package z196-devel
|
||||
Summary: Development libraries for ATLAS for z196
|
||||
%package z14-devel
|
||||
Summary: Development libraries for ATLAS for z14
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-z196 = %{version}-%{release}
|
||||
Obsoletes: %name-z196-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
|
||||
%description z196-devel
|
||||
This package contains headers and shared versions of the ATLAS
|
||||
(Automatically Tuned Linear Algebra Software) libraries compiled with
|
||||
optimizations for the z196 architecture.
|
||||
|
||||
%package z196-static
|
||||
Summary: Static libraries for ATLAS
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-z196-devel = %{version}-%{release}
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
|
||||
%description z196-static
|
||||
This package contains static version of ATLAS (Automatically Tuned
|
||||
Linear Algebra Software) for the z196 architecture.
|
||||
|
||||
|
||||
%package z10
|
||||
Summary: ATLAS libraries for z10
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description z10
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
Software) libraries compiled with optimizations for the z10.
|
||||
|
||||
%package z10-devel
|
||||
Summary: Development libraries for ATLAS for z10
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-z10 = %{version}-%{release}
|
||||
Requires: %{name}-z14 = %{version}-%{release}
|
||||
Obsoletes: %name-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
|
||||
%description z10-devel
|
||||
This package contains headers and shared versions of the ATLAS
|
||||
%description z14-devel
|
||||
This package contains shared and static versions of the ATLAS
|
||||
(Automatically Tuned Linear Algebra Software) libraries compiled with
|
||||
optimizations for the z10 architecture.
|
||||
optimizations for the z14 CPUs.
|
||||
|
||||
%package z10-static
|
||||
Summary: Static libraries for ATLAS
|
||||
%package z14-static
|
||||
Summary: Static libraries for ATLAS for z14
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-devel = %{version}-%{release}
|
||||
Requires: %{name}-z14-devel = %{version}-%{release}
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
|
||||
%description z10-static
|
||||
This package contains static version of ATLAS (Automatically Tuned
|
||||
Linear Algebra Software) for the z10 architecture.
|
||||
%description z14-static
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
Software) static libraries compiled with optimizations for the z14
|
||||
CPUs.
|
||||
|
||||
|
||||
%package z15
|
||||
Summary: ATLAS libraries for z15
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description z15
|
||||
This package contains ATLAS (Automatically Tuned Linear Algebra Software)
|
||||
shared libraries compiled with optimizations for the z15 CPUs.
|
||||
|
||||
%package z15-devel
|
||||
Summary: Development libraries for ATLAS for z15
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-z15 = %{version}-%{release}
|
||||
Obsoletes: %name-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
|
||||
%description z15-devel
|
||||
This package contains shared and static versions of the ATLAS
|
||||
(Automatically Tuned Linear Algebra Software) libraries compiled with
|
||||
optimizations for the z15 CPUs.
|
||||
|
||||
%package z15-static
|
||||
Summary: Static libraries for ATLAS for z15
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-z15-devel = %{version}-%{release}
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
|
||||
%description z15-static
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
Software) static libraries compiled with optimizations for the z15
|
||||
CPUs.
|
||||
%endif
|
||||
|
||||
|
||||
|
@ -284,7 +265,6 @@ Linear Algebra Software) for the z10 architecture.
|
|||
|
||||
%package power8
|
||||
Summary: ATLAS libraries for Power 8
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description power8
|
||||
This package contains ATLAS (Automatically Tuned Linear Algebra Software)
|
||||
|
@ -293,11 +273,10 @@ The base ATLAS builds for the ppc64 architecture are made for Power 5 CPUs.
|
|||
|
||||
%package power8-devel
|
||||
Summary: Development libraries for ATLAS for Power 8
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-power8 = %{version}-%{release}
|
||||
Obsoletes: %name-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%description power8-devel
|
||||
This package contains shared and static versions of the ATLAS
|
||||
|
@ -306,10 +285,9 @@ optimizations for the Power 8 CPUs.
|
|||
|
||||
%package power8-static
|
||||
Summary: Static libraries for ATLAS for Power 8
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-power8-devel = %{version}-%{release}
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%description power8-static
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
|
@ -318,7 +296,6 @@ CPUs. The base ATLAS builds for the ppc64 architecture are made for the Power 5
|
|||
|
||||
%package power7
|
||||
Summary: ATLAS libraries for Power 7
|
||||
Group: System Environment/Libraries
|
||||
|
||||
%description power7
|
||||
This package contains ATLAS (Automatically Tuned Linear Algebra Software)
|
||||
|
@ -327,11 +304,10 @@ The base ATLAS builds for the ppc64 architecture are made for Power 5 CPUs.
|
|||
|
||||
%package power7-devel
|
||||
Summary: Development libraries for ATLAS for Power 7
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-power7 = %{version}-%{release}
|
||||
Obsoletes: %name-header <= %version-%release
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%description power7-devel
|
||||
This package contains shared and static versions of the ATLAS
|
||||
|
@ -340,10 +316,9 @@ optimizations for the Power 7 CPUs.
|
|||
|
||||
%package power7-static
|
||||
Summary: Static libraries for ATLAS for Power 7
|
||||
Group: Development/Libraries
|
||||
Requires: %{name}-power7-devel = %{version}-%{release}
|
||||
Requires(posttrans): chkconfig
|
||||
Requires(postun): chkconfig
|
||||
Requires(posttrans): /usr/sbin/alternatives
|
||||
Requires(postun): /usr/sbin/alternatives
|
||||
|
||||
%description power7-static
|
||||
This package contains the ATLAS (Automatically Tuned Linear Algebra
|
||||
|
@ -357,48 +332,51 @@ CPUs. The base ATLAS builds for the ppc64 architecture are made for the Power 5
|
|||
%prep
|
||||
#cat /proc/cpuinfo
|
||||
%setup -q -n ATLAS
|
||||
#patch0 -p0 -b .shared
|
||||
#arm patch not applicable, probably not needed
|
||||
#%ifarch %{arm}
|
||||
#%patch2 -p0 -b .arm
|
||||
#%endif
|
||||
%patch3 -p1 -b .melf
|
||||
%patch4 -p1 -b .thrott
|
||||
%patch5 -p2 -b .sharedlib
|
||||
%ifarch aarch64
|
||||
#%patch7 -p1 -b .aarch64
|
||||
|
||||
|
||||
%patch1 -p1
|
||||
%patch2 -p1
|
||||
%patch3 -p2
|
||||
%patch4 -p1
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
|
||||
%patch7 -p1
|
||||
%patch8 -p1
|
||||
%patch10 -p1
|
||||
|
||||
%ifarch s390x s390
|
||||
%patch9 -p1
|
||||
%patch11 -p1
|
||||
%patch12 -p1
|
||||
%patch13 -p1
|
||||
%patch14 -p1
|
||||
%endif
|
||||
%patch8 -p1 -b .genparse
|
||||
%patch9 -p1 -b .unbundle
|
||||
|
||||
%patch101 -p1
|
||||
|
||||
cp %{SOURCE1} CONFIG/ARCHS/
|
||||
#cp %{SOURCE2} CONFIG/ARCHS/
|
||||
cp %{SOURCE3} doc
|
||||
cp %{SOURCE2} doc
|
||||
cp %{SOURCE3} CONFIG/ARCHS/
|
||||
cp %{SOURCE4} CONFIG/ARCHS/
|
||||
cp %{SOURCE5} CONFIG/ARCHS/
|
||||
cp %{SOURCE6} CONFIG/ARCHS/
|
||||
cp %{SOURCE7} CONFIG/ARCHS/
|
||||
cp %{SOURCE8} CONFIG/ARCHS/
|
||||
cp %{SOURCE9} CONFIG/ARCHS/
|
||||
cp %{SOURCE10} CONFIG/ARCHS/
|
||||
cp %{SOURCE11} CONFIG/ARCHS/
|
||||
cp %{SOURCE12} CONFIG/ARCHS/
|
||||
cp %{SOURCE13} CONFIG/ARCHS/
|
||||
cp %{SOURCE14} CONFIG/ARCHS/
|
||||
cp %{SOURCE15} CONFIG/ARCHS/
|
||||
cp %{SOURCE16} CONFIG/ARCHS/
|
||||
#cp %{SOURCE8} CONFIG/ARCHS/
|
||||
#cp %{SOURCE9} CONFIG/ARCHS/
|
||||
|
||||
%ifarch %{arm}
|
||||
# Set arm flags in atlcomp.txt
|
||||
#sed -i -e 's,-mfpu=vfpv3,-mfpu=neon,' CONFIG/src/atlcomp.txt
|
||||
sed -i -e 's,-mfloat-abi=softfp,-mfloat-abi=hard,' CONFIG/src/atlcomp.txt
|
||||
# Some extra arm flags not needed
|
||||
#sed -i -e 's,-mfpu=vfpv3,,' tune/blas/gemm/CASES/*.flg
|
||||
%endif
|
||||
# Debug
|
||||
#sed -i -e 's,> \(.*\)/ptsanity.out,> \1/ptsanity.out || cat \1/ptsanity.out \&\& exit 1,' makes/Make.*
|
||||
|
||||
# Generate lapack library
|
||||
mkdir lapacklib
|
||||
cd lapacklib
|
||||
ar x %{_libdir}/liblapack_pic.a
|
||||
# Remove functions that have ATLAS implementations
|
||||
rm -f cgelqf.o cgels.o cgeqlf.o cgeqrf.o cgerqf.o cgesv.o cgetrf.o cgetri.o cgetrs.o clarfb.o clarft.o clauum.o cposv.o cpotrf.o cpotri.o cpotrs.o ctrtri.o dgelqf.o dgels.o dgeqlf.o dgeqrf.o dgerqf.o dgesv.o dgetrf.o dgetri.o dgetrs.o dlamch.o dlarfb.o dlarft.o dlauum.o dposv.o dpotrf.o dpotri.o dpotrs.o dtrtri.o ieeeck.o ilaenv.o lsame.o sgelqf.o sgels.o sgeqlf.o sgeqrf.o sgerqf.o sgesv.o sgetrf.o sgetri.o sgetrs.o slamch.o slarfb.o slarft.o slauum.o sposv.o spotrf.o spotri.o spotrs.o strtri.o xerbla.o zgelqf.o zgels.o zgeqlf.o zgeqrf.o zgerqf.o zgesv.o zgetrf.o zgetri.o zgetrs.o zlarfb.o zlarft.o zlauum.o zposv.o zpotrf.o zpotri.o zpotrs.o ztrtri.o
|
||||
rm -f cgelqf.f.o cgels.f.o cgeqlf.f.o cgeqrf.f.o cgerqf.f.o cgesv.f.o cgetrf.f.o cgetri.f.o cgetrs.f.o clarfb.f.o clarft.f.o clauum.f.o cposv.f.o cpotrf.f.o cpotri.f.o cpotrs.f.o ctrtri.f.o dgelqf.f.o dgels.f.o dgeqlf.f.o dgeqrf.f.o dgerqf.f.o dgesv.f.o dgetrf.f.o dgetri.f.o dgetrs.f.o dlamch.f.o dlarfb.f.o dlarft.f.o dlauum.f.o dposv.f.o dpotrf.f.o dpotri.f.o dpotrs.f.o dtrtri.f.o ieeeck.f.o ilaenv.f.o lsame.f.o sgelqf.f.o sgels.f.o sgeqlf.f.o sgeqrf.f.o sgerqf.f.o sgesv.f.o sgetrf.f.o sgetri.f.o sgetrs.f.o slamch.f.o slarfb.f.o slarft.f.o slauum.f.o sposv.f.o spotrf.f.o spotri.f.o spotrs.f.o strtri.f.o xerbla.f.o zgelqf.f.o zgels.f.o zgeqlf.f.o zgeqrf.f.o zgerqf.f.o zgesv.f.o zgetrf.f.o zgetri.f.o zgetrs.f.o zlarfb.f.o zlarft.f.o zlauum.f.o zposv.f.o zpotrf.f.o zpotri.f.o zpotrs.f.o ztrtri.f.o
|
||||
# Create new library
|
||||
ar rcs ../liblapack_pic_pruned.a *.o
|
||||
cd ..
|
||||
|
@ -420,7 +398,8 @@ p=$(pwd)
|
|||
#Target architectures for the 'base' versions
|
||||
%ifarch s390x
|
||||
%define flags %{nil}
|
||||
%define base_options "-A IBMz9 -V 1"
|
||||
%define base_options "-A IBMz12 -V 1"
|
||||
#%define base_options "-A IBMz13 -V 8 -Si archdef 2"
|
||||
%endif
|
||||
|
||||
%ifarch x86_64
|
||||
|
@ -479,12 +458,14 @@ for type in %{types}; do
|
|||
elif [ "$type" = "corei1" ]; then
|
||||
arg_options="-A Corei1 -V 896"
|
||||
%define pr_corei1 %(echo $((%{__isa_bits}+2)))
|
||||
elif [ "$type" = "z10" ]; then
|
||||
arg_options="-A IBMz10 -V 1"
|
||||
%define pr_z10 %(echo $((%{__isa_bits}+2)))
|
||||
elif [ "$type" = "z196" ]; then
|
||||
arg_options="-A IBMz196 -V 1"
|
||||
%define pr_z196 %(echo $((%{__isa_bits}+4)))
|
||||
elif [ "$type" = "z14" ]; then
|
||||
thread_options="-t 4"
|
||||
arg_options="-A IBMz14 -V 4 -Si archdef 2"
|
||||
%define pr_z14 %(echo $((%{__isa_bits}+2)))
|
||||
elif [ "$type" = "z15" ]; then
|
||||
thread_options="-t 4"
|
||||
arg_options="-A IBMz15 -V 4 -Si archdef 2"
|
||||
%define pr_z15 %(echo $((%{__isa_bits}+4)))
|
||||
elif [ "$type" = "power7" ]; then
|
||||
thread_options="-t 4"
|
||||
arg_options="-A POWER7 -V 1"
|
||||
|
@ -497,7 +478,7 @@ for type in %{types}; do
|
|||
fi
|
||||
mkdir -p %{_arch}_${type}
|
||||
pushd %{_arch}_${type}
|
||||
../configure %{mode} $thread_options $arg_options -D c -DWALL -Fa alg '%{flags} -g -Wa,--noexecstack -fPIC ${RPM_LD_FLAGS}'\
|
||||
../configure %{mode} $thread_options $arg_options -D c -DWALL -F xc ' ' -Fa alg '%{flags} -D_FORTIFY_SOURCE=2 -g -Wa,--noexecstack,--generate-missing-build-notes=yes -fstack-protector-strong -fstack-clash-protection -fPIC -fplugin=annobin -Wl,-z,now'\
|
||||
--prefix=%{buildroot}%{_prefix} \
|
||||
--incdir=%{buildroot}%{_includedir} \
|
||||
--libdir=%{buildroot}%{_libdir}/${libname}
|
||||
|
@ -529,6 +510,8 @@ for type in %{types}; do
|
|||
pushd %{_arch}_${type}
|
||||
make DESTDIR=%{buildroot} install
|
||||
mv %{buildroot}%{_includedir}/atlas %{buildroot}%{_includedir}/atlas-%{_arch}-${type}
|
||||
mv %{buildroot}%{_includedir}/clapack.h %{buildroot}%{_includedir}/atlas-%{_arch}-${type}/clapack.h
|
||||
mv %{buildroot}%{_includedir}/cblas.h %{buildroot}%{_includedir}/atlas-%{_arch}-${type}/cblas.h
|
||||
if [ "$type" = "base" ]; then
|
||||
cp -pr lib/*.so* %{buildroot}%{_libdir}/atlas/
|
||||
rm -f %{buildroot}%{_libdir}/atlas/*.a
|
||||
|
@ -565,18 +548,15 @@ mkdir -p %{buildroot}%{_includedir}/atlas
|
|||
|
||||
|
||||
%check
|
||||
# Run make check but don't fail the build on these arches
|
||||
#%ifarch s390 aarch64 ppc64
|
||||
#for type in %{types}; do
|
||||
# pushd %{_arch}_${type}
|
||||
# make check ptcheck
|
||||
# popd
|
||||
#done
|
||||
#%else
|
||||
for type in %{types}; do
|
||||
if [ "$type" = "z14" ] || [ "$type" = "z15" ]; then
|
||||
# skip the tests (may fail due to illegal instructions).
|
||||
echo "Skipping tests for the $type subpackage"
|
||||
else
|
||||
pushd %{_arch}_${type}
|
||||
make check ptcheck
|
||||
popd
|
||||
fi
|
||||
done
|
||||
#%endif
|
||||
|
||||
|
@ -612,48 +592,35 @@ fi
|
|||
%endif
|
||||
|
||||
%ifarch %{ix86}
|
||||
|
||||
%post -n atlas-corei1 -p /sbin/ldconfig
|
||||
|
||||
%postun -n atlas-corei1 -p /sbin/ldconfig
|
||||
|
||||
%posttrans corei1-devel
|
||||
/usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \
|
||||
%{_includedir}/atlas-%{_arch}-corei1 %{pr_corei1}
|
||||
|
||||
%postun corei1-devel
|
||||
if [ $1 -ge 0 ] ; then
|
||||
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-corei1
|
||||
fi
|
||||
|
||||
# No arch specific packages
|
||||
%endif
|
||||
|
||||
%ifarch s390 s390x
|
||||
|
||||
%post -n atlas-z10 -p /sbin/ldconfig
|
||||
%post -n atlas-z14 -p /sbin/ldconfig
|
||||
|
||||
%postun -n atlas-z10 -p /sbin/ldconfig
|
||||
%postun -n atlas-z14 -p /sbin/ldconfig
|
||||
|
||||
%posttrans z10-devel
|
||||
%posttrans z14-devel
|
||||
/usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \
|
||||
%{_includedir}/atlas-%{_arch}-z10 %{pr_z10}
|
||||
%{_includedir}/atlas-%{_arch}-z14 %{pr_z14}
|
||||
|
||||
%postun z10-devel
|
||||
%postun z14-devel
|
||||
if [ $1 -ge 0 ] ; then
|
||||
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z10
|
||||
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z14
|
||||
fi
|
||||
|
||||
%post -n atlas-z196 -p /sbin/ldconfig
|
||||
%post -n atlas-z15 -p /sbin/ldconfig
|
||||
|
||||
%postun -n atlas-z196 -p /sbin/ldconfig
|
||||
%postun -n atlas-z15 -p /sbin/ldconfig
|
||||
|
||||
%posttrans z196-devel
|
||||
%posttrans z15-devel
|
||||
/usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \
|
||||
%{_includedir}/atlas-%{_arch}-z196 %{pr_z196}
|
||||
%{_includedir}/atlas-%{_arch}-z15 %{pr_z15}
|
||||
|
||||
%postun z196-devel
|
||||
%postun z15-devel
|
||||
if [ $1 -ge 0 ] ; then
|
||||
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z196
|
||||
/usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z15
|
||||
fi
|
||||
|
||||
%endif
|
||||
|
@ -699,7 +666,6 @@ fi
|
|||
%doc doc
|
||||
%{_libdir}/atlas/*.so
|
||||
%{_includedir}/atlas-%{_arch}-base/
|
||||
%{_includedir}/*.h
|
||||
%ghost %{_includedir}/atlas
|
||||
%{_libdir}/pkgconfig/atlas.pc
|
||||
|
||||
|
@ -720,7 +686,6 @@ fi
|
|||
%doc doc
|
||||
%{_libdir}/atlas-corei2/*.so
|
||||
%{_includedir}/atlas-%{_arch}-corei2/
|
||||
%{_includedir}/*.h
|
||||
%ghost %{_includedir}/atlas
|
||||
|
||||
%files corei2-static
|
||||
|
@ -740,7 +705,6 @@ fi
|
|||
%doc doc
|
||||
%{_libdir}/atlas-power8/*.so
|
||||
%{_includedir}/atlas-%{_arch}-power8/
|
||||
%{_includedir}/*.h
|
||||
%ghost %{_includedir}/atlas
|
||||
|
||||
%files power8-static
|
||||
|
@ -756,70 +720,107 @@ fi
|
|||
%doc doc
|
||||
%{_libdir}/atlas-power7/*.so
|
||||
%{_includedir}/atlas-%{_arch}-power7/
|
||||
%{_includedir}/*.h
|
||||
%ghost %{_includedir}/atlas
|
||||
|
||||
%files power7-static
|
||||
%{_libdir}/atlas-power7/*.a
|
||||
%endif
|
||||
|
||||
%ifarch %{ix86}
|
||||
|
||||
#%files corei1
|
||||
#%doc doc/README.dist
|
||||
#%dir %{_libdir}/atlas-corei1
|
||||
#%{_libdir}/atlas-corei1/*.so.*
|
||||
#%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-corei1.conf
|
||||
|
||||
#%files corei1-devel
|
||||
#%doc doc
|
||||
#%{_libdir}/atlas-corei1/*.so
|
||||
#%{_includedir}/atlas-%{_arch}-corei1/
|
||||
#%{_includedir}/*.h
|
||||
#%ghost %{_includedir}/atlas
|
||||
|
||||
#%files corei1-static
|
||||
#%{_libdir}/atlas-corei1/*.a
|
||||
%endif
|
||||
|
||||
%ifarch s390 s390x
|
||||
%files z10
|
||||
%doc doc/README.dist
|
||||
%dir %{_libdir}/atlas-z10
|
||||
%{_libdir}/atlas-z10/*.so.*
|
||||
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z10.conf
|
||||
|
||||
%files z10-devel
|
||||
%files z14
|
||||
%doc doc/README.dist
|
||||
%dir %{_libdir}/atlas-z14
|
||||
%{_libdir}/atlas-z14/*.so.*
|
||||
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z14.conf
|
||||
|
||||
%files z14-devel
|
||||
%doc doc
|
||||
%{_libdir}/atlas-z10/*.so
|
||||
%{_includedir}/atlas-%{_arch}-z10/
|
||||
%{_includedir}/*.h
|
||||
%{_libdir}/atlas-z14/*.so
|
||||
%{_includedir}/atlas-%{_arch}-z14/
|
||||
%ghost %{_includedir}/atlas
|
||||
|
||||
%files z10-static
|
||||
%{_libdir}/atlas-z10/*.a
|
||||
%files z14-static
|
||||
%{_libdir}/atlas-z14/*.a
|
||||
|
||||
%files z196
|
||||
|
||||
%files z15
|
||||
%doc doc/README.dist
|
||||
%dir %{_libdir}/atlas-z196
|
||||
%{_libdir}/atlas-z196/*.so.*
|
||||
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z196.conf
|
||||
%dir %{_libdir}/atlas-z15
|
||||
%{_libdir}/atlas-z15/*.so.*
|
||||
%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z15.conf
|
||||
|
||||
%files z196-devel
|
||||
%files z15-devel
|
||||
%doc doc
|
||||
%{_libdir}/atlas-z196/*.so
|
||||
%{_includedir}/atlas-%{_arch}-z196/
|
||||
%{_includedir}/*.h
|
||||
%{_libdir}/atlas-z15/*.so
|
||||
%{_includedir}/atlas-%{_arch}-z15/
|
||||
%ghost %{_includedir}/atlas
|
||||
|
||||
%files z196-static
|
||||
%{_libdir}/atlas-z196/*.a
|
||||
%files z15-static
|
||||
%{_libdir}/atlas-z15/*.a
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
#enable_native_atlas if
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Wed Jan 19 2022 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-19
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild
|
||||
|
||||
* Wed Jul 21 2021 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-18
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild
|
||||
|
||||
* Tue Apr 20 2021 Jakub Martisko <jamartis@redhat.com> - 3.10.3-17
|
||||
- Disable the custom vvrsum function introduced in the ibm patch
|
||||
Related: 1951565
|
||||
|
||||
* Tue Apr 20 2021 Jakub Martisko <jamartis@redhat.com> - 3.10.3-16
|
||||
- Move the cblas.h and clapack.h to include/atlas to resolve conflict with lapack
|
||||
- Resolves: #1948187
|
||||
|
||||
* Mon Feb 22 2021 Jakub Martisko <jamartis@redhat.com> - 3.10.3-15
|
||||
- Remove unused scriptlets
|
||||
|
||||
* Mon Feb 22 2021 Jakub Martisko <jamartis@redhat.com> - 3.10.3-14
|
||||
- Fix the messed scriptlets
|
||||
Resolves: #1929845
|
||||
|
||||
* Tue Jan 26 2021 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-13
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild
|
||||
|
||||
* Thu Aug 27 2020 Jakub Martisko <jamartis@redhat.com> - 3.10.3-13
|
||||
- Sync with rhel 8.3 + cleanup
|
||||
- Add new subpackages - z{14,15}
|
||||
- Unlike in rhel8, the base subpackage still needs to be built for z12.
|
||||
- Covscan related bugfixes
|
||||
- Spec and git cleanup (remove unused patches/sources)
|
||||
|
||||
* Mon Jul 27 2020 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-12
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild
|
||||
|
||||
* Thu Feb 13 2020 Jakub Martisko <jamartis@redhat.com> - 3.10.3-11
|
||||
- Drop IBM z10 and z196 subpackages
|
||||
- s390 is now optimized for z12
|
||||
Related: #1780286
|
||||
|
||||
* Mon Jan 27 2020 Jakub Martisko <jamartis@redhat.com> - 3.10.3-10
|
||||
- Fix compatibility with gcc 10
|
||||
- Sync compiler/linker flags with RHEL
|
||||
|
||||
* Wed Jul 24 2019 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-9
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_31_Mass_Rebuild
|
||||
|
||||
* Thu Jan 31 2019 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-8
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_30_Mass_Rebuild
|
||||
|
||||
* Sun Oct 14 2018 Peter Robinson <pbrobinson@fedoraproject.org> 3.10.3-7
|
||||
- Update requires for alternatives
|
||||
|
||||
* Thu Jul 12 2018 Fedora Release Engineering <releng@fedoraproject.org> - 3.10.3-6
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild
|
||||
|
||||
* Wed Apr 11 2018 Jakub Martisko <jamartis@redhat.com> - 3.10.3-5
|
||||
- Pass RPM_LD_FLAGS to linker
|
||||
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
diff -up ATLAS/include/atlas_genparse.h.than ATLAS/include/atlas_genparse.h
|
||||
--- ATLAS/include/atlas_genparse.h.than 2015-11-26 10:53:55.056586198 -0500
|
||||
+++ ATLAS/include/atlas_genparse.h 2015-11-26 10:56:00.168537914 -0500
|
||||
@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str)
|
||||
}
|
||||
|
||||
/* procedure 7 */
|
||||
-static int GetDoubleArr(char *str, int N, double *d)
|
||||
+static int GetDoubleArr(char *callerstr, int N, double *d)
|
||||
/*
|
||||
* Reads in a list with form "%le,%le...,%le"; N-length d recieves doubles.
|
||||
* RETURNS: the number of doubles found, or N, whichever is less
|
||||
*/
|
||||
{
|
||||
- int i=1;
|
||||
+ int i;
|
||||
+ char *dupstr = DupString(callerstr);
|
||||
+ char *str = dupstr;
|
||||
+ /* strip the string to end on first white space */
|
||||
+ for (i=0; dupstr[i]; i++)
|
||||
+ {
|
||||
+ if (isspace(dupstr[i])) {
|
||||
+ dupstr[i] = '\0';
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ i = 1;
|
||||
assert(sscanf(str, "%le", d) == 1);
|
||||
while (i < N)
|
||||
{
|
||||
@@ -167,6 +178,7 @@ static int GetDoubleArr(char *str, int N
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
+ free(dupstr);
|
||||
return(i);
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
From: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
Subject: initialize malloc memory.invtrsm.wms.oct23
|
||||
Date: Mon, 14 Apr 2014 17:18:53 +0200
|
||||
References: http://sourceforge.net/p/math-atlas/mailman/message/32471499/
|
||||
|
||||
initialize malloc memory invtrsm.c
|
||||
|
||||
|
||||
Signed-off-by: Will Schmidt <will_schmidt@vnet.ibm.com>
|
||||
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
||||
---
|
||||
ATLAS/tune/blas/level3/invtrsm.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
Index: ATLAS/tune/blas/level3/invtrsm.c
|
||||
===================================================================
|
||||
--- ATLAS.orig/tune/blas/level3/invtrsm.c
|
||||
+++ ATLAS/tune/blas/level3/invtrsm.c
|
||||
@@ -525,6 +525,7 @@ static double RunTiming
|
||||
a = A = malloc(i * ATL_MulBySize(incA));
|
||||
if (A)
|
||||
{
|
||||
+ memset(A,0,i*ATL_MulBySize(incA)); /* wms (!!) malloc call above returns non-initialized memory. */
|
||||
if (Uplo == TestGE)
|
||||
for (i=0; i < k; i++)
|
||||
Mjoin(PATL,gegen)(N, N, A+i*incA, lda, N+lda);
|
3
sources
3
sources
|
@ -15,3 +15,6 @@ SHA512 (ARMa732.tar) = 47d6564b5a439bc3778ccc79242220b236c7dc8d36e12ce6850c7e9a0
|
|||
SHA512 (ARMa732.tar.bz2) = 8b83b59a32f18d2cd432c205efd4358b0000ce1685799f2f38a60532bc925e9cd871371d2dfd226ab8e30e830bf608f022d63bcd26f26f9fe74acab067bd4d4f
|
||||
SHA512 (POWER864LEVSXp4.tar.bz2) = e2fa637061a4a4806bc091009c37ccd719c4c4051baf36ed451917e255375881fa168caa5ca296ae9c89bb28523d9015fda42a5dbc51aef4c66efbf6efd966d2
|
||||
SHA512 (K7323DNow.tgz) = e1d5e4208ce454b5f5daa68663d2dd28a2bd3cc97496e4e1515df880b9ccd00bcc75bd820402c3b2bf8409f98500e43f2481fbf5dd480f7d0ba60fe2f82a1ac1
|
||||
SHA512 (IBMz1364VXZ.tar.bz2) = abcd32e4e92eaee702bb9583179d7019b551c5cdc45733ef683a62627d52d002425f7eb9515c15c700160920a6cda9bdd9586a748e6bea0425958346c341481f
|
||||
SHA512 (IBMz1464VXZ2.tar.bz2) = 6783ec5658d59f8a1f1270fa9845c5aad3a320b03e7b0bee7e16c0357679118bec0bf99f0ada8815620d2df17ab10c3bd91dff2454dbd4f0e6401de69944317f
|
||||
SHA512 (IBMz1564VXZ2.tar.bz2) = ff522d80d758c508c71989a189442121e4be2f0309c7c9dcb87174bacef6a3c6caf2debc069311335a9c14930450e84a81f9be171e46f4a0c2da5ae0771a9b1c
|
||||
|
|
Loading…
Reference in New Issue