Add the patches that should have been part of the previous commit

This commit is contained in:
Jakub Martisko 2020-09-08 18:54:01 +02:00
parent 9a83c6c2ac
commit bb25731117
9 changed files with 895 additions and 0 deletions

View File

@ -0,0 +1,30 @@
From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:03:52 +0100
Subject: [PATCH 1/8] Avoid c99 standard compiler
When probing for a usable GCC, the existing code already dropped path
names that contained "c89" or "c90", because these compilers don't have
the GCC extensions enabled. This patch also drops names with "c99" in
them.
---
CONFIG/src/atlconf_misc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c
index 63cb1ef..fb62214 100644
--- a/CONFIG/src/atlconf_misc.c
+++ b/CONFIG/src/atlconf_misc.c
@@ -824,7 +824,8 @@ int CompIsGcc(char *comp)
int i;
cmpname = NameWithoutPath(comp);
- if (strstr(cmpname, "c89") || strstr(cmpname, "c90"))
+ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") ||
+ strstr(cmpname, "c99"))
{
free(cmpname);
return(0);
--
2.23.0

View File

@ -0,0 +1,38 @@
From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:20:19 +0100
Subject: [PATCH 2/8] Fix -rpath-link command line options
The "-rpath-link" command line options were written in the wrong syntax,
causing errors in the build. This is fixed.
---
makes/Make.lib | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/makes/Make.lib b/makes/Make.lib
index 4ceff02..b322a32 100644
--- a/makes/Make.lib
+++ b/makes/Make.lib
@@ -47,11 +47,11 @@ cshared : fat_cshared
#
LDTRY_WIN:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \
+ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
- -Wl,"-rpath-link $(LIBINSTdir)" \
+ -Wl,"-rpath-link=$(LIBINSTdir)" \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
GCCTRY_norp_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
@@ -113,7 +113,7 @@ TRYALL_WIN :
#
LDTRY:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) \
+ -rpath-link=$(LIBINSTdir) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY:
$(GOODGCC) -shared -o $(outso).$(so_ver) \

View File

@ -0,0 +1,55 @@
From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 5 Dec 2018 18:59:15 +0100
Subject: [PATCH 3/8] Fix SIMD support on IBM z13
The header file atlas_simd.h contained a syntax error and a few functional
errors that affected IBM z13. It prevented any SIMD kernels from being
compiled successfully for that platform. This is fixed. The macro
vec_madd is avoided, because some GCC versions don't implement it
correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used
instead.
---
include/atlas_simd.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index baee6b1..68daf79 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -69,7 +69,7 @@
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_VXZ)
- #if ATL_VLEN != 2;
+ #if ATL_VLEN != 2
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_NEON)
@@ -390,19 +390,19 @@
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
#endif
- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vzero(v_) v_ = vec_splats((double)0.0)
#define ATL_vcopy(d_, s_) d_ = s_
- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_)))
#define ATL_vuld(v_, p_) ATL_vld(v_, p_)
#define ATL_vust(p_, v_) ATL_vst(p_, v_)
#define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
#define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_)
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
{ ATL_VTYPE t_;\
t_ = vec_splat(s0_, 1); \
- s0 += t_; \
+ s0_ += t_; \
}
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
--
2.23.0

View File

@ -0,0 +1,46 @@
From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 19:44:32 +0100
Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible
The probing of the L1 data cache size is sometimes not reliable. This can
cause the tuning to yield varying, sub-optimal results. But on Linux the
L1 data cache size can usually be retrieved with sysconf instead, which is
faster and more reliable. Do this whenever possible.
---
tune/sysinfo/L1CacheSize.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c
index e62a273..dffa76e 100644
--- a/tune/sysinfo/L1CacheSize.c
+++ b/tune/sysinfo/L1CacheSize.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#define REPS 4096
@@ -276,7 +277,16 @@ int main(int nargs, char *args[])
exit(-1);
}
if (nargs > 1) MaxSize = atoi(args[1]);
- L1Size = GetL1Size(MaxSize, 1.08);
+
+#ifdef _SC_LEVEL1_DCACHE_SIZE
+ {
+ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE);
+ L1Size = res > 0 ? (int) (res / 1024) : 0;
+ }
+#endif
+
+ if (!L1Size)
+ L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
--
2.23.0

View File

@ -0,0 +1,68 @@
From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 20:06:27 +0100
Subject: [PATCH 5/8] Optimizations for IBM z13
Perform some optimizations for IBM z13:
- Compile with -O2 instead of -O.
- Streamline vector loads/stores.
- Define the vvrsum2 macro.
Also, use the compile option -march=z13 instead of -march=native.
---
CONFIG/src/atlcomp.txt | 8 +++-----
include/atlas_simd.h | 11 +++++------
2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index aa31604..2ac71cf 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77
'gfortran' '-O3 -funroll-loops'
MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-O3 -funroll-loops'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc
- 'gcc' '-march=native -O -mvx -mzvector'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc
- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2'
+MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
- 'gfortran' '-march=native -O -mvx -mzvector'
+ 'gfortran' '-march=z13 -mtune=z13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index 68daf79..f171933 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -384,8 +384,8 @@
#endif
#define ATL_VTYPE vector double
#if (defined(DREAL) || defined(DCPLX))
- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; }
- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];}
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
#else
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
@@ -400,10 +400,9 @@
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
#define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
- { ATL_VTYPE t_;\
- t_ = vec_splat(s0_, 1); \
- s0_ += t_; \
- }
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX))
--
2.23.0

View File

@ -0,0 +1,276 @@
From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 25 Mar 2020 20:11:19 +0100
Subject: [PATCH 6/8] Add IBM z14 support
Add general support for IBM z14. Also detect and handle the vector
enhancements facility 1, which specifically adds single-precision FP
arithmetic for vectors.
---
CONFIG/include/atlconf.h | 14 ++++----
CONFIG/src/Makefile | 6 ++++
CONFIG/src/atlcomp.txt | 4 +++
CONFIG/src/backend/Make.ext | 4 ++-
CONFIG/src/backend/archinfo_linux.c | 3 +-
CONFIG/src/backend/probe_vxz2.c | 12 +++++++
CONFIG/src/probe_comp.c | 3 +-
include/atlas_prefetch.h | 3 +-
include/atlas_simd.h | 53 +++++++++++++++++++++++++++++
9 files changed, 91 insertions(+), 11 deletions(-)
create mode 100644 CONFIG/src/backend/probe_vxz2.c
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index e51d56d..3828fdb 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 62
+#define NMACH 63
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase};
enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort};
enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr};
-#define NISA 15
+#define NISA 16
static char *ISAXNAM[NISA] =
- {"", "VSX", "VXZ", "AltiVec",
+ {"", "VSX", "VXZ2", "VXZ", "AltiVec",
"AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow",
"FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"};
enum ISAEXT
- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV,
+ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV,
ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow,
ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC};
diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile
index 212b9d7..782a4cf 100644
--- a/CONFIG/src/Makefile
+++ b/CONFIG/src/Makefile
@@ -158,6 +158,12 @@ IRun_NEON :
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \
redir=config0.out
- cat config0.out
+IRun_VXZ2 :
+ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \
+ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \
+ redir=config0.out
+ - cat config0.out
IRun_VXZ :
$(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \
$(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2ac71cf..2cfacc2 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z13 -mtune=z13 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=z14 -mtune=z14 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext
index 4743353..794babf 100644
--- a/CONFIG/src/backend/Make.ext
+++ b/CONFIG/src/backend/Make.ext
@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \
probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \
probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \
probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \
- probe_vxz.c
+ probe_vxz2.c probe_vxz.c
all : $(files)
@@ -107,6 +107,8 @@ flibchkF.f : $(basf)
$(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f
probe_arm32_FPABI.c : $(basf)
$(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI
+probe_vxz2.c : $(basf)
+ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2
probe_vxz.c : $(basf)
$(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz
probe_aff_SETAFFNP.c : $(basf)
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index cdcee92..ed6f476 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196;
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */
+ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
break;
diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c
new file mode 100644
index 0000000..a69d92d
--- /dev/null
+++ b/CONFIG/src/backend/probe_vxz2.c
@@ -0,0 +1,12 @@
+#include <vecintrin.h>
+void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y
+{
+ vector float vx, vy;
+ vx = (vector float) {x[0], x[1], x[2], x[3]};
+ vy = (vector float) {y[0], y[1], y[2], y[3]};
+ vy += vx;
+ z[0] = vy[0];
+ z[1] = vy[1];
+ z[2] = vy[2];
+ z[3] = vy[3];
+}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 1652e24..857ea82 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb,
vp = "-mavx2 -mfma";
else if (vecexts & (1<<ISA_VSX))
vp = "-mvsx";
- else if (vecexts & (1<<ISA_VXZ))
+ else if ((vecexts & (1<<ISA_VXZ)) || (vecexts & (1<<ISA_VXZ2)))
vp = "-mvx -mzvector";
else if (vecexts & (1<<ISA_AV))
vp = "-maltivec";
@@ -1207,6 +1207,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
{
case IbmZ12:
case IbmZ13:
+ case IbmZ14:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index e7988a7..fa426ac 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -155,7 +155,8 @@
#define ATL_L1LS 32
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
- defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13)
+ defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
+ defined(ATL_ARCH_IbmZ14)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index f171933..eb75577 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -68,6 +68,11 @@
((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
#define ATL_FRCGNUVEC
#endif
+ #elif defined(ATL_VXZ2)
+ #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \
+ ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
+ #define ATL_FRCGNUVEC
+ #endif
#elif defined(ATL_VXZ)
#if ATL_VLEN != 2
#define ATL_FRCGNUVEC
@@ -113,6 +118,12 @@
#else
#define ATL_VLEN 2
#endif
+ #elif defined(ATL_VXZ2)
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VLEN 4
+ #else
+ #define ATL_VLEN 2
+ #endif
#elif defined(ATL_VXZ)
#define ATL_VLEN 2
#elif defined(ATL_NEON)
@@ -376,6 +387,48 @@
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#endif
+#elif defined(ATL_VXZ2)
+ #include <vecintrin.h>
+
+ #define ATL_VPERMI(s_, t_, i_) \
+ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_))
+
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VTYPE vector float
+ #if ATL_VLEN != 4
+ #error "VSXZ2 supports only VLEN = 4 for floats!"
+ #endif
+ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \
+ { ATL_VTYPE t0_, t1_; \
+ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \
+ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \
+ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \
+ }
+ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2)
+ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3)
+ #else /* double precision */
+ #define ATL_VTYPE vector double
+ #if ATL_VLEN != 2
+ #error "VSXZ2 supports only VLEN = 2 for doubles!"
+ #endif
+ #define ATL_vvrsum1(s0_) \
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
+ #endif
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
+ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vcopy(d_, s_) d_ = s_
+ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_))
+ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_))
+ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
+ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
+ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
+ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
+ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_VXZ)
#include <vecintrin.h>
--
2.23.0

View File

@ -0,0 +1,265 @@
From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 29 May 2019 17:51:34 +0200
Subject: [PATCH 7/8] Enable "cross-compile"
This adds support for building ATLAS without running any target code. In
order for this to work, the archdefs must contain some additional files
that would otherwise be built during various tuning steps; see the new
targets extra_get and extra_put in "CONFIG/ARCHS/Makefile".
Even if the archdefs contain these additional files, cross compilation
is *not* automatically enabled. To activate it and disable tuning at
build time, add the option "-Si archdef 2" when running "configure".
---
CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++
bin/atlas_install.c | 2 ++
makes/Make.aux | 10 +++++-----
makes/Make.bin | 22 ++++++++++++++++++++++
makes/Make.l3tune | 6 ++++++
makes/Make.sysinfo | 8 +++++++-
6 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile
index 321e05c..e61b5a0 100644
--- a/CONFIG/ARCHS/Makefile
+++ b/CONFIG/ARCHS/Makefile
@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt
- cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/.
rm -f xnegflt
archput : sys_put kern_put gemm_put la_put
+
+ifdef ATL_NOTUNE
+
+# To avoid tuning, some extra files are needed.
+
+extra_get :
+ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/
+
+extra_put :
+ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/.
+
+ArchNew : extra_get
+
+archput : extra_put
+
+endif
diff --git a/bin/atlas_install.c b/bin/atlas_install.c
index de3eb3a..3c811e6 100644
--- a/bin/atlas_install.c
+++ b/bin/atlas_install.c
@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA)
ATL_Cassert(system("make IBozoL1.grd\n")==0,
"USING BOZO L1 DEFAULTS", NULL);
}
+ if (ARCHDEF >= 2)
+ setenv("ATL_NOTUNE", "1", 1);
if (ARCHDEF)
DefInstall = !system("make IArchDef.grd\n");
diff --git a/makes/Make.aux b/makes/Make.aux
index 1f769c8..c793028 100644
--- a/makes/Make.aux
+++ b/makes/Make.aux
@@ -113,23 +113,23 @@ clean :
$(ATLFWAIT) :
cd $(BINdir) ; $(MAKE) xatlas_waitfile
-$(INCAdir)/atlas_type.h : $(ATLFWAIT)
+$(INCAdir)/atlas_type.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h
$(ATLFWAIT) -f $(INCAdir)/atlas_type.h
sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h
dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h
cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h
zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h
diff --git a/makes/Make.bin b/makes/Make.bin
index 1035cb9..acad578 100644
--- a/makes/Make.bin
+++ b/makes/Make.bin
@@ -163,7 +163,9 @@ IRunMADef :
cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre)
IRunMMDef :
+ifndef ATL_NOTUNE
cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre)
+endif
cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2
cd $(MMTdir) ; $(MAKE) install pre=$(pre)
IKillL1 : force_build
@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \
cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/.
$(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r2install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre)
+endif
$(R1Tdir)/res/$(pre)R1K.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r1install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum
cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/.
INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \
$(R1Tdir)/res/$(pre)R2K.sum
cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/.
+ifndef ATL_NOTUNE
cd $(R1Tdir) ; $(MAKE) $(pre)nxtune
+else
+ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib
+endif
$(MVTdir)/res/$(pre)MVNK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum
cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/.
$(MVTdir)/res/$(pre)MVTK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum
cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/.
diff --git a/makes/Make.l3tune b/makes/Make.l3tune
index eaf7d7d..cd7f5f1 100644
--- a/makes/Make.l3tune
+++ b/makes/Make.l3tune
@@ -118,6 +118,7 @@ res/atlas_strsmXover.h :
cp $(strsmXover) res/.
stsmfc :
+ifndef ATL_NOTUNE
rm -f $(strsmXover)
cd $(L3Bdir) ; $(MAKE) slib
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \
@@ -128,6 +129,7 @@ stsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) slib
dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h
@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h :
cp $(dtrsmXover) res/.
dtsmfc :
+ifndef ATL_NOTUNE
rm -f $(dtrsmXover)
cd $(L3Bdir) ; $(MAKE) dlib
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \
@@ -148,6 +151,7 @@ dtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) dlib
qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h
@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h :
cp $(qtrsmXover) res/.
qtsmfc :
+ifndef ATL_NOTUNE
rm -f $(qtrsmXover)
cd $(L3Bdir) ; $(MAKE) qlib
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \
@@ -168,6 +173,7 @@ qtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) qlib
$(pre)tsmfc.o : force_build
diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo
index 2b7dfdc..8e5dab2 100644
--- a/makes/Make.sysinfo
+++ b/makes/Make.sysinfo
@@ -5,6 +5,7 @@ maxlat=6
mflop=200
flags=
+ifndef ATL_NOTUNE
sTestFlags : force_build
$(MAKE) srbob `cat res/sBEST` pre='s' type=float
@@ -85,12 +86,14 @@ RunLamch : xemit_lamch
cp res/atlas_?lamch.h $(INCAdir)/.
RunTyp: xemit_typ
$(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h
+endif
xemit_buildinfo : emit_buildinfo.o
$(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o
xsyssum : GetSysSum.o
$(XCC) $(XCCFLAGS) -o $@ GetSysSum.o
+ifndef ATL_NOTUNE
xL1 : time.o L1CacheSize.o
$(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o
@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c
$(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c
xmasrch : $(mySRCdir)/masrch.c
$(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c
+endif
ATL_cputime.c :
cp $(mySRCdir)/ATL_cputime.c .
@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c
GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c
+
+ifndef ATL_NOTUNE
time.o : $(mySRCdir)/time.c
$(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c
emit_lamch.o : $(mySRCdir)/emit_lamch.c
@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c
tlb.o : $(mySRCdir)/tlb.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c
-
+endif
force_build :
--
2.23.0

View File

@ -0,0 +1,105 @@
From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu, 26 Mar 2020 17:14:49 +0100
Subject: [PATCH 8/8] Add IBM z15 support
Add support for specifying "IBMz15" as target architecture.
---
CONFIG/include/atlconf.h | 8 ++++----
CONFIG/src/atlcomp.txt | 4 ++++
CONFIG/src/backend/archinfo_linux.c | 1 +
CONFIG/src/probe_comp.c | 1 +
include/atlas_prefetch.h | 2 +-
5 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index 3828fdb..382601f 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 63
+#define NMACH 64
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15,
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2cfacc2..acb2c83 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z14 -mtune=z14 -O2'
MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=arch13 -mtune=arch13 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=arch13 -mtune=arch13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index ed6f476..934a005 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15;
else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 857ea82..88bb25e 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
case IbmZ12:
case IbmZ13:
case IbmZ14:
+ case IbmZ15:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index fa426ac..583f19d 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -156,7 +156,7 @@
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
- defined(ATL_ARCH_IbmZ14)
+ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
--
2.23.0

12
atlas-getri.patch Normal file
View File

@ -0,0 +1,12 @@
diff --git a/src/testing/ATL_f77getri.c b/src/testing/ATL_f77getri.c
index 2cc576c..7ff8eba 100644
--- a/src/testing/ATL_f77getri.c
+++ b/src/testing/ATL_f77getri.c
@@ -97,7 +97,6 @@ int f77getri(const enum ATLAS_ORDER Order, const int N, TYPE *A, const int lda,
#ifdef ATL_FunkyInts
*lwork = F77lwork;
for (i=0; i < MN; i++) ipiv[i] = F77ipiv[i] + 1;
- free(F77ipiv);
#else
for (i=0; i < MN; i++) ipiv[i]++;
#endif