221 lines
7.7 KiB
Diff
221 lines
7.7 KiB
Diff
From: Michel Normand <normand@linux.vnet.ibm.com>
|
|
Subject: atlas.3.10.2 ppc64le abiv2 patch
|
|
Date: Mon, 28 Jul 2014 04:29:05 -0400
|
|
|
|
atlas.3.10.2 abiv2 step2: completes the ABI v2 changes already present in atlas 3.10.2
|
|
* some files still emit the ABI v1 .opd section, which must be disabled for ABI v2:
|
|
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
|
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
|
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
|
|
|
atlas.3.10.2 ppc64le abiv2 step3
|
|
* change offsets of parameters read from stack to avoid some segfaults.
|
|
(the value changes 120 => 104 and 128 => 112 were identified by gdb investigation)
|
|
|
|
Despite this step3 patch, two problems remain on the ppc64le architecture:
|
|
* TODO: segfaults are still reported on the console during build/check,
|
|
but this is not critical (when make check is not run), and the rpms are still generated on Fedora.
|
|
Unable to investigate further because of the problem tracked by issue 950:
|
|
https://sourceforge.net/p/math-atlas/support-requests/950/
|
|
|
|
* TODO: make check fails because of an xsslvtst execution failure
|
|
related to vector assembly code that assumes a big-endian environment,
|
|
as written in ATL_cmm4x4x128_av.c and ATL_smm4x4x128_av.c.
|
|
Supporting little-endian would need significant work, as per the
|
|
endianness comments on all PowerPC vector instructions in:
|
|
https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D/$file/vector_simd_pem.ppc.2005AUG23.pdf
|
|
|
|
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
|
|
---
|
|
tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 7 +++++++
|
|
tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 7 +++++++
|
|
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 9 ++++++++-
|
|
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 20 ++++++++++++++++++--
|
|
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 23 ++++++++++++++++++++++-
|
|
5 files changed, 62 insertions(+), 4 deletions(-)
|
|
|
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
|
===================================================================
|
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
|
|
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
|
|
.globl Mjoin(_,ATL_USERMM)
|
|
Mjoin(_,ATL_USERMM):
|
|
#else
|
|
- #if defined(ATL_USE64BITS)
|
|
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
|
/*
|
|
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
|
*/
|
|
@@ -324,8 +324,15 @@ ATL_USERMM:
|
|
#endif
|
|
|
|
#ifdef ATL_USE64BITS
|
|
+#if _CALL_ELF == 2
|
|
+/* ABIv2 */
|
|
+ ld pC0, 104(r1)
|
|
+ ld incCn, 112(r1)
|
|
+#else
|
|
+/* ABIv1 */
|
|
ld pC0, 120(r1)
|
|
ld incCn, 128(r1)
|
|
+#endif
|
|
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
|
lwz pC0, 68(r1)
|
|
lwz incCn, 72(r1)
|
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
|
===================================================================
|
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
|
|
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
|
|
const TYPE beta, TYPE *C, const int ldc)
|
|
(r10) 8(r1)
|
|
*******************************************************************************
|
|
-64 bit ABIs:
|
|
+64 bit ABIv1s:
|
|
r3 r4 r5 r6/f1
|
|
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
|
r7 r8 r9 r10
|
|
const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
|
f2 120(r1) 128(r1)
|
|
const TYPE beta, TYPE *C, const int ldc)
|
|
+
|
|
+64 bit ABIv2s:
|
|
+ r3 r4 r5 r6/f1
|
|
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
|
|
+ r7 r8 r9 r10
|
|
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
|
|
+ f2 104(r1) 112(r1)
|
|
+ const TYPE beta, TYPE *C, const int ldc)
|
|
#endif
|
|
#ifdef ATL_AS_AIX_PPC
|
|
.csect .text[PR]
|
|
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
|
|
.globl Mjoin(_,ATL_USERMM)
|
|
Mjoin(_,ATL_USERMM):
|
|
#else
|
|
- #if defined(ATL_USE64BITS)
|
|
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
|
/*
|
|
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
|
*/
|
|
@@ -257,9 +265,17 @@ ATL_USERMM:
|
|
#endif
|
|
#endif
|
|
|
|
+
|
|
#if defined (ATL_USE64BITS)
|
|
+#if _CALL_ELF == 2
|
|
+/* ABIv2 */
|
|
+ ld pC0, 104(r1)
|
|
+ ld incCn, 112(r1)
|
|
+#else
|
|
+/* ABIv1 */
|
|
ld pC0, 120(r1)
|
|
ld incCn, 128(r1)
|
|
+#endif
|
|
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
|
|
lwz pC0, 68(r1)
|
|
lwz incCn, 72(r1)
|
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
|
===================================================================
|
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
|
|
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
|
|
.globl Mjoin(_,ATL_USERMM)
|
|
Mjoin(_,ATL_USERMM):
|
|
#else
|
|
- #if defined(ATL_USE64BITS)
|
|
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
|
|
/*
|
|
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
|
|
*/
|
|
@@ -221,8 +221,15 @@ ATL_USERMM:
|
|
* kernel instead
|
|
*/
|
|
#if defined (ATL_USE64BITS)
|
|
+#if _CALL_ELF == 2
|
|
+/* ABIv2 */
|
|
+ ld r10, 104(r1)
|
|
+ ld r5, 112(r1)
|
|
+#else
|
|
+/* ABIv1 */
|
|
ld r10, 120(r1)
|
|
ld r5, 128(r1)
|
|
+#endif
|
|
#elif defined(ATL_AS_OSX_PPC)
|
|
lwz r10, 60(r1)
|
|
lwz r5, 64(r1)
|
|
@@ -285,8 +292,15 @@ ATL_USERMM:
|
|
eqv r0, r0, r0 /* all 1s */
|
|
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
|
#if defined (ATL_USE64BITS)
|
|
+#if _CALL_ELF == 2
|
|
+ /* ABIv2 */
|
|
+ ld pC0, FSIZE+104(r1)
|
|
+ ld ldc, FSIZE+112(r1)
|
|
+#else
|
|
+ /* ABIv1 */
|
|
ld pC0, FSIZE+120(r1)
|
|
ld ldc, FSIZE+128(r1)
|
|
+#endif
|
|
#elif defined(ATL_AS_OSX_PPC)
|
|
lwz pC0, FSIZE+60(r1)
|
|
lwz ldc, FSIZE+64(r1)
|
|
@@ -4258,8 +4272,15 @@ UNALIGNED_C:
|
|
eqv r0, r0, r0 /* all 1s */
|
|
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
|
#if defined (ATL_USE64BITS)
|
|
+#if _CALL_ELF == 2
|
|
+ /* ABIv2 */
|
|
+ ld pC0, FSIZE+104(r1)
|
|
+ ld ldc, FSIZE+112(r1)
|
|
+#else
|
|
+ /* ABIv1 */
|
|
ld pC0, FSIZE+120(r1)
|
|
ld ldc, FSIZE+128(r1)
|
|
+#endif
|
|
#elif defined(ATL_AS_OSX_PPC)
|
|
lwz pC0, FSIZE+60(r1)
|
|
lwz ldc, FSIZE+64(r1)
|
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
|
===================================================================
|
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
|
|
@@ -258,8 +258,15 @@ ATL_USERMM:
|
|
eqv r0, r0, r0 /* all 1s */
|
|
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
|
|
#if defined (ATL_USE64BITS)
|
|
+#if _CALL_ELF == 2
|
|
+/* ABIv2 */
|
|
+ ld pC0, FSIZE+104(r1)
|
|
+ ld ldc, FSIZE+112(r1)
|
|
+#else
|
|
+/* ABIv1 */
|
|
ld pC0, FSIZE+120(r1)
|
|
ld ldc, FSIZE+128(r1)
|
|
+#endif
|
|
#elif defined(ATL_AS_OSX_PPC)
|
|
lwz pC0, FSIZE+60(r1)
|
|
lwz ldc, FSIZE+64(r1)
|
|
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
|
===================================================================
|
|
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
|
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
|
|
@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM):
|
|
*/
|
|
#ifdef ATL_GAS_LINUX_PPC
|
|
#ifdef ATL_USE64BITS
|
|
+ #if _CALL_ELF == 2
|
|
+ /* ABIv2 */
|
|
+ ld pC0, 104(r1)
|
|
+ ld incCn, 112(r1)
|
|
+ #else
|
|
+ /* ABIv1 */
|
|
ld pC0, 120(r1)
|
|
ld incCn, 128(r1)
|
|
+ #endif
|
|
#else
|
|
lwz incCn, FSIZE+8(r1)
|
|
#endif
|