openblas/2010.patch

500 lines
16 KiB
Diff

From dc6ac9eab0c59bcf56c1c512c099723215609fb2 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 12 Feb 2019 15:33:48 +0100
Subject: [PATCH 1/4] Fix declaration of input arguments in the x86_64
 s/dGEMV_T and s/dGEMV_N kernels

Arguments 0 and 1 need to be tagged as both input and output
---
kernel/x86_64/dgemv_n_4.c | 10 +++++-----
kernel/x86_64/dgemv_t_4.c | 18 +++++++++---------
kernel/x86_64/sgemv_n_4.c | 14 +++++++-------
kernel/x86_64/sgemv_t_4.c | 18 +++++++++---------
4 files changed, 30 insertions(+), 30 deletions(-)
diff --git a/kernel/x86_64/dgemv_n_4.c b/kernel/x86_64/dgemv_n_4.c
index 6d2530e81..6d33641e9 100644
--- a/kernel/x86_64/dgemv_n_4.c
+++ b/kernel/x86_64/dgemv_n_4.c
@@ -111,9 +111,9 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"jnz 1b \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -166,9 +166,9 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *a
"jnz 1b \n\t"
:
+ "+r" (i), // 0
+ "+r" (n) // 1
:
- "r" (i), // 0
- "r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4
diff --git a/kernel/x86_64/dgemv_t_4.c b/kernel/x86_64/dgemv_t_4.c
index a7478e3a8..ed672a757 100644
--- a/kernel/x86_64/dgemv_t_4.c
+++ b/kernel/x86_64/dgemv_t_4.c
@@ -127,9 +127,9 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT *ap0, FLOAT *ap1, FLOAT *x, FLOAT
"movsd %%xmm11,8(%2) \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (y), // 2
"r" (ap0), // 3
"r" (ap1), // 4
@@ -195,9 +195,9 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"movsd %%xmm10, (%2) \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (y), // 2
"r" (ap), // 3
"r" (x) // 4
@@ -259,9 +259,9 @@ static void add_y(BLASLONG n, FLOAT da , FLOAT *src, FLOAT *dest, BLASLONG inc_d
"jnz 1b \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (&da), // 2
"r" (src), // 3
"r" (dest) // 4
diff --git a/kernel/x86_64/sgemv_n_4.c b/kernel/x86_64/sgemv_n_4.c
index 65305ac59..63697970f 100644
--- a/kernel/x86_64/sgemv_n_4.c
+++ b/kernel/x86_64/sgemv_n_4.c
@@ -149,9 +149,9 @@ static void sgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"jnz 1b \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -223,9 +223,9 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *a
"3: \n\t"
:
+ "+r" (i), // 0
+ "+r" (n1) // 1
:
- "r" (i), // 0
- "r" (n1), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4
@@ -277,9 +277,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest)
"jnz 1b \n\t"
:
+ "+r" (i), // 0
+ "+r" (n) // 1
:
- "r" (i), // 0
- "r" (n), // 1
"r" (src), // 2
"r" (dest) // 3
: "cc",
diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c
index 065e5b385..86ecaf516 100644
--- a/kernel/x86_64/sgemv_t_4.c
+++ b/kernel/x86_64/sgemv_t_4.c
@@ -139,9 +139,9 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT *ap0, FLOAT *ap1, FLOAT *x, FLOAT
"movss %%xmm11,4(%2) \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (y), // 2
"r" (ap0), // 3
"r" (ap1), // 4
@@ -208,9 +208,9 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"movss %%xmm10, (%2) \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (y), // 2
"r" (ap), // 3
"r" (x) // 4
@@ -272,9 +272,9 @@ static void add_y(BLASLONG n, FLOAT da , FLOAT *src, FLOAT *dest, BLASLONG inc_d
"jnz 1b \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (&da), // 2
"r" (src), // 3
"r" (dest) // 4
From 91481a3e4e88b26be920aff7d5c9e72ee82d6abc Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 12 Feb 2019 15:51:43 +0100
Subject: [PATCH 2/4] Fix declaration of input arguments in inline assembly

Argument 0 is modified as it doubles as a counter
---
kernel/x86_64/dscal.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c
index ef9a0a6ba..d0d7801fd 100644
--- a/kernel/x86_64/dscal.c
+++ b/kernel/x86_64/dscal.c
@@ -136,8 +136,8 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
"jnz 1b \n\t"
:
+ "+r" (n) // 0
:
- "r" (n), // 0
"r" (x), // 1
"r" (x1), // 2
"r" (alpha), // 3
From b824fa70ebdd0b66ed045dbb17c08519525af782 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 12 Feb 2019 16:00:18 +0100
Subject: [PATCH 3/4] Fix declaration of assembly arguments in SSYMV and DSYMV
 microkernels

Arguments 0 and 1 are both input and output
---
kernel/x86_64/dsymv_U_microk_bulldozer-2.c | 6 +++---
kernel/x86_64/dsymv_U_microk_haswell-2.c | 6 +++---
kernel/x86_64/dsymv_U_microk_nehalem-2.c | 6 +++---
kernel/x86_64/dsymv_U_microk_sandy-2.c | 6 +++---
kernel/x86_64/ssymv_U_microk_bulldozer-2.c | 6 +++---
kernel/x86_64/ssymv_U_microk_haswell-2.c | 6 +++---
kernel/x86_64/ssymv_U_microk_nehalem-2.c | 6 +++---
kernel/x86_64/ssymv_U_microk_sandy-2.c | 6 +++---
8 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/kernel/x86_64/dsymv_U_microk_bulldozer-2.c b/kernel/x86_64/dsymv_U_microk_bulldozer-2.c
index d7166fe4b..ae287b6d8 100644
--- a/kernel/x86_64/dsymv_U_microk_bulldozer-2.c
+++ b/kernel/x86_64/dsymv_U_microk_bulldozer-2.c
@@ -106,9 +106,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"vmovsd %%xmm3 ,24(%9) \n\t" // save temp2
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/dsymv_U_microk_haswell-2.c b/kernel/x86_64/dsymv_U_microk_haswell-2.c
index d83d20f8e..4778f644a 100644
--- a/kernel/x86_64/dsymv_U_microk_haswell-2.c
+++ b/kernel/x86_64/dsymv_U_microk_haswell-2.c
@@ -107,9 +107,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"vzeroupper \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/dsymv_U_microk_nehalem-2.c b/kernel/x86_64/dsymv_U_microk_nehalem-2.c
index 1344c75f7..065182286 100644
--- a/kernel/x86_64/dsymv_U_microk_nehalem-2.c
+++ b/kernel/x86_64/dsymv_U_microk_nehalem-2.c
@@ -101,9 +101,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"movsd %%xmm3 , 24(%9) \n\t" // save temp2
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/dsymv_U_microk_sandy-2.c b/kernel/x86_64/dsymv_U_microk_sandy-2.c
index 1ef6fbafd..d84e703bd 100644
--- a/kernel/x86_64/dsymv_U_microk_sandy-2.c
+++ b/kernel/x86_64/dsymv_U_microk_sandy-2.c
@@ -116,9 +116,9 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"vzeroupper \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/ssymv_U_microk_bulldozer-2.c b/kernel/x86_64/ssymv_U_microk_bulldozer-2.c
index 8c01ab806..4a4f4d68d 100644
--- a/kernel/x86_64/ssymv_U_microk_bulldozer-2.c
+++ b/kernel/x86_64/ssymv_U_microk_bulldozer-2.c
@@ -90,9 +90,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"vmovss %%xmm3 ,12(%9) \n\t" // save temp2
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/ssymv_U_microk_haswell-2.c b/kernel/x86_64/ssymv_U_microk_haswell-2.c
index a32e59b44..e6a09ccf8 100644
--- a/kernel/x86_64/ssymv_U_microk_haswell-2.c
+++ b/kernel/x86_64/ssymv_U_microk_haswell-2.c
@@ -112,9 +112,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"vzeroupper \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/ssymv_U_microk_nehalem-2.c b/kernel/x86_64/ssymv_U_microk_nehalem-2.c
index b8e6ee732..c56ff3b15 100644
--- a/kernel/x86_64/ssymv_U_microk_nehalem-2.c
+++ b/kernel/x86_64/ssymv_U_microk_nehalem-2.c
@@ -106,9 +106,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"movss %%xmm3 , 12(%9) \n\t" // save temp2
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
diff --git a/kernel/x86_64/ssymv_U_microk_sandy-2.c b/kernel/x86_64/ssymv_U_microk_sandy-2.c
index e8650650c..c4919a39a 100644
--- a/kernel/x86_64/ssymv_U_microk_sandy-2.c
+++ b/kernel/x86_64/ssymv_U_microk_sandy-2.c
@@ -120,9 +120,9 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"vzeroupper \n\t"
:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (a0), // 4
From ab1630f9fac57245fbbfc20af91a060354e41c71 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 12 Feb 2019 16:14:02 +0100
Subject: [PATCH 4/4] Fix declaration of arguments in inline assembly

Argument 0 is modified so should be input and output
---
kernel/x86_64/dsymv_L_microk_bulldozer-2.c | 4 ++--
kernel/x86_64/dsymv_L_microk_haswell-2.c | 4 ++--
kernel/x86_64/dsymv_L_microk_nehalem-2.c | 4 ++--
kernel/x86_64/dsymv_L_microk_sandy-2.c | 4 ++--
kernel/x86_64/ssymv_L_microk_bulldozer-2.c | 4 ++--
kernel/x86_64/ssymv_L_microk_haswell-2.c | 4 ++--
kernel/x86_64/ssymv_L_microk_nehalem-2.c | 4 ++--
kernel/x86_64/ssymv_L_microk_sandy-2.c | 8 ++++----
8 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/kernel/x86_64/dsymv_L_microk_bulldozer-2.c b/kernel/x86_64/dsymv_L_microk_bulldozer-2.c
index d84470cc4..bfa07b6d0 100644
--- a/kernel/x86_64/dsymv_L_microk_bulldozer-2.c
+++ b/kernel/x86_64/dsymv_L_microk_bulldozer-2.c
@@ -113,8 +113,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vmovsd %%xmm3 ,24(%9) \n\t" // save temp2
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/dsymv_L_microk_haswell-2.c b/kernel/x86_64/dsymv_L_microk_haswell-2.c
index 866782ee6..6241879d5 100644
--- a/kernel/x86_64/dsymv_L_microk_haswell-2.c
+++ b/kernel/x86_64/dsymv_L_microk_haswell-2.c
@@ -105,8 +105,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vzeroupper \n\t"
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/dsymv_L_microk_nehalem-2.c b/kernel/x86_64/dsymv_L_microk_nehalem-2.c
index 38479f77a..a161dcd8b 100644
--- a/kernel/x86_64/dsymv_L_microk_nehalem-2.c
+++ b/kernel/x86_64/dsymv_L_microk_nehalem-2.c
@@ -108,8 +108,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"movsd %%xmm3 , 24(%9) \n\t" // save temp2
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/dsymv_L_microk_sandy-2.c b/kernel/x86_64/dsymv_L_microk_sandy-2.c
index b4e6ab369..b205b1019 100644
--- a/kernel/x86_64/dsymv_L_microk_sandy-2.c
+++ b/kernel/x86_64/dsymv_L_microk_sandy-2.c
@@ -114,8 +114,8 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vzeroupper \n\t"
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/ssymv_L_microk_bulldozer-2.c b/kernel/x86_64/ssymv_L_microk_bulldozer-2.c
index 9002228f3..602c3edf2 100644
--- a/kernel/x86_64/ssymv_L_microk_bulldozer-2.c
+++ b/kernel/x86_64/ssymv_L_microk_bulldozer-2.c
@@ -98,8 +98,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vmovss %%xmm3 ,12(%9) \n\t" // save temp2
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/ssymv_L_microk_haswell-2.c b/kernel/x86_64/ssymv_L_microk_haswell-2.c
index 69db008b6..fdfe4349a 100644
--- a/kernel/x86_64/ssymv_L_microk_haswell-2.c
+++ b/kernel/x86_64/ssymv_L_microk_haswell-2.c
@@ -99,8 +99,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vzeroupper \n\t"
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/ssymv_L_microk_nehalem-2.c b/kernel/x86_64/ssymv_L_microk_nehalem-2.c
index c0fe5d640..6bb9c02f6 100644
--- a/kernel/x86_64/ssymv_L_microk_nehalem-2.c
+++ b/kernel/x86_64/ssymv_L_microk_nehalem-2.c
@@ -113,8 +113,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, F
"movss %%xmm3 , 12(%9) \n\t" // save temp2
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
diff --git a/kernel/x86_64/ssymv_L_microk_sandy-2.c b/kernel/x86_64/ssymv_L_microk_sandy-2.c
index 093ca8073..0c78212e7 100644
--- a/kernel/x86_64/ssymv_L_microk_sandy-2.c
+++ b/kernel/x86_64/ssymv_L_microk_sandy-2.c
@@ -109,8 +109,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vzeroupper \n\t"
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3
@@ -217,8 +217,8 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vzeroupper \n\t"
:
- :
- "r" (from), // 0
+ "+r" (from) // 0
+ :
"r" (to), // 1
"r" (x), // 2
"r" (y), // 3