diff --git a/1965.patch b/1965.patch new file mode 100644 index 0000000..5d8b935 --- /dev/null +++ b/1965.patch @@ -0,0 +1,3283 @@ +From f0dd0584306b42289cac77fdafe6997e449d4f38 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 19:56:10 +0100 +Subject: [PATCH 001/111] Tag operands 0 and 1 as both input and output + +For #1964 (basically a continuation of coding problems first seen in #1292) +--- + kernel/x86_64/caxpy_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_bulldozer-2.c b/kernel/x86_64/caxpy_microk_bulldozer-2.c +index 33bda0943..cb98f208a 100644 +--- a/kernel/x86_64/caxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/caxpy_microk_bulldozer-2.c +@@ -115,8 +115,8 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -182,8 +182,8 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 8a6bbf5a5bf4623795b2ff9aaa8d35467288d6c7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 19:57:27 +0100 +Subject: [PATCH 002/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/caxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_haswell-2.c b/kernel/x86_64/caxpy_microk_haswell-2.c +index 00e2e6a42..f31cf9710 100644 +--- a/kernel/x86_64/caxpy_microk_haswell-2.c ++++ b/kernel/x86_64/caxpy_microk_haswell-2.c +@@ -113,8 +113,8 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 4e6f8fec31e83648c77c47398829b5191e671966 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 19:58:19 +0100 +Subject: [PATCH 003/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/caxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_sandy-2.c b/kernel/x86_64/caxpy_microk_sandy-2.c +index a798fd977..931d1ad47 100644 +--- a/kernel/x86_64/caxpy_microk_sandy-2.c ++++ b/kernel/x86_64/caxpy_microk_sandy-2.c +@@ -97,8 +97,8 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 663eef3b666e79c0e93f35cf79eada50040d9dd3 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 19:59:59 +0100 +Subject: [PATCH 004/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/caxpy_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_steamroller-2.c b/kernel/x86_64/caxpy_microk_steamroller-2.c +index 87370b032..9aeb47968 100644 +--- a/kernel/x86_64/caxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/caxpy_microk_steamroller-2.c +@@ -115,8 +115,8 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -182,8 +182,8 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From a671e19dd2cad6dc1e2e639f45a4faebf53b6f7f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:02:01 +0100 +Subject: [PATCH 005/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/cdot_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_bulldozer-2.c b/kernel/x86_64/cdot_microk_bulldozer-2.c +index f587aa036..e6d11f1af 100644 +--- a/kernel/x86_64/cdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/cdot_microk_bulldozer-2.c +@@ -98,8 +98,8 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -177,8 +177,8 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 47e2b4592eb31860a58222bedc8a3208c153aa00 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:03:03 +0100 +Subject: [PATCH 006/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/cdot_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_haswell-2.c b/kernel/x86_64/cdot_microk_haswell-2.c +index fe195a63b..9fee7615d 100644 +--- a/kernel/x86_64/cdot_microk_haswell-2.c ++++ b/kernel/x86_64/cdot_microk_haswell-2.c +@@ -99,8 +99,8 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 30a7bd8e15fb68d3fa651bbf48e1e65fc6078090 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:03:50 +0100 +Subject: [PATCH 007/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/cdot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_sandy-2.c b/kernel/x86_64/cdot_microk_sandy-2.c +index 01816917d..705c80c5c 100644 +--- a/kernel/x86_64/cdot_microk_sandy-2.c ++++ b/kernel/x86_64/cdot_microk_sandy-2.c +@@ -107,8 +107,8 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 2f5a7c1656b7975f71db2b8da90080938ccd3757 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:05:03 +0100 +Subject: [PATCH 008/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/cdot_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_steamroller-2.c b/kernel/x86_64/cdot_microk_steamroller-2.c +index 76a3aa0eb..5a46aed8c 100644 +--- a/kernel/x86_64/cdot_microk_steamroller-2.c ++++ b/kernel/x86_64/cdot_microk_steamroller-2.c +@@ -98,8 +98,8 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -177,8 +177,8 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From bb16456fe1ff372b61a7ab042418248f68ddddc6 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:23:58 +0100 +Subject: [PATCH 009/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/daxpy_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_bulldozer-2.c b/kernel/x86_64/daxpy_microk_bulldozer-2.c +index 8c520dcf1..c9a01580e 100644 +--- a/kernel/x86_64/daxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/daxpy_microk_bulldozer-2.c +@@ -65,8 +65,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 7af8f34df4efcc0ecaaa34c380119edcd5d206de Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:24:55 +0100 +Subject: [PATCH 010/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/daxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_haswell-2.c b/kernel/x86_64/daxpy_microk_haswell-2.c +index bbe8b9550..67431659d 100644 +--- a/kernel/x86_64/daxpy_microk_haswell-2.c ++++ b/kernel/x86_64/daxpy_microk_haswell-2.c +@@ -61,8 +61,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From d94e7da701dae1106854753b2d5b676255c1c0f4 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:25:56 +0100 +Subject: [PATCH 011/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/daxpy_microk_nehalem-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_nehalem-2.c b/kernel/x86_64/daxpy_microk_nehalem-2.c +index 943d893af..61c99904a 100644 +--- a/kernel/x86_64/daxpy_microk_nehalem-2.c ++++ b/kernel/x86_64/daxpy_microk_nehalem-2.c +@@ -74,8 +74,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 6008f6531855d615ad98febe65364074b99fa5bf Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:26:55 +0100 +Subject: [PATCH 012/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/daxpy_microk_piledriver-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_piledriver-2.c b/kernel/x86_64/daxpy_microk_piledriver-2.c +index 95eb953b4..e3d605b75 100644 +--- a/kernel/x86_64/daxpy_microk_piledriver-2.c ++++ b/kernel/x86_64/daxpy_microk_piledriver-2.c +@@ -80,8 +80,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 +@@ -142,8 +142,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 9d46f84f24dc7284fc398574b811621e5c61e2dc Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:27:48 +0100 +Subject: [PATCH 013/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/daxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_sandy-2.c b/kernel/x86_64/daxpy_microk_sandy-2.c +index 85e038cef..1b827e7e2 100644 +--- a/kernel/x86_64/daxpy_microk_sandy-2.c ++++ b/kernel/x86_64/daxpy_microk_sandy-2.c +@@ -101,8 +101,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From ca02ac724f5b06e16a8941ef3b2582c251234679 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:28:56 +0100 +Subject: [PATCH 014/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/daxpy_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_steamroller-2.c b/kernel/x86_64/daxpy_microk_steamroller-2.c +index e40009037..2cab80067 100644 +--- a/kernel/x86_64/daxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/daxpy_microk_steamroller-2.c +@@ -80,8 +80,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 +@@ -142,8 +142,8 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From c18c2c9d9b0cd7e82cb98c7b212ffb29648fb9e0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:32:17 +0100 +Subject: [PATCH 015/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/ddot_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_bulldozer-2.c b/kernel/x86_64/ddot_microk_bulldozer-2.c +index 9756ee46a..379fd3ca1 100644 +--- a/kernel/x86_64/ddot_microk_bulldozer-2.c ++++ b/kernel/x86_64/ddot_microk_bulldozer-2.c +@@ -67,8 +67,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From c23c17163f1b7a5fb7652cbc038a50c01f9440c5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:33:07 +0100 +Subject: [PATCH 016/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/ddot_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_haswell-2.c b/kernel/x86_64/ddot_microk_haswell-2.c +index 365737363..c0c277c32 100644 +--- a/kernel/x86_64/ddot_microk_haswell-2.c ++++ b/kernel/x86_64/ddot_microk_haswell-2.c +@@ -78,8 +78,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From de207d10c1f11ef1f38b4f766909619ab744d64a Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:34:05 +0100 +Subject: [PATCH 017/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/ddot_microk_nehalem-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_nehalem-2.c b/kernel/x86_64/ddot_microk_nehalem-2.c +index fb5ec9bca..ea0b4eff1 100644 +--- a/kernel/x86_64/ddot_microk_nehalem-2.c ++++ b/kernel/x86_64/ddot_microk_nehalem-2.c +@@ -77,8 +77,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From c9078eb8b4481fbc1841bcbf36ba438bf2749632 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:35:14 +0100 +Subject: [PATCH 018/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/ddot_microk_piledriver-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_piledriver-2.c b/kernel/x86_64/ddot_microk_piledriver-2.c +index ac950885c..f7b74add6 100644 +--- a/kernel/x86_64/ddot_microk_piledriver-2.c ++++ b/kernel/x86_64/ddot_microk_piledriver-2.c +@@ -83,8 +83,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -147,8 +147,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 00aff05c4049cd697b4000b5f2e726496b34dc54 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:36:08 +0100 +Subject: [PATCH 019/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/ddot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_sandy-2.c b/kernel/x86_64/ddot_microk_sandy-2.c +index 160f95604..e57eb37ea 100644 +--- a/kernel/x86_64/ddot_microk_sandy-2.c ++++ b/kernel/x86_64/ddot_microk_sandy-2.c +@@ -83,8 +83,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From dc15f3b5a7689a6cea1d31e004d7a3488bf9b66d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:37:06 +0100 +Subject: [PATCH 020/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/ddot_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_steamroller-2.c b/kernel/x86_64/ddot_microk_steamroller-2.c +index 5ce20b5de..845c78df1 100644 +--- a/kernel/x86_64/ddot_microk_steamroller-2.c ++++ b/kernel/x86_64/ddot_microk_steamroller-2.c +@@ -80,8 +80,8 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 3f1719a98da89f0a6f1d435d3f705aa083702ac7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:39:08 +0100 +Subject: [PATCH 021/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/saxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_haswell-2.c b/kernel/x86_64/saxpy_microk_haswell-2.c +index 3a743d64c..3b03e11a4 100644 +--- a/kernel/x86_64/saxpy_microk_haswell-2.c ++++ b/kernel/x86_64/saxpy_microk_haswell-2.c +@@ -61,8 +61,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From b13f3c3bcfffcecbcc80454c90c31bc05dd5a04d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:39:57 +0100 +Subject: [PATCH 022/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/saxpy_microk_nehalem-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_nehalem-2.c b/kernel/x86_64/saxpy_microk_nehalem-2.c +index 68f68ea3a..4ffb39acf 100644 +--- a/kernel/x86_64/saxpy_microk_nehalem-2.c ++++ b/kernel/x86_64/saxpy_microk_nehalem-2.c +@@ -74,8 +74,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 2bd18c7b73731d1b8bd900213fc7fa7a2356a357 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:40:50 +0100 +Subject: [PATCH 023/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/saxpy_microk_piledriver-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_piledriver-2.c b/kernel/x86_64/saxpy_microk_piledriver-2.c +index 204cf8bac..87c5fe3cf 100644 +--- a/kernel/x86_64/saxpy_microk_piledriver-2.c ++++ b/kernel/x86_64/saxpy_microk_piledriver-2.c +@@ -80,8 +80,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 +@@ -141,8 +141,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 6fcb55b22f6e8b80e7f6ffcf228c70c0929915b5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:41:41 +0100 +Subject: [PATCH 024/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/saxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_sandy-2.c b/kernel/x86_64/saxpy_microk_sandy-2.c +index 0a6bef046..5a8424d66 100644 +--- a/kernel/x86_64/saxpy_microk_sandy-2.c ++++ b/kernel/x86_64/saxpy_microk_sandy-2.c +@@ -101,8 +101,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 922e44897831f393cbeeb1406feb7fcf6e320281 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:42:35 +0100 +Subject: [PATCH 025/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/sdot_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_bulldozer-2.c b/kernel/x86_64/sdot_microk_bulldozer-2.c +index 36e61b077..5a6fc6da2 100644 +--- a/kernel/x86_64/sdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/sdot_microk_bulldozer-2.c +@@ -68,8 +68,8 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From d384880da564344e92a8d60b08e3183ab02ba75b Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:43:24 +0100 +Subject: [PATCH 026/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/sdot_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_haswell-2.c b/kernel/x86_64/sdot_microk_haswell-2.c +index df367b61f..89d9cfe61 100644 +--- a/kernel/x86_64/sdot_microk_haswell-2.c ++++ b/kernel/x86_64/sdot_microk_haswell-2.c +@@ -81,8 +81,8 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From cd3a35ee79b4b5fa00e5a446be2a6cceb3230874 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:44:13 +0100 +Subject: [PATCH 027/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/sdot_microk_nehalem-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_nehalem-2.c b/kernel/x86_64/sdot_microk_nehalem-2.c +index 1a27177f5..cef41b530 100644 +--- a/kernel/x86_64/sdot_microk_nehalem-2.c ++++ b/kernel/x86_64/sdot_microk_nehalem-2.c +@@ -77,8 +77,8 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From ba9f792e759ea97e75445b1fe1eaab4f3432f4f1 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:45:08 +0100 +Subject: [PATCH 028/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/sdot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_sandy-2.c b/kernel/x86_64/sdot_microk_sandy-2.c +index ca13536f2..e77ba1424 100644 +--- a/kernel/x86_64/sdot_microk_sandy-2.c ++++ b/kernel/x86_64/sdot_microk_sandy-2.c +@@ -84,8 +84,8 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From c931bb8172bbdcbcfe6d2de281d2f83a7f5a3515 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:46:19 +0100 +Subject: [PATCH 029/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/sdot_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_steamroller-2.c b/kernel/x86_64/sdot_microk_steamroller-2.c +index 6b8b2566b..bedde8fb6 100644 +--- a/kernel/x86_64/sdot_microk_steamroller-2.c ++++ b/kernel/x86_64/sdot_microk_steamroller-2.c +@@ -82,8 +82,8 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -145,8 +145,8 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 0172c51829110a5450b4d6d5f454bd4aa4106269 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:48:16 +0100 +Subject: [PATCH 030/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zaxpy_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_bulldozer-2.c b/kernel/x86_64/zaxpy_microk_bulldozer-2.c +index 0e15761f7..56493f8cb 100644 +--- a/kernel/x86_64/zaxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/zaxpy_microk_bulldozer-2.c +@@ -115,8 +115,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -182,8 +182,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 0cfb647a577058cebeaabadbe6ef62eebd2ce49e Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:51:34 +0100 +Subject: [PATCH 031/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zaxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_haswell-2.c b/kernel/x86_64/zaxpy_microk_haswell-2.c +index 30e8b1955..bd52ba01f 100644 +--- a/kernel/x86_64/zaxpy_microk_haswell-2.c ++++ b/kernel/x86_64/zaxpy_microk_haswell-2.c +@@ -113,8 +113,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 2b542d10368cbb8433b7274fb12b77845606d2fe Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:52:35 +0100 +Subject: [PATCH 032/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zaxpy_microk_sandy-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_sandy-2.c b/kernel/x86_64/zaxpy_microk_sandy-2.c +index 233af143a..d6a9ff394 100644 +--- a/kernel/x86_64/zaxpy_microk_sandy-2.c ++++ b/kernel/x86_64/zaxpy_microk_sandy-2.c +@@ -101,8 +101,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -178,8 +178,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From af29c99c85d9ea5c27b6e917ebb1dcdbe1292f7b Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 20:53:29 +0100 +Subject: [PATCH 033/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zaxpy_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_steamroller-2.c b/kernel/x86_64/zaxpy_microk_steamroller-2.c +index 728d09213..58d4c7286 100644 +--- a/kernel/x86_64/zaxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/zaxpy_microk_steamroller-2.c +@@ -115,8 +115,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -182,8 +182,8 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From f78531a9ec8ee28f7790505382231b3f5094b795 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 21:05:31 +0100 +Subject: [PATCH 034/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zdot_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_bulldozer-2.c b/kernel/x86_64/zdot_microk_bulldozer-2.c +index 30a9552d6..ed66cc674 100644 +--- a/kernel/x86_64/zdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/zdot_microk_bulldozer-2.c +@@ -98,8 +98,8 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -177,8 +177,8 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From b6f4ef5aea58e5ea1225283e406cadf9416818fc Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 21:06:54 +0100 +Subject: [PATCH 035/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zdot_microk_haswell-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_haswell-2.c b/kernel/x86_64/zdot_microk_haswell-2.c +index 11056a3c1..0e6ac55db 100644 +--- a/kernel/x86_64/zdot_microk_haswell-2.c ++++ b/kernel/x86_64/zdot_microk_haswell-2.c +@@ -103,8 +103,8 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -188,8 +188,8 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 715b1f263d6903f1af391c5278a9aa61f1753193 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 21:08:09 +0100 +Subject: [PATCH 036/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zdot_microk_sandy-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_sandy-2.c b/kernel/x86_64/zdot_microk_sandy-2.c +index 87c5b0340..416265ae2 100644 +--- a/kernel/x86_64/zdot_microk_sandy-2.c ++++ b/kernel/x86_64/zdot_microk_sandy-2.c +@@ -109,8 +109,8 @@ if ( n < 1280 ) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -201,8 +201,8 @@ if ( n < 1280 ) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From e8d835ea466a1605db2157b6884a4cfe762478fc Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 21:09:03 +0100 +Subject: [PATCH 037/111] Tag operands 0 and 1 as both input and output + +--- + kernel/x86_64/zdot_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_steamroller-2.c b/kernel/x86_64/zdot_microk_steamroller-2.c +index 325f74ae3..fe1613fd4 100644 +--- a/kernel/x86_64/zdot_microk_steamroller-2.c ++++ b/kernel/x86_64/zdot_microk_steamroller-2.c +@@ -97,8 +97,8 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -174,8 +174,8 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From bbc30700e871d84c07d770f54b645ea3eee549fa Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:49:34 +0100 +Subject: [PATCH 038/111] Update saxpy_microk_nehalem-2.c + +--- + kernel/x86_64/saxpy_microk_nehalem-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/saxpy_microk_nehalem-2.c b/kernel/x86_64/saxpy_microk_nehalem-2.c +index 4ffb39acf..e25156939 100644 +--- a/kernel/x86_64/saxpy_microk_nehalem-2.c ++++ b/kernel/x86_64/saxpy_microk_nehalem-2.c +@@ -73,9 +73,9 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 300bb19b3ec0a48b7371d7c1be3ee88a29e87cf9 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:52:04 +0100 +Subject: [PATCH 039/111] Update caxpy_microk_bulldozer-2.c + +--- + kernel/x86_64/caxpy_microk_bulldozer-2.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_bulldozer-2.c b/kernel/x86_64/caxpy_microk_bulldozer-2.c +index cb98f208a..faf5cdc40 100644 +--- a/kernel/x86_64/caxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/caxpy_microk_bulldozer-2.c +@@ -114,9 +114,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -180,10 +180,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 1878e0c95aee9777f7c082bcc98ff12b04edc75d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:52:54 +0100 +Subject: [PATCH 040/111] Update caxpy_microk_haswell-2.c + +--- + kernel/x86_64/caxpy_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/caxpy_microk_haswell-2.c b/kernel/x86_64/caxpy_microk_haswell-2.c +index f31cf9710..a011b2bfa 100644 +--- a/kernel/x86_64/caxpy_microk_haswell-2.c ++++ b/kernel/x86_64/caxpy_microk_haswell-2.c +@@ -112,9 +112,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From f6be89295f4e21572a743d26e677256fc29ee8cf Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:53:35 +0100 +Subject: [PATCH 041/111] Update caxpy_microk_sandy-2.c + +--- + kernel/x86_64/caxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_sandy-2.c b/kernel/x86_64/caxpy_microk_sandy-2.c +index 931d1ad47..c760d6540 100644 +--- a/kernel/x86_64/caxpy_microk_sandy-2.c ++++ b/kernel/x86_64/caxpy_microk_sandy-2.c +@@ -95,10 +95,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 4673e5317861de37b326181b0dfc8514a2b3b69d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:54:39 +0100 +Subject: [PATCH 042/111] Update caxpy_microk_steamroller-2.c + +--- + kernel/x86_64/caxpy_microk_steamroller-2.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_steamroller-2.c b/kernel/x86_64/caxpy_microk_steamroller-2.c +index 9aeb47968..b6eb55f9b 100644 +--- a/kernel/x86_64/caxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/caxpy_microk_steamroller-2.c +@@ -113,10 +113,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -181,9 +181,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From ba6d2c77a98f55431d8d2d4de4b6df99814352c1 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:55:38 +0100 +Subject: [PATCH 043/111] Update cdot_microk_bulldozer-2.c + +--- + kernel/x86_64/cdot_microk_bulldozer-2.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_bulldozer-2.c b/kernel/x86_64/cdot_microk_bulldozer-2.c +index e6d11f1af..c2245c6dc 100644 +--- a/kernel/x86_64/cdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/cdot_microk_bulldozer-2.c +@@ -97,9 +97,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -175,10 +175,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 093a3d7d5790efd7441611ee8c8769d4f3d997c0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:56:15 +0100 +Subject: [PATCH 044/111] Update cdot_microk_haswell-2.c + +--- + kernel/x86_64/cdot_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/cdot_microk_haswell-2.c b/kernel/x86_64/cdot_microk_haswell-2.c +index 9fee7615d..396dbeaa7 100644 +--- a/kernel/x86_64/cdot_microk_haswell-2.c ++++ b/kernel/x86_64/cdot_microk_haswell-2.c +@@ -98,9 +98,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 2224bcb4f070e607ede67f2f6e089e2e99519517 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:57:01 +0100 +Subject: [PATCH 045/111] Update cdot_microk_sandy-2.c + +--- + kernel/x86_64/cdot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_sandy-2.c b/kernel/x86_64/cdot_microk_sandy-2.c +index 705c80c5c..20ba48c00 100644 +--- a/kernel/x86_64/cdot_microk_sandy-2.c ++++ b/kernel/x86_64/cdot_microk_sandy-2.c +@@ -105,10 +105,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 2414f1d796e23f8e9e4abba27e948f5877773640 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:57:56 +0100 +Subject: [PATCH 046/111] Update cdot_microk_steamroller-2.c + +--- + kernel/x86_64/cdot_microk_steamroller-2.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_steamroller-2.c b/kernel/x86_64/cdot_microk_steamroller-2.c +index 5a46aed8c..01754b147 100644 +--- a/kernel/x86_64/cdot_microk_steamroller-2.c ++++ b/kernel/x86_64/cdot_microk_steamroller-2.c +@@ -97,9 +97,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -175,10 +175,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From ae00befb3e3a9632d9545ba0af43f9afb90787b2 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:58:52 +0100 +Subject: [PATCH 047/111] Update daxpy_microk_bulldozer-2.c + +--- + kernel/x86_64/daxpy_microk_bulldozer-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/daxpy_microk_bulldozer-2.c b/kernel/x86_64/daxpy_microk_bulldozer-2.c +index c9a01580e..2e2356fb6 100644 +--- a/kernel/x86_64/daxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/daxpy_microk_bulldozer-2.c +@@ -64,9 +64,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 604c574542a5fac237b5134610166fab26db1285 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 22:59:30 +0100 +Subject: [PATCH 048/111] Update daxpy_microk_haswell-2.c + +--- + kernel/x86_64/daxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_haswell-2.c b/kernel/x86_64/daxpy_microk_haswell-2.c +index 67431659d..c77fc33ef 100644 +--- a/kernel/x86_64/daxpy_microk_haswell-2.c ++++ b/kernel/x86_64/daxpy_microk_haswell-2.c +@@ -59,10 +59,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 082498ee3b8470e992f33414e3097ca301f9efa7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:00:07 +0100 +Subject: [PATCH 049/111] Update daxpy_microk_nehalem-2.c + +--- + kernel/x86_64/daxpy_microk_nehalem-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/daxpy_microk_nehalem-2.c b/kernel/x86_64/daxpy_microk_nehalem-2.c +index 61c99904a..b81fe6562 100644 +--- a/kernel/x86_64/daxpy_microk_nehalem-2.c ++++ b/kernel/x86_64/daxpy_microk_nehalem-2.c +@@ -73,9 +73,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 293f5531e66088d7149bebd68bcd7aa564b3a263 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:00:53 +0100 +Subject: [PATCH 050/111] Update daxpy_microk_piledriver-2.c + +--- + kernel/x86_64/daxpy_microk_piledriver-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_piledriver-2.c b/kernel/x86_64/daxpy_microk_piledriver-2.c +index e3d605b75..efe93dfed 100644 +--- a/kernel/x86_64/daxpy_microk_piledriver-2.c ++++ b/kernel/x86_64/daxpy_microk_piledriver-2.c +@@ -78,10 +78,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "subq $16, %1 \n\t" + "jnz 1b \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 +@@ -140,10 +140,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "subq $16, %1 \n\t" + "jnz 1b \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 6cee8e0fdd463139f85656292971de1e4810d775 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:01:28 +0100 +Subject: [PATCH 051/111] Update daxpy_microk_sandy-2.c + +--- + kernel/x86_64/daxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_sandy-2.c b/kernel/x86_64/daxpy_microk_sandy-2.c +index 1b827e7e2..3b1214f36 100644 +--- a/kernel/x86_64/daxpy_microk_sandy-2.c ++++ b/kernel/x86_64/daxpy_microk_sandy-2.c +@@ -99,10 +99,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 6450bf14afa94cade7d28330749dfbf255697026 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:02:32 +0100 +Subject: [PATCH 052/111] Update daxpy_microk_steamroller-2.c + +--- + kernel/x86_64/daxpy_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_steamroller-2.c b/kernel/x86_64/daxpy_microk_steamroller-2.c +index 2cab80067..a5143682f 100644 +--- a/kernel/x86_64/daxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/daxpy_microk_steamroller-2.c +@@ -78,10 +78,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "subq $16, %1 \n\t" + "jnz 1b \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 +@@ -140,10 +140,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "subq $16, %1 \n\t" + "jnz 1b \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From a339b45e51c58e5b13c01c6918282fb31941acdf Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:03:07 +0100 +Subject: [PATCH 053/111] Update ddot_microk_bulldozer-2.c + +--- + kernel/x86_64/ddot_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_bulldozer-2.c b/kernel/x86_64/ddot_microk_bulldozer-2.c +index 379fd3ca1..62bf7e7dc 100644 +--- a/kernel/x86_64/ddot_microk_bulldozer-2.c ++++ b/kernel/x86_64/ddot_microk_bulldozer-2.c +@@ -65,10 +65,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + "vmovsd %%xmm4, (%4) \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 64fcdadf39137bdc56c56ead1e4d8f1bea32fe2a Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:03:44 +0100 +Subject: [PATCH 054/111] Update ddot_microk_haswell-2.c + +--- + kernel/x86_64/ddot_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/ddot_microk_haswell-2.c b/kernel/x86_64/ddot_microk_haswell-2.c +index c0c277c32..0cf4ece65 100644 +--- a/kernel/x86_64/ddot_microk_haswell-2.c ++++ b/kernel/x86_64/ddot_microk_haswell-2.c +@@ -77,9 +77,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vzeroupper \n\t" + + : +- : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 504dd44e887cbd985bac3d48a2a7fdc3a03727d8 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:04:20 +0100 +Subject: [PATCH 055/111] Update ddot_microk_nehalem-2.c + +--- + kernel/x86_64/ddot_microk_nehalem-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_nehalem-2.c b/kernel/x86_64/ddot_microk_nehalem-2.c +index ea0b4eff1..086a0bb91 100644 +--- a/kernel/x86_64/ddot_microk_nehalem-2.c ++++ b/kernel/x86_64/ddot_microk_nehalem-2.c +@@ -75,10 +75,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + "movsd %%xmm4, (%4) \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 56c67a929a2b215c3980a542c74a016f828e119d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:05:11 +0100 +Subject: [PATCH 056/111] Update ddot_microk_piledriver-2.c + +--- + kernel/x86_64/ddot_microk_piledriver-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_piledriver-2.c b/kernel/x86_64/ddot_microk_piledriver-2.c +index f7b74add6..d7347ebdf 100644 +--- a/kernel/x86_64/ddot_microk_piledriver-2.c ++++ b/kernel/x86_64/ddot_microk_piledriver-2.c +@@ -81,10 +81,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovsd %%xmm4, (%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -145,10 +145,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovsd %%xmm4, (%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From b7ffbc40eca528e3aae46d004c1ad8e6fd013530 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:05:43 +0100 +Subject: [PATCH 057/111] Update ddot_microk_sandy-2.c + +--- + kernel/x86_64/ddot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_sandy-2.c b/kernel/x86_64/ddot_microk_sandy-2.c +index e57eb37ea..28b1a8bd1 100644 +--- a/kernel/x86_64/ddot_microk_sandy-2.c ++++ b/kernel/x86_64/ddot_microk_sandy-2.c +@@ -81,10 +81,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovsd %%xmm4, (%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 0c9c31dbe4817ad24ecc2cc5dc553239a7c31590 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:06:20 +0100 +Subject: [PATCH 058/111] Update ddot_microk_steamroller-2.c + +--- + kernel/x86_64/ddot_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_steamroller-2.c b/kernel/x86_64/ddot_microk_steamroller-2.c +index 845c78df1..98cf94acf 100644 +--- a/kernel/x86_64/ddot_microk_steamroller-2.c ++++ b/kernel/x86_64/ddot_microk_steamroller-2.c +@@ -78,10 +78,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovsd %%xmm4, (%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From d1b69022c935a37bbe3c8b09eb329a7468339ff0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:07:04 +0100 +Subject: [PATCH 059/111] Update saxpy_microk_haswell-2.c + +--- + kernel/x86_64/saxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_haswell-2.c b/kernel/x86_64/saxpy_microk_haswell-2.c +index 3b03e11a4..3bc450f7b 100644 +--- a/kernel/x86_64/saxpy_microk_haswell-2.c ++++ b/kernel/x86_64/saxpy_microk_haswell-2.c +@@ -59,10 +59,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 369a2b4af5680dfcbd1d8290077f62a4d74336fb Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:07:54 +0100 +Subject: [PATCH 060/111] Update saxpy_microk_piledriver-2.c + +--- + kernel/x86_64/saxpy_microk_piledriver-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_piledriver-2.c b/kernel/x86_64/saxpy_microk_piledriver-2.c +index 87c5fe3cf..87e742ac7 100644 +--- a/kernel/x86_64/saxpy_microk_piledriver-2.c ++++ b/kernel/x86_64/saxpy_microk_piledriver-2.c +@@ -78,10 +78,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 +@@ -139,10 +139,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From dc931ad1fe709ad378d6d963fbde5bad421e5514 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:08:27 +0100 +Subject: [PATCH 061/111] Update saxpy_microk_sandy-2.c + +--- + kernel/x86_64/saxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_sandy-2.c b/kernel/x86_64/saxpy_microk_sandy-2.c +index 5a8424d66..6ce67a7d1 100644 +--- a/kernel/x86_64/saxpy_microk_sandy-2.c ++++ b/kernel/x86_64/saxpy_microk_sandy-2.c +@@ -99,10 +99,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From b2d6fea1cb99f0830c33e3667d1928be4496a31f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:09:07 +0100 +Subject: [PATCH 062/111] Update sdot_microk_bulldozer-2.c + +--- + kernel/x86_64/sdot_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_bulldozer-2.c b/kernel/x86_64/sdot_microk_bulldozer-2.c +index 5a6fc6da2..c7f8cb1a7 100644 +--- a/kernel/x86_64/sdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/sdot_microk_bulldozer-2.c +@@ -66,10 +66,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + "vmovss %%xmm4, (%4) \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From ffc008663aef2dd318c58275fb8b68cc93de9a42 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:09:50 +0100 +Subject: [PATCH 063/111] Update sdot_microk_haswell-2.c + +--- + kernel/x86_64/sdot_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_haswell-2.c b/kernel/x86_64/sdot_microk_haswell-2.c +index 89d9cfe61..417fb3862 100644 +--- a/kernel/x86_64/sdot_microk_haswell-2.c ++++ b/kernel/x86_64/sdot_microk_haswell-2.c +@@ -79,10 +79,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovss %%xmm4, (%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 88b0dbfbddbc5170263bd06eb0aad0abf85faa81 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:10:30 +0100 +Subject: [PATCH 064/111] Update sdot_microk_nehalem-2.c + +--- + kernel/x86_64/sdot_microk_nehalem-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_nehalem-2.c b/kernel/x86_64/sdot_microk_nehalem-2.c +index cef41b530..115e7a410 100644 +--- a/kernel/x86_64/sdot_microk_nehalem-2.c ++++ b/kernel/x86_64/sdot_microk_nehalem-2.c +@@ -75,10 +75,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + "movss %%xmm4, (%4) \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From ba9c3c4328a73821ce6067fb78b01b8817a92fa1 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:11:09 +0100 +Subject: [PATCH 065/111] Update sdot_microk_sandy-2.c + +--- + kernel/x86_64/sdot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_sandy-2.c b/kernel/x86_64/sdot_microk_sandy-2.c +index e77ba1424..9d0795181 100644 +--- a/kernel/x86_64/sdot_microk_sandy-2.c ++++ b/kernel/x86_64/sdot_microk_sandy-2.c +@@ -82,10 +82,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovss %%xmm4, (%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 266e72d24b767dbcdb97f597c899c7f495609c6f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:12:07 +0100 +Subject: [PATCH 066/111] Update sdot_microk_steamroller-2.c + +--- + kernel/x86_64/sdot_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_steamroller-2.c b/kernel/x86_64/sdot_microk_steamroller-2.c +index bedde8fb6..3475f890d 100644 +--- a/kernel/x86_64/sdot_microk_steamroller-2.c ++++ b/kernel/x86_64/sdot_microk_steamroller-2.c +@@ -80,10 +80,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + "vmovss %%xmm4, (%4) \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -143,10 +143,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + "vmovss %%xmm4, (%4) \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 72c3a4d1bd1daf3a98413dbea081f19fc6ee897d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:13:06 +0100 +Subject: [PATCH 067/111] Update zaxpy_microk_bulldozer-2.c + +--- + kernel/x86_64/zaxpy_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_bulldozer-2.c b/kernel/x86_64/zaxpy_microk_bulldozer-2.c +index 56493f8cb..eed36ffd0 100644 +--- a/kernel/x86_64/zaxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/zaxpy_microk_bulldozer-2.c +@@ -113,10 +113,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -180,10 +180,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 157e65ff74b7760a19ed38e8796aab6ad0d2a152 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:13:41 +0100 +Subject: [PATCH 068/111] Update zaxpy_microk_haswell-2.c + +--- + kernel/x86_64/zaxpy_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_haswell-2.c b/kernel/x86_64/zaxpy_microk_haswell-2.c +index bd52ba01f..9aeea975b 100644 +--- a/kernel/x86_64/zaxpy_microk_haswell-2.c ++++ b/kernel/x86_64/zaxpy_microk_haswell-2.c +@@ -111,10 +111,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 212b0a106d83491aeac793c6d45b4e494d06d868 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:14:28 +0100 +Subject: [PATCH 069/111] Update zaxpy_microk_sandy-2.c + +--- + kernel/x86_64/zaxpy_microk_sandy-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_sandy-2.c b/kernel/x86_64/zaxpy_microk_sandy-2.c +index d6a9ff394..cbd9b378f 100644 +--- a/kernel/x86_64/zaxpy_microk_sandy-2.c ++++ b/kernel/x86_64/zaxpy_microk_sandy-2.c +@@ -99,10 +99,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -176,10 +176,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 2fa6d8107c40d780c988c8f23b5d61d6a0f8e8eb Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:15:18 +0100 +Subject: [PATCH 070/111] Update zaxpy_microk_steamroller-2.c + +--- + kernel/x86_64/zaxpy_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_steamroller-2.c b/kernel/x86_64/zaxpy_microk_steamroller-2.c +index 58d4c7286..5fc56aec7 100644 +--- a/kernel/x86_64/zaxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/zaxpy_microk_steamroller-2.c +@@ -113,10 +113,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 +@@ -180,10 +180,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "jnz 1b \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha), // 4 + +From 79d5dd461d13953e8cade9a1dad43ad38cf93aaa Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:16:09 +0100 +Subject: [PATCH 071/111] Update zdot_microk_bulldozer-2.c + +--- + kernel/x86_64/zdot_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_bulldozer-2.c b/kernel/x86_64/zdot_microk_bulldozer-2.c +index ed66cc674..a80eac003 100644 +--- a/kernel/x86_64/zdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/zdot_microk_bulldozer-2.c +@@ -96,10 +96,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -175,10 +175,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From cb5cfffb1765ac8ef1e2f149aea1dc3e5fbb9623 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:16:55 +0100 +Subject: [PATCH 072/111] Update zdot_microk_haswell-2.c + +--- + kernel/x86_64/zdot_microk_haswell-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_haswell-2.c b/kernel/x86_64/zdot_microk_haswell-2.c +index 0e6ac55db..963d2e3bd 100644 +--- a/kernel/x86_64/zdot_microk_haswell-2.c ++++ b/kernel/x86_64/zdot_microk_haswell-2.c +@@ -101,10 +101,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -186,10 +186,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From f4e5f931ae5c14d284749c65d1e9ed08873afaa2 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:17:46 +0100 +Subject: [PATCH 073/111] Update zdot_microk_sandy-2.c + +--- + kernel/x86_64/zdot_microk_sandy-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_sandy-2.c b/kernel/x86_64/zdot_microk_sandy-2.c +index 416265ae2..88d4e1bbb 100644 +--- a/kernel/x86_64/zdot_microk_sandy-2.c ++++ b/kernel/x86_64/zdot_microk_sandy-2.c +@@ -107,10 +107,10 @@ if ( n < 1280 ) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -199,10 +199,10 @@ if ( n < 1280 ) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From ae2f3e617df8894ebe1779d3bcc78170bcad8b4c Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:18:27 +0100 +Subject: [PATCH 074/111] Update zdot_microk_steamroller-2.c + +--- + kernel/x86_64/zdot_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_steamroller-2.c b/kernel/x86_64/zdot_microk_steamroller-2.c +index fe1613fd4..2f11fe562 100644 +--- a/kernel/x86_64/zdot_microk_steamroller-2.c ++++ b/kernel/x86_64/zdot_microk_steamroller-2.c +@@ -95,10 +95,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 +@@ -172,10 +172,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + "vmovups %%xmm4, 16(%4) \n\t" + "vzeroupper \n\t" + +- : +- : ++ : + "+r" (i), // 0 + "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (dot) // 4 + +From 379aa11f4bfc5bb352372a3f423062267e73dd77 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:10:21 +0100 +Subject: [PATCH 075/111] Update caxpy_microk_bulldozer-2.c + +--- + kernel/x86_64/caxpy_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_bulldozer-2.c b/kernel/x86_64/caxpy_microk_bulldozer-2.c +index faf5cdc40..ca2209340 100644 +--- a/kernel/x86_64/caxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/caxpy_microk_bulldozer-2.c +@@ -115,7 +115,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -182,7 +182,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 983c72ab0fc182264a635d1c5286ceebc2b2f3e2 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:10:51 +0100 +Subject: [PATCH 076/111] Update caxpy_microk_haswell-2.c + +--- + kernel/x86_64/caxpy_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/caxpy_microk_haswell-2.c b/kernel/x86_64/caxpy_microk_haswell-2.c +index a011b2bfa..b605ea34c 100644 +--- a/kernel/x86_64/caxpy_microk_haswell-2.c ++++ b/kernel/x86_64/caxpy_microk_haswell-2.c +@@ -113,7 +113,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 6f7f9967f945c145e6e4ceac14162e8dbc551f4c Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:11:21 +0100 +Subject: [PATCH 077/111] Update caxpy_microk_sandy-2.c + +--- + kernel/x86_64/caxpy_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/caxpy_microk_sandy-2.c b/kernel/x86_64/caxpy_microk_sandy-2.c +index c760d6540..72d37afed 100644 +--- a/kernel/x86_64/caxpy_microk_sandy-2.c ++++ b/kernel/x86_64/caxpy_microk_sandy-2.c +@@ -97,7 +97,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From aa799573b5f91e786ef41116b9fd030161fb6a10 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:11:59 +0100 +Subject: [PATCH 078/111] Update caxpy_microk_steamroller-2.c + +--- + kernel/x86_64/caxpy_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/caxpy_microk_steamroller-2.c b/kernel/x86_64/caxpy_microk_steamroller-2.c +index b6eb55f9b..7ca7af070 100644 +--- a/kernel/x86_64/caxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/caxpy_microk_steamroller-2.c +@@ -115,7 +115,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -182,7 +182,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From f9497bdab685ca8b9bea018c900df24b7dd2aad7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:12:37 +0100 +Subject: [PATCH 079/111] Update cdot_microk_bulldozer-2.c + +--- + kernel/x86_64/cdot_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_bulldozer-2.c b/kernel/x86_64/cdot_microk_bulldozer-2.c +index c2245c6dc..118655913 100644 +--- a/kernel/x86_64/cdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/cdot_microk_bulldozer-2.c +@@ -98,7 +98,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -177,7 +177,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From de4c5a9258b3c29e1e305660c50e7b4cf8204c46 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:13:09 +0100 +Subject: [PATCH 080/111] Update daxpy_microk_haswell-2.c + +--- + kernel/x86_64/daxpy_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/daxpy_microk_haswell-2.c b/kernel/x86_64/daxpy_microk_haswell-2.c +index c77fc33ef..f3682e6d7 100644 +--- a/kernel/x86_64/daxpy_microk_haswell-2.c ++++ b/kernel/x86_64/daxpy_microk_haswell-2.c +@@ -61,7 +61,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 59ca748c9ec75cf57148bcf4de06dc328f227845 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:13:38 +0100 +Subject: [PATCH 081/111] Update daxpy_microk_nehalem-2.c + +--- + kernel/x86_64/daxpy_microk_nehalem-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/daxpy_microk_nehalem-2.c b/kernel/x86_64/daxpy_microk_nehalem-2.c +index b81fe6562..8feb9f26c 100644 +--- a/kernel/x86_64/daxpy_microk_nehalem-2.c ++++ b/kernel/x86_64/daxpy_microk_nehalem-2.c +@@ -74,7 +74,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 5f2ef0e70fb180022f3447826029f42c75c6fbb5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:14:13 +0100 +Subject: [PATCH 082/111] Update daxpy_microk_piledriver-2.c + +--- + kernel/x86_64/daxpy_microk_piledriver-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_piledriver-2.c b/kernel/x86_64/daxpy_microk_piledriver-2.c +index efe93dfed..4b83124c7 100644 +--- a/kernel/x86_64/daxpy_microk_piledriver-2.c ++++ b/kernel/x86_64/daxpy_microk_piledriver-2.c +@@ -80,7 +80,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -142,7 +142,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From c5b01c8be14c3cc3b364b9067124695e2d91c63a Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:14:43 +0100 +Subject: [PATCH 083/111] Update daxpy_microk_sandy-2.c + +--- + kernel/x86_64/daxpy_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/daxpy_microk_sandy-2.c b/kernel/x86_64/daxpy_microk_sandy-2.c +index 3b1214f36..db9a45de8 100644 +--- a/kernel/x86_64/daxpy_microk_sandy-2.c ++++ b/kernel/x86_64/daxpy_microk_sandy-2.c +@@ -101,7 +101,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From d4f3b733dc1026c9d1bfa8bea5696353de3b47c0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:15:18 +0100 +Subject: [PATCH 084/111] Update daxpy_microk_steamroller-2.c + +--- + kernel/x86_64/daxpy_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/daxpy_microk_steamroller-2.c b/kernel/x86_64/daxpy_microk_steamroller-2.c +index a5143682f..8e63fcc1d 100644 +--- a/kernel/x86_64/daxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/daxpy_microk_steamroller-2.c +@@ -80,7 +80,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -142,7 +142,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From dcfab783f725abb0280a77f61a4083be581e89b8 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:15:57 +0100 +Subject: [PATCH 085/111] Update ddot_microk_bulldozer-2.c + +--- + kernel/x86_64/ddot_microk_bulldozer-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/ddot_microk_bulldozer-2.c b/kernel/x86_64/ddot_microk_bulldozer-2.c +index 62bf7e7dc..5590c5b17 100644 +--- a/kernel/x86_64/ddot_microk_bulldozer-2.c ++++ b/kernel/x86_64/ddot_microk_bulldozer-2.c +@@ -67,7 +67,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 0779654cb47dbc9984f344d5b7ffa68e39afdbc3 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:16:26 +0100 +Subject: [PATCH 086/111] Update ddot_microk_haswell-2.c + +--- + kernel/x86_64/ddot_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/ddot_microk_haswell-2.c b/kernel/x86_64/ddot_microk_haswell-2.c +index 0cf4ece65..dbb5487f7 100644 +--- a/kernel/x86_64/ddot_microk_haswell-2.c ++++ b/kernel/x86_64/ddot_microk_haswell-2.c +@@ -78,7 +78,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 29028652213235c1d2e7dc18d49daa86f3356574 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:16:53 +0100 +Subject: [PATCH 087/111] Update ddot_microk_nehalem-2.c + +--- + kernel/x86_64/ddot_microk_nehalem-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/ddot_microk_nehalem-2.c b/kernel/x86_64/ddot_microk_nehalem-2.c +index 086a0bb91..e5e234e22 100644 +--- a/kernel/x86_64/ddot_microk_nehalem-2.c ++++ b/kernel/x86_64/ddot_microk_nehalem-2.c +@@ -77,7 +77,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 6df88c7c455c37a18a16f1cbd003b640ef6777f0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:17:43 +0100 +Subject: [PATCH 088/111] Update cdot_microk_haswell-2.c + +--- + kernel/x86_64/cdot_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/cdot_microk_haswell-2.c b/kernel/x86_64/cdot_microk_haswell-2.c +index 396dbeaa7..8b9d6d104 100644 +--- a/kernel/x86_64/cdot_microk_haswell-2.c ++++ b/kernel/x86_64/cdot_microk_haswell-2.c +@@ -99,7 +99,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 81691c726eb55df75f638794fe3afff70cc3286d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:18:11 +0100 +Subject: [PATCH 089/111] Update cdot_microk_sandy-2.c + +--- + kernel/x86_64/cdot_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/cdot_microk_sandy-2.c b/kernel/x86_64/cdot_microk_sandy-2.c +index 20ba48c00..fe142c38f 100644 +--- a/kernel/x86_64/cdot_microk_sandy-2.c ++++ b/kernel/x86_64/cdot_microk_sandy-2.c +@@ -107,7 +107,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From ab8cc007364b9477e13c107a7befce7668c10ebb Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:18:47 +0100 +Subject: [PATCH 090/111] Update cdot_microk_steamroller-2.c + +--- + kernel/x86_64/cdot_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/cdot_microk_steamroller-2.c b/kernel/x86_64/cdot_microk_steamroller-2.c +index 01754b147..7350b21c9 100644 +--- a/kernel/x86_64/cdot_microk_steamroller-2.c ++++ b/kernel/x86_64/cdot_microk_steamroller-2.c +@@ -98,7 +98,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -177,7 +177,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From bdcba6adda368da48e450cdc3b9c9f7b6c52e630 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:19:32 +0100 +Subject: [PATCH 091/111] Update daxpy_microk_bulldozer-2.c + +--- + kernel/x86_64/daxpy_microk_bulldozer-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/daxpy_microk_bulldozer-2.c b/kernel/x86_64/daxpy_microk_bulldozer-2.c +index 2e2356fb6..9c1305b97 100644 +--- a/kernel/x86_64/daxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/daxpy_microk_bulldozer-2.c +@@ -65,7 +65,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From e9fc4dfdead60ed013e016c62215170d04b5ad9d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:20:20 +0100 +Subject: [PATCH 092/111] Update ddot_microk_piledriver-2.c + +--- + kernel/x86_64/ddot_microk_piledriver-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/ddot_microk_piledriver-2.c b/kernel/x86_64/ddot_microk_piledriver-2.c +index d7347ebdf..cc4bcd90a 100644 +--- a/kernel/x86_64/ddot_microk_piledriver-2.c ++++ b/kernel/x86_64/ddot_microk_piledriver-2.c +@@ -83,7 +83,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -147,7 +147,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 9430424102257485eae76482f495402260e9682d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:20:48 +0100 +Subject: [PATCH 093/111] Update ddot_microk_sandy-2.c + +--- + kernel/x86_64/ddot_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/ddot_microk_sandy-2.c b/kernel/x86_64/ddot_microk_sandy-2.c +index 28b1a8bd1..84493ec27 100644 +--- a/kernel/x86_64/ddot_microk_sandy-2.c ++++ b/kernel/x86_64/ddot_microk_sandy-2.c +@@ -83,7 +83,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 129a987e4b55f13c413f4eaad58465443051dd43 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:21:26 +0100 +Subject: [PATCH 094/111] Update ddot_microk_steamroller-2.c + +--- + kernel/x86_64/ddot_microk_steamroller-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/ddot_microk_steamroller-2.c b/kernel/x86_64/ddot_microk_steamroller-2.c +index 98cf94acf..27d5244ce 100644 +--- a/kernel/x86_64/ddot_microk_steamroller-2.c ++++ b/kernel/x86_64/ddot_microk_steamroller-2.c +@@ -80,7 +80,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 49789c39fb2a55dacc146f079c1c5fab45d3ce2e Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:22:17 +0100 +Subject: [PATCH 095/111] Update saxpy_microk_haswell-2.c + +--- + kernel/x86_64/saxpy_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/saxpy_microk_haswell-2.c b/kernel/x86_64/saxpy_microk_haswell-2.c +index 3bc450f7b..7099ba4c6 100644 +--- a/kernel/x86_64/saxpy_microk_haswell-2.c ++++ b/kernel/x86_64/saxpy_microk_haswell-2.c +@@ -61,7 +61,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 7f556b81fb40ca6d90529829b802b38adbc747d7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:22:46 +0100 +Subject: [PATCH 096/111] Update saxpy_microk_nehalem-2.c + +--- + kernel/x86_64/saxpy_microk_nehalem-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/saxpy_microk_nehalem-2.c b/kernel/x86_64/saxpy_microk_nehalem-2.c +index e25156939..88bbb695d 100644 +--- a/kernel/x86_64/saxpy_microk_nehalem-2.c ++++ b/kernel/x86_64/saxpy_microk_nehalem-2.c +@@ -74,7 +74,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From cb75878f98892850b29fc7a0b427500a56d244dd Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:23:16 +0100 +Subject: [PATCH 097/111] Update saxpy_microk_piledriver-2.c + +--- + kernel/x86_64/saxpy_microk_piledriver-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/saxpy_microk_piledriver-2.c b/kernel/x86_64/saxpy_microk_piledriver-2.c +index 87e742ac7..5feea7f24 100644 +--- a/kernel/x86_64/saxpy_microk_piledriver-2.c ++++ b/kernel/x86_64/saxpy_microk_piledriver-2.c +@@ -80,7 +80,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -141,7 +141,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 65719fcb41987c499c31455fe7b0290800cacdd6 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:23:44 +0100 +Subject: [PATCH 098/111] Update saxpy_microk_sandy-2.c + +--- + kernel/x86_64/saxpy_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/saxpy_microk_sandy-2.c b/kernel/x86_64/saxpy_microk_sandy-2.c +index 6ce67a7d1..0d448d5f8 100644 +--- a/kernel/x86_64/saxpy_microk_sandy-2.c ++++ b/kernel/x86_64/saxpy_microk_sandy-2.c +@@ -101,7 +101,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From b52e763084040ed624fff574fba1fe1bc58b1cc7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:24:16 +0100 +Subject: [PATCH 099/111] Update sdot_microk_bulldozer-2.c + +--- + kernel/x86_64/sdot_microk_bulldozer-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/sdot_microk_bulldozer-2.c b/kernel/x86_64/sdot_microk_bulldozer-2.c +index c7f8cb1a7..8958a33dc 100644 +--- a/kernel/x86_64/sdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/sdot_microk_bulldozer-2.c +@@ -68,7 +68,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 2c021aeb9c018e4da2a7a0a5c0315d06d689a3c2 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:24:42 +0100 +Subject: [PATCH 100/111] Update sdot_microk_haswell-2.c + +--- + kernel/x86_64/sdot_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/sdot_microk_haswell-2.c b/kernel/x86_64/sdot_microk_haswell-2.c +index 417fb3862..91dc928d3 100644 +--- a/kernel/x86_64/sdot_microk_haswell-2.c ++++ b/kernel/x86_64/sdot_microk_haswell-2.c +@@ -81,7 +81,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From bb43f185cf2f4354b62b779a369b53db3607598d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:25:15 +0100 +Subject: [PATCH 101/111] Update sdot_microk_nehalem-2.c + +--- + kernel/x86_64/sdot_microk_nehalem-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/sdot_microk_nehalem-2.c b/kernel/x86_64/sdot_microk_nehalem-2.c +index 115e7a410..5a715d008 100644 +--- a/kernel/x86_64/sdot_microk_nehalem-2.c ++++ b/kernel/x86_64/sdot_microk_nehalem-2.c +@@ -77,7 +77,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 3b98d1e16d48f08540952624e9aa7843d5384ceb Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:25:43 +0100 +Subject: [PATCH 102/111] Update sdot_microk_sandy-2.c + +--- + kernel/x86_64/sdot_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/sdot_microk_sandy-2.c b/kernel/x86_64/sdot_microk_sandy-2.c +index 9d0795181..ae25d5a50 100644 +--- a/kernel/x86_64/sdot_microk_sandy-2.c ++++ b/kernel/x86_64/sdot_microk_sandy-2.c +@@ -84,7 +84,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 7009a0337f674911ebe6d9ce6d1bf9b21472e05e Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:26:24 +0100 +Subject: [PATCH 103/111] Update sdot_microk_steamroller-2.c + +--- + kernel/x86_64/sdot_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/sdot_microk_steamroller-2.c b/kernel/x86_64/sdot_microk_steamroller-2.c +index 3475f890d..bf6a5f287 100644 +--- a/kernel/x86_64/sdot_microk_steamroller-2.c ++++ b/kernel/x86_64/sdot_microk_steamroller-2.c +@@ -82,7 +82,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -145,7 +145,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From f117a2e4aa3e100015d479dd61530019db66e53f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:27:34 +0100 +Subject: [PATCH 104/111] Update zaxpy_microk_bulldozer-2.c + +--- + kernel/x86_64/zaxpy_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_bulldozer-2.c b/kernel/x86_64/zaxpy_microk_bulldozer-2.c +index eed36ffd0..15d367971 100644 +--- a/kernel/x86_64/zaxpy_microk_bulldozer-2.c ++++ b/kernel/x86_64/zaxpy_microk_bulldozer-2.c +@@ -115,7 +115,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -182,7 +182,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 752d4e88089ce1ff5ab27b25de382750b5e4a9c7 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:28:00 +0100 +Subject: [PATCH 105/111] Update zaxpy_microk_haswell-2.c + +--- + kernel/x86_64/zaxpy_microk_haswell-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/zaxpy_microk_haswell-2.c b/kernel/x86_64/zaxpy_microk_haswell-2.c +index 9aeea975b..89d23daf3 100644 +--- a/kernel/x86_64/zaxpy_microk_haswell-2.c ++++ b/kernel/x86_64/zaxpy_microk_haswell-2.c +@@ -113,7 +113,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 0f905d346e8c0bda5bbf7cb6ae7f7a6ad137aa76 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:28:40 +0100 +Subject: [PATCH 106/111] Update zaxpy_microk_sandy-2.c + +--- + kernel/x86_64/zaxpy_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_sandy-2.c b/kernel/x86_64/zaxpy_microk_sandy-2.c +index cbd9b378f..17b8b24f7 100644 +--- a/kernel/x86_64/zaxpy_microk_sandy-2.c ++++ b/kernel/x86_64/zaxpy_microk_sandy-2.c +@@ -101,7 +101,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -178,7 +178,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 39a29ef0ce2de84526cf8e71881e6117b4532f84 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:29:18 +0100 +Subject: [PATCH 107/111] Update zaxpy_microk_steamroller-2.c + +--- + kernel/x86_64/zaxpy_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zaxpy_microk_steamroller-2.c b/kernel/x86_64/zaxpy_microk_steamroller-2.c +index 5fc56aec7..907b1ae00 100644 +--- a/kernel/x86_64/zaxpy_microk_steamroller-2.c ++++ b/kernel/x86_64/zaxpy_microk_steamroller-2.c +@@ -115,7 +115,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -182,7 +182,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 1496c1a69f4d0c521d797b1847363c38e46958d5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:30:03 +0100 +Subject: [PATCH 108/111] Update zdot_microk_bulldozer-2.c + +--- + kernel/x86_64/zdot_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_bulldozer-2.c b/kernel/x86_64/zdot_microk_bulldozer-2.c +index a80eac003..db9a48cce 100644 +--- a/kernel/x86_64/zdot_microk_bulldozer-2.c ++++ b/kernel/x86_64/zdot_microk_bulldozer-2.c +@@ -98,7 +98,7 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -177,7 +177,7 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 8f09f06f2c964ece75730dadd99e569844497fe6 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:30:43 +0100 +Subject: [PATCH 109/111] Update zdot_microk_haswell-2.c + +--- + kernel/x86_64/zdot_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_haswell-2.c b/kernel/x86_64/zdot_microk_haswell-2.c +index 963d2e3bd..9f2fc2c1d 100644 +--- a/kernel/x86_64/zdot_microk_haswell-2.c ++++ b/kernel/x86_64/zdot_microk_haswell-2.c +@@ -103,7 +103,7 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -188,7 +188,7 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From fca3f8610fbeb0a4a4198eb0f2fc74f91cd6e85d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:31:24 +0100 +Subject: [PATCH 110/111] Update zdot_microk_sandy-2.c + +--- + kernel/x86_64/zdot_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_sandy-2.c b/kernel/x86_64/zdot_microk_sandy-2.c +index 88d4e1bbb..33415e26e 100644 +--- a/kernel/x86_64/zdot_microk_sandy-2.c ++++ b/kernel/x86_64/zdot_microk_sandy-2.c +@@ -109,7 +109,7 @@ if ( n < 1280 ) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -201,7 +201,7 @@ if ( n < 1280 ) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From 6976222962772b395054016e99faac34986b5e59 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:32:05 +0100 +Subject: [PATCH 111/111] Update zdot_microk_steamroller-2.c + +--- + kernel/x86_64/zdot_microk_steamroller-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/zdot_microk_steamroller-2.c b/kernel/x86_64/zdot_microk_steamroller-2.c +index 2f11fe562..87138fe9a 100644 +--- a/kernel/x86_64/zdot_microk_steamroller-2.c ++++ b/kernel/x86_64/zdot_microk_steamroller-2.c +@@ -97,7 +97,7 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 +@@ -174,7 +174,7 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 diff --git a/1966.patch b/1966.patch new file mode 100644 index 0000000..c2663cd --- /dev/null +++ b/1966.patch @@ -0,0 +1,960 @@ +From 63cdd8f4a04f3a5ac1733e202b6b3678c34fb8dd Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:27:38 +0100 +Subject: [PATCH 01/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/cscal_microk_bulldozer-2.c | 32 ++++++++++++------------ + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/kernel/x86_64/cscal_microk_bulldozer-2.c b/kernel/x86_64/cscal_microk_bulldozer-2.c +index 3abffc4cf..f526fd611 100644 +--- a/kernel/x86_64/cscal_microk_bulldozer-2.c ++++ b/kernel/x86_64/cscal_microk_bulldozer-2.c +@@ -116,11 +116,11 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -208,11 +208,11 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -285,11 +285,11 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -330,11 +330,11 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + +From b6136be686e415fbdb035267c5020cb08e4e49ac Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:30:03 +0100 +Subject: [PATCH 02/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/cscal_microk_haswell-2.c | 30 +++++++++++++------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/kernel/x86_64/cscal_microk_haswell-2.c b/kernel/x86_64/cscal_microk_haswell-2.c +index 0a4eb683c..8623dcd10 100644 +--- a/kernel/x86_64/cscal_microk_haswell-2.c ++++ b/kernel/x86_64/cscal_microk_haswell-2.c +@@ -116,11 +116,11 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"0", "1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -208,9 +208,9 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 + : "cc", // "0", "1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", +@@ -285,9 +285,9 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 + : "cc", //"%0", "%1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", +@@ -329,12 +329,12 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + "vzeroupper \n\t" + +- : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ : ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"0", "1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + +From f447fb4c54870710cd6304553df59f50ff51b8f5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:32:48 +0100 +Subject: [PATCH 03/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/cscal_microk_steamroller-2.c | 32 +++++++++++----------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/kernel/x86_64/cscal_microk_steamroller-2.c b/kernel/x86_64/cscal_microk_steamroller-2.c +index 8346e1748..fbeb857e2 100644 +--- a/kernel/x86_64/cscal_microk_steamroller-2.c ++++ b/kernel/x86_64/cscal_microk_steamroller-2.c +@@ -117,11 +117,11 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"0", "1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -208,12 +208,12 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + "vzeroupper \n\t" + ++ : ++ "+r" (n), // 0 ++ "+r" (x), // 1 + : +- : +- "r" (n), // 0 +- "r" (x), // 1 + "r" (alpha) // 2 +- : "cc", //"0", "1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -286,11 +286,11 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -331,11 +331,11 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"0", "1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + +From fcd7fde5702cf7270332a5dd747f83efe7be93dd Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:35:18 +0100 +Subject: [PATCH 04/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/dscal_microk_bulldozer-2.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/kernel/x86_64/dscal_microk_bulldozer-2.c b/kernel/x86_64/dscal_microk_bulldozer-2.c +index de53b0bc4..71d3a9846 100644 +--- a/kernel/x86_64/dscal_microk_bulldozer-2.c ++++ b/kernel/x86_64/dscal_microk_bulldozer-2.c +@@ -122,9 +122,9 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n1), // 0 +- "r" (x), // 1 ++ "+r" (n1), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha), // 2 + "r" (n2) // 3 + : "cc", +@@ -188,9 +188,9 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n1), // 0 +- "r" (x), // 1 ++ "+r" (n1), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha), // 2 + "r" (n2) // 3 + : "cc", + +From 05e961994401bfc6dc8639fa9bc159148569ca9d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:36:37 +0100 +Subject: [PATCH 05/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/dscal_microk_haswell-2.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/kernel/x86_64/dscal_microk_haswell-2.c b/kernel/x86_64/dscal_microk_haswell-2.c +index e732a2718..90790cfdc 100644 +--- a/kernel/x86_64/dscal_microk_haswell-2.c ++++ b/kernel/x86_64/dscal_microk_haswell-2.c +@@ -122,9 +122,9 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n1), // 0 +- "r" (x), // 1 ++ "+r" (n1), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha), // 2 + "r" (n2) // 3 + : "cc", +@@ -187,10 +187,10 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + "vzeroupper \n\t" + ++ : ++ "+r" (n1), // 0 ++ "+r" (x), // 1 + : +- : +- "r" (n1), // 0 +- "r" (x), // 1 + "r" (alpha), // 2 + "r" (n2) // 3 + : "cc", + +From 7a11cc5b9f7c9669ee1f9818a1ea3f44c2f6d98d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:37:49 +0100 +Subject: [PATCH 06/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/dscal_microk_sandy-2.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/kernel/x86_64/dscal_microk_sandy-2.c b/kernel/x86_64/dscal_microk_sandy-2.c +index 8d855072b..0f187ba88 100644 +--- a/kernel/x86_64/dscal_microk_sandy-2.c ++++ b/kernel/x86_64/dscal_microk_sandy-2.c +@@ -122,9 +122,9 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n1), // 0 +- "r" (x), // 1 ++ "+r" (n1), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha), // 2 + "r" (n2) // 3 + : "cc", +@@ -187,10 +187,10 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + "vzeroupper \n\t" + ++ : ++ "+r" (n1), // 0 ++ "+r" (x), // 1 + : +- : +- "r" (n1), // 0 +- "r" (x), // 1 + "r" (alpha), // 2 + "r" (n2) // 3 + : "cc", + +From a6c06bffe1ec60ec359b300b8cc9e18b30c72d0d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:40:28 +0100 +Subject: [PATCH 07/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/zscal_microk_bulldozer-2.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/kernel/x86_64/zscal_microk_bulldozer-2.c b/kernel/x86_64/zscal_microk_bulldozer-2.c +index 03882d6b6..1ce59d2c7 100644 +--- a/kernel/x86_64/zscal_microk_bulldozer-2.c ++++ b/kernel/x86_64/zscal_microk_bulldozer-2.c +@@ -116,11 +116,11 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -208,11 +208,11 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + +From 5efc7ce079fd87de9ab7ca20aaaf8c5c627170fa Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:42:34 +0100 +Subject: [PATCH 08/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/zscal_microk_haswell-2.c | 32 +++++++++++++------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/kernel/x86_64/zscal_microk_haswell-2.c b/kernel/x86_64/zscal_microk_haswell-2.c +index d9253c1ed..534370959 100644 +--- a/kernel/x86_64/zscal_microk_haswell-2.c ++++ b/kernel/x86_64/zscal_microk_haswell-2.c +@@ -116,11 +116,11 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -208,11 +208,11 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -285,11 +285,11 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -330,11 +330,11 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + +From 1a1471c6be597a176a4dbfe2757c134eb3780af0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 16 Jan 2019 23:44:42 +0100 +Subject: [PATCH 09/18] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/zscal_microk_steamroller-2.c | 32 +++++++++++----------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/kernel/x86_64/zscal_microk_steamroller-2.c b/kernel/x86_64/zscal_microk_steamroller-2.c +index 97b07add6..4b489d9f3 100644 +--- a/kernel/x86_64/zscal_microk_steamroller-2.c ++++ b/kernel/x86_64/zscal_microk_steamroller-2.c +@@ -116,12 +116,12 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + "vzeroupper \n\t" + ++ : ++ "+r" (n), // 0 ++ "+r" (x), // 1 + : +- : +- "r" (n), // 0 +- "r" (x), // 1 + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -209,11 +209,11 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -286,11 +286,11 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", +@@ -331,11 +331,11 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x), // 1 ++ : + "r" (alpha) // 2 +- : "cc", //"%0", "%1", ++ : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + +From 90e28665183cd8da3a6129016977f57dd415c6a9 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:38:20 +0100 +Subject: [PATCH 10/18] Remove stray comma + +--- + kernel/x86_64/cscal_microk_bulldozer-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/cscal_microk_bulldozer-2.c b/kernel/x86_64/cscal_microk_bulldozer-2.c +index f526fd611..31451aa6c 100644 +--- a/kernel/x86_64/cscal_microk_bulldozer-2.c ++++ b/kernel/x86_64/cscal_microk_bulldozer-2.c +@@ -117,7 +117,7 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -209,7 +209,7 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -286,7 +286,7 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -331,7 +331,7 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", + +From b8dd71bddcb41d3d88af1a1eb77f845760452f5f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:39:23 +0100 +Subject: [PATCH 11/18] Remove stray comma + +--- + kernel/x86_64/cscal_microk_haswell-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/cscal_microk_haswell-2.c b/kernel/x86_64/cscal_microk_haswell-2.c +index 8623dcd10..a04a4c4ab 100644 +--- a/kernel/x86_64/cscal_microk_haswell-2.c ++++ b/kernel/x86_64/cscal_microk_haswell-2.c +@@ -117,7 +117,7 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -209,7 +209,7 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", // "0", "1", +@@ -286,7 +286,7 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", //"%0", "%1", +@@ -331,7 +331,7 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", + +From 8c9a6356eaba102124147856422b9a0570daeb55 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:40:25 +0100 +Subject: [PATCH 12/18] Remove stray comma + +--- + kernel/x86_64/cscal_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/cscal_microk_steamroller-2.c b/kernel/x86_64/cscal_microk_steamroller-2.c +index fbeb857e2..e8073d485 100644 +--- a/kernel/x86_64/cscal_microk_steamroller-2.c ++++ b/kernel/x86_64/cscal_microk_steamroller-2.c +@@ -118,7 +118,7 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -210,7 +210,7 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -287,7 +287,7 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -332,7 +332,7 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", + +From ebe8882eb23e88d410f824d8d6a113f0fca94a3b Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:41:27 +0100 +Subject: [PATCH 13/18] Remove stray comma + +--- + kernel/x86_64/dscal_microk_bulldozer-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/dscal_microk_bulldozer-2.c b/kernel/x86_64/dscal_microk_bulldozer-2.c +index 71d3a9846..096662781 100644 +--- a/kernel/x86_64/dscal_microk_bulldozer-2.c ++++ b/kernel/x86_64/dscal_microk_bulldozer-2.c +@@ -123,7 +123,7 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n1), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha), // 2 + "r" (n2) // 3 +@@ -189,7 +189,7 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n1), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha), // 2 + "r" (n2) // 3 + +From fd3e2c862286019589530ece0a61be6d86a01e92 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:42:12 +0100 +Subject: [PATCH 14/18] Remove stray comma + +--- + kernel/x86_64/dscal_microk_sandy-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/dscal_microk_sandy-2.c b/kernel/x86_64/dscal_microk_sandy-2.c +index 0f187ba88..9982b8e58 100644 +--- a/kernel/x86_64/dscal_microk_sandy-2.c ++++ b/kernel/x86_64/dscal_microk_sandy-2.c +@@ -123,7 +123,7 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n1), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha), // 2 + "r" (n2) // 3 +@@ -189,7 +189,7 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n1), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha), // 2 + "r" (n2) // 3 + +From 45339034256043b4405fd6330f918cbed3660ac4 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:43:14 +0100 +Subject: [PATCH 15/18] Remove stray comma + +--- + kernel/x86_64/dscal_microk_haswell-2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/x86_64/dscal_microk_haswell-2.c b/kernel/x86_64/dscal_microk_haswell-2.c +index 90790cfdc..77ed59a4e 100644 +--- a/kernel/x86_64/dscal_microk_haswell-2.c ++++ b/kernel/x86_64/dscal_microk_haswell-2.c +@@ -123,7 +123,7 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n1), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha), // 2 + "r" (n2) // 3 +@@ -189,7 +189,7 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n1), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha), // 2 + "r" (n2) // 3 + +From 3b0b5ce0f69a45753b126d8bd96a48de2f882a4c Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:46:05 +0100 +Subject: [PATCH 16/18] Remove stray comma + +--- + kernel/x86_64/zscal_microk_bulldozer-2.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/kernel/x86_64/zscal_microk_bulldozer-2.c b/kernel/x86_64/zscal_microk_bulldozer-2.c +index 1ce59d2c7..5e733ffda 100644 +--- a/kernel/x86_64/zscal_microk_bulldozer-2.c ++++ b/kernel/x86_64/zscal_microk_bulldozer-2.c +@@ -117,7 +117,7 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -209,7 +209,7 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -285,9 +285,9 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + "vzeroupper \n\t" + + : +- : +- "r" (n), // 0 +- "r" (x), // 1 ++ "+r" (n), // 0 ++ "+r" (x) // 1 ++ : + "r" (alpha) // 2 + : "cc", //"%0", "%1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", +@@ -329,10 +329,10 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + "vzeroupper \n\t" + ++ : ++ "+r" (n), // 0 ++ "+r" (x) // 1 + : +- : +- "r" (n), // 0 +- "r" (x), // 1 + "r" (alpha) // 2 + : "cc", //"%0", "%1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + +From c17d2f61c2387b5a6cfab22d964d70afcce69b23 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:47:12 +0100 +Subject: [PATCH 17/18] Remove stray comma + +--- + kernel/x86_64/zscal_microk_haswell-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zscal_microk_haswell-2.c b/kernel/x86_64/zscal_microk_haswell-2.c +index 534370959..8c8f5b75c 100644 +--- a/kernel/x86_64/zscal_microk_haswell-2.c ++++ b/kernel/x86_64/zscal_microk_haswell-2.c +@@ -117,7 +117,7 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -209,7 +209,7 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -286,7 +286,7 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -331,7 +331,7 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", + +From ccb2b2175751037b5625b4ec3c60ddca26a04394 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:48:40 +0100 +Subject: [PATCH 18/18] Remove stray comma + +--- + kernel/x86_64/zscal_microk_steamroller-2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/x86_64/zscal_microk_steamroller-2.c b/kernel/x86_64/zscal_microk_steamroller-2.c +index 4b489d9f3..c9267ee0c 100644 +--- a/kernel/x86_64/zscal_microk_steamroller-2.c ++++ b/kernel/x86_64/zscal_microk_steamroller-2.c +@@ -118,7 +118,7 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -210,7 +210,7 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -287,7 +287,7 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", +@@ -332,7 +332,7 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) + + : + "+r" (n), // 0 +- "+r" (x), // 1 ++ "+r" (x) // 1 + : + "r" (alpha) // 2 + : "cc", diff --git a/1967.patch b/1967.patch new file mode 100644 index 0000000..c7066fa --- /dev/null +++ b/1967.patch @@ -0,0 +1,99 @@ +From 7ff08e4b06e2c643829b566a4f2c1daba25b1029 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 00:04:44 +0100 +Subject: [PATCH 1/4] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/dger_microk_sandy-2.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/x86_64/dger_microk_sandy-2.c b/kernel/x86_64/dger_microk_sandy-2.c +index 2bf966a5f..944d4c6f1 100644 +--- a/kernel/x86_64/dger_microk_sandy-2.c ++++ b/kernel/x86_64/dger_microk_sandy-2.c +@@ -105,9 +105,9 @@ static void dger_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "vzeroupper \n\t" + + : +- : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 003583675d31ce5ddabfede7fc0f93cfbac51e5f Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 00:05:47 +0100 +Subject: [PATCH 2/4] Tag arguments 0 and 1 as both input and output + +--- + kernel/x86_64/sger_microk_sandy-2.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/x86_64/sger_microk_sandy-2.c b/kernel/x86_64/sger_microk_sandy-2.c +index 79180b991..d38fdd551 100644 +--- a/kernel/x86_64/sger_microk_sandy-2.c ++++ b/kernel/x86_64/sger_microk_sandy-2.c +@@ -105,9 +105,9 @@ static void sger_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + "vzeroupper \n\t" + + : +- : +- "r" (i), // 0 +- "r" (n), // 1 ++ "+r" (i), // 0 ++ "+r" (n), // 1 ++ : + "r" (x), // 2 + "r" (y), // 3 + "r" (alpha) // 4 + +From 78aeb19e4613104c1ae8ea1c67022451dcfed7e6 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:34:12 +0100 +Subject: [PATCH 3/4] Remove stray comma + +--- + kernel/x86_64/sger_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/sger_microk_sandy-2.c b/kernel/x86_64/sger_microk_sandy-2.c +index d38fdd551..14f13475b 100644 +--- a/kernel/x86_64/sger_microk_sandy-2.c ++++ b/kernel/x86_64/sger_microk_sandy-2.c +@@ -106,7 +106,7 @@ static void sger_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 + +From d3e7e25bfb73e16bdbf89ee07d0ab584339be2a0 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Thu, 17 Jan 2019 09:35:56 +0100 +Subject: [PATCH 4/4] Remove stray comma + +--- + kernel/x86_64/dger_microk_sandy-2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/x86_64/dger_microk_sandy-2.c b/kernel/x86_64/dger_microk_sandy-2.c +index 944d4c6f1..e8494500f 100644 +--- a/kernel/x86_64/dger_microk_sandy-2.c ++++ b/kernel/x86_64/dger_microk_sandy-2.c +@@ -106,7 +106,7 @@ static void dger_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) + + : + "+r" (i), // 0 +- "+r" (n), // 1 ++ "+r" (n) // 1 + : + "r" (x), // 2 + "r" (y), // 3 diff --git a/openblas.spec b/openblas.spec index 93e9af3..45cc85f 100644 --- a/openblas.spec +++ b/openblas.spec @@ -15,7 +15,7 @@ Name: openblas Version: 0.3.5 -Release: 4%{?dist} +Release: 5%{?dist} Summary: An optimized BLAS library based on GotoBLAS2 License: BSD URL: https://github.com/xianyi/OpenBLAS/ @@ -37,6 +37,9 @@ Patch13: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pul Patch14: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pull/2023.patch Patch15: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pull/2024.patch Patch16: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pull/2028.patch +Patch17: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pull/1965.patch +Patch18: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pull/1966.patch +Patch19: https://patch-diff.githubusercontent.com/raw/xianyi/OpenBLAS/pull/1967.patch BuildRequires: gcc BuildRequires: gcc-gfortran @@ -255,6 +258,9 @@ cd OpenBLAS-%{version} %patch14 -p1 %patch15 -p1 %patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 # Fix source permissions find -name \*.f -exec chmod 644 {} \; @@ -691,6 +697,9 @@ rm -rf %{buildroot}%{_libdir}/pkgconfig %endif %changelog +* Tue Feb 26 2019 Susi Lehtola - 0.3.5-5 +- Even more assembly kernel patches. + * Mon Feb 25 2019 Susi Lehtola - 0.3.5-4 - Another assembly kernel patch.