Compare commits
9 Commits
108e16551f
...
e68b99ed3b
Author | SHA1 | Date | |
---|---|---|---|
e68b99ed3b | |||
|
0dd29cbca1 | ||
|
4c0dcdd8a4 | ||
|
b316bfef29 | ||
|
fc952d7fda | ||
|
8f143edfab | ||
|
54c877216e | ||
|
5123a10ecd | ||
|
529f54963b |
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,2 +1 @@
|
||||
/v*.tar.gz
|
||||
/openblas-*.tar.gz
|
||||
/OpenBLAS-*.tar.gz
|
||||
|
@ -1,64 +0,0 @@
|
||||
From f703846ad9400a8ea175cb8dd43e18c152aeab93 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Sat, 13 Aug 2022 11:38:27 +0200
|
||||
Subject: [PATCH] Add function prototypes
|
||||
Content-type: text/plain
|
||||
|
||||
---
|
||||
exports/gensymbol | 16 ++++++++++++++++
|
||||
exports/gensymbol.pl | 12 ++++++++++++
|
||||
2 files changed, 28 insertions(+)
|
||||
|
||||
diff --git a/exports/gensymbol b/exports/gensymbol
|
||||
index 83222a21..f05de626 100755
|
||||
--- a/exports/gensymbol
|
||||
+++ b/exports/gensymbol
|
||||
@@ -4000,6 +4000,22 @@ case "$p1" in
|
||||
no_underscore_objs="$no_underscore_objs $misc_common_objs"
|
||||
|
||||
printf 'int main(void){\n'
|
||||
+ for obj in $underscore_objs; do
|
||||
+ [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
|
||||
+ "$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||
+ done
|
||||
+
|
||||
+ for obj in $need_2underscore_objs; do
|
||||
+ printf 'extern void %s%s%s%s%s();\n' \
|
||||
+ "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
|
||||
+ done
|
||||
+
|
||||
+ for obj in $no_underscore_objs; do
|
||||
+ printf 'extern void %s%s%s();\n' \
|
||||
+ "$symbolprefix" "$obj" "$symbolsuffix"
|
||||
+ done
|
||||
+
|
||||
+ printf '\n'
|
||||
for obj in $underscore_objs; do
|
||||
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||
diff --git a/exports/gensymbol.pl b/exports/gensymbol.pl
|
||||
index ac62bc05..e38a3cc8 100644
|
||||
--- a/exports/gensymbol.pl
|
||||
+++ b/exports/gensymbol.pl
|
||||
@@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") {
|
||||
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
|
||||
|
||||
print "int main(void){\n";
|
||||
+ foreach $objs (@underscore_objs) {
|
||||
+ print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||
+ }
|
||||
+
|
||||
+ foreach $objs (@need_2underscore_objs) {
|
||||
+ print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
|
||||
+ }
|
||||
+
|
||||
+ foreach $objs (@no_underscore_objs) {
|
||||
+ print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
|
||||
+ }
|
||||
+
|
||||
foreach $objs (@underscore_objs) {
|
||||
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,510 +0,0 @@
|
||||
From 515cf269291bec0d43651fe7bf99a71fb074a0ad Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed, 14 Sep 2022 11:48:36 +0200
|
||||
Subject: [PATCH] Fix pointer/integer argument mismatch in calls to pow()
|
||||
Content-type: text/plain
|
||||
|
||||
---
|
||||
lapack-netlib/SRC/claed0.c | 4 ++--
|
||||
lapack-netlib/SRC/claed7.c | 4 ++--
|
||||
lapack-netlib/SRC/clalsa.c | 6 +++---
|
||||
lapack-netlib/SRC/cstedc.c | 4 ++--
|
||||
lapack-netlib/SRC/dlaed0.c | 4 ++--
|
||||
lapack-netlib/SRC/dlaed7.c | 4 ++--
|
||||
lapack-netlib/SRC/dlaeda.c | 8 ++++----
|
||||
lapack-netlib/SRC/dlalsa.c | 6 +++---
|
||||
lapack-netlib/SRC/dlasd0.c | 2 +-
|
||||
lapack-netlib/SRC/dlasda.c | 4 ++--
|
||||
lapack-netlib/SRC/dstedc.c | 4 ++--
|
||||
lapack-netlib/SRC/slaed0.c | 4 ++--
|
||||
lapack-netlib/SRC/slaed7.c | 4 ++--
|
||||
lapack-netlib/SRC/slaeda.c | 8 ++++----
|
||||
lapack-netlib/SRC/slalsa.c | 6 +++---
|
||||
lapack-netlib/SRC/slasd0.c | 2 +-
|
||||
lapack-netlib/SRC/slasda.c | 4 ++--
|
||||
lapack-netlib/SRC/sstedc.c | 4 ++--
|
||||
lapack-netlib/SRC/zlaed0.c | 4 ++--
|
||||
lapack-netlib/SRC/zlaed7.c | 4 ++--
|
||||
lapack-netlib/SRC/zlalsa.c | 6 +++---
|
||||
lapack-netlib/SRC/zstedc.c | 4 ++--
|
||||
22 files changed, 50 insertions(+), 50 deletions(-)
|
||||
|
||||
diff --git a/lapack-netlib/SRC/claed0.c b/lapack-netlib/SRC/claed0.c
|
||||
index 21e40839..2b696508 100644
|
||||
--- a/lapack-netlib/SRC/claed0.c
|
||||
+++ b/lapack-netlib/SRC/claed0.c
|
||||
@@ -796,10 +796,10 @@ L10:
|
||||
|
||||
temp = log((real) (*n)) / log(2.f);
|
||||
lgn = (integer) temp;
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
diff --git a/lapack-netlib/SRC/claed7.c b/lapack-netlib/SRC/claed7.c
|
||||
index 49fc9ed4..1eaa7e9c 100644
|
||||
--- a/lapack-netlib/SRC/claed7.c
|
||||
+++ b/lapack-netlib/SRC/claed7.c
|
||||
@@ -864,11 +864,11 @@ f"> */
|
||||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
- ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
+ ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
- ptr += pow_ii(&c__2, &i__2);
|
||||
+ ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
diff --git a/lapack-netlib/SRC/clalsa.c b/lapack-netlib/SRC/clalsa.c
|
||||
index 4bc3830a..2ef3e123 100644
|
||||
--- a/lapack-netlib/SRC/clalsa.c
|
||||
+++ b/lapack-netlib/SRC/clalsa.c
|
||||
@@ -1051,7 +1051,7 @@ f"> */
|
||||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
- j = pow_ii(&c__2, &nlvl);
|
||||
+ j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
@@ -1065,7 +1065,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
@@ -1110,7 +1110,7 @@ L170:
|
||||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__2);
|
||||
+ lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
diff --git a/lapack-netlib/SRC/cstedc.c b/lapack-netlib/SRC/cstedc.c
|
||||
index 437c39e9..8f047d1c 100644
|
||||
--- a/lapack-netlib/SRC/cstedc.c
|
||||
+++ b/lapack-netlib/SRC/cstedc.c
|
||||
@@ -836,10 +836,10 @@ f"> */
|
||||
lrwmin = *n - 1 << 1;
|
||||
} else if (icompz == 1) {
|
||||
lgn = (integer) (log((real) (*n)) / log(2.f));
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
lwmin = *n * *n;
|
||||
diff --git a/lapack-netlib/SRC/dlaed0.c b/lapack-netlib/SRC/dlaed0.c
|
||||
index 95e39b0d..74e58dd2 100644
|
||||
--- a/lapack-netlib/SRC/dlaed0.c
|
||||
+++ b/lapack-netlib/SRC/dlaed0.c
|
||||
@@ -827,10 +827,10 @@ L10:
|
||||
|
||||
temp = log((doublereal) (*n)) / log(2.);
|
||||
lgn = (integer) temp;
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
diff --git a/lapack-netlib/SRC/dlaed7.c b/lapack-netlib/SRC/dlaed7.c
|
||||
index fd851526..d23a72be 100644
|
||||
--- a/lapack-netlib/SRC/dlaed7.c
|
||||
+++ b/lapack-netlib/SRC/dlaed7.c
|
||||
@@ -885,11 +885,11 @@ f"> */
|
||||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
- ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
+ ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
- ptr += pow_ii(&c__2, &i__2);
|
||||
+ ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
diff --git a/lapack-netlib/SRC/dlaeda.c b/lapack-netlib/SRC/dlaeda.c
|
||||
index f4bb214d..202e1b63 100644
|
||||
--- a/lapack-netlib/SRC/dlaeda.c
|
||||
+++ b/lapack-netlib/SRC/dlaeda.c
|
||||
@@ -754,7 +754,7 @@ f"> */
|
||||
/* scheme */
|
||||
|
||||
i__1 = *curlvl - 1;
|
||||
- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
|
||||
+ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
|
||||
|
||||
/* Determine size of these matrices. We add HALF to the value of */
|
||||
/* the SQRT in case the machine underestimates one of these square */
|
||||
@@ -781,12 +781,12 @@ f"> */
|
||||
/* rotations and permutation and then multiplying the center matrices */
|
||||
/* against the current Z. */
|
||||
|
||||
- ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
+ ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (k = 1; k <= i__1; ++k) {
|
||||
i__2 = *curlvl - k;
|
||||
i__3 = *curlvl - k - 1;
|
||||
- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
|
||||
+ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
|
||||
1;
|
||||
psiz1 = prmptr[curr + 1] - prmptr[curr];
|
||||
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
|
||||
@@ -847,7 +847,7 @@ f"> */
|
||||
c__1);
|
||||
|
||||
i__2 = *tlvls - k;
|
||||
- ptr += pow_ii(&c__2, &i__2);
|
||||
+ ptr += pow_ii(c__2, i__2);
|
||||
/* L70: */
|
||||
}
|
||||
|
||||
diff --git a/lapack-netlib/SRC/dlalsa.c b/lapack-netlib/SRC/dlalsa.c
|
||||
index 891ed66a..4d5c347c 100644
|
||||
--- a/lapack-netlib/SRC/dlalsa.c
|
||||
+++ b/lapack-netlib/SRC/dlalsa.c
|
||||
@@ -951,7 +951,7 @@ f"> */
|
||||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
- j = pow_ii(&c__2, &nlvl);
|
||||
+ j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
@@ -965,7 +965,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
@@ -1010,7 +1010,7 @@ L50:
|
||||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__2);
|
||||
+ lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
diff --git a/lapack-netlib/SRC/dlasd0.c b/lapack-netlib/SRC/dlasd0.c
|
||||
index c702665b..0f88527e 100644
|
||||
--- a/lapack-netlib/SRC/dlasd0.c
|
||||
+++ b/lapack-netlib/SRC/dlasd0.c
|
||||
@@ -824,7 +824,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
diff --git a/lapack-netlib/SRC/dlasda.c b/lapack-netlib/SRC/dlasda.c
|
||||
index 72f9d55f..a9190f80 100644
|
||||
--- a/lapack-netlib/SRC/dlasda.c
|
||||
+++ b/lapack-netlib/SRC/dlasda.c
|
||||
@@ -1027,7 +1027,7 @@ f"> */
|
||||
|
||||
/* Now conquer each subproblem bottom-up. */
|
||||
|
||||
- j = pow_ii(&c__2, &nlvl);
|
||||
+ j = pow_ii(c__2, nlvl);
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
lvl2 = (lvl << 1) - 1;
|
||||
|
||||
@@ -1039,7 +1039,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
diff --git a/lapack-netlib/SRC/dstedc.c b/lapack-netlib/SRC/dstedc.c
|
||||
index ef2eeabe..56511d6c 100644
|
||||
--- a/lapack-netlib/SRC/dstedc.c
|
||||
+++ b/lapack-netlib/SRC/dstedc.c
|
||||
@@ -806,10 +806,10 @@ f"> */
|
||||
lwmin = *n - 1 << 1;
|
||||
} else {
|
||||
lgn = (integer) (log((doublereal) (*n)) / log(2.));
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (icompz == 1) {
|
||||
diff --git a/lapack-netlib/SRC/slaed0.c b/lapack-netlib/SRC/slaed0.c
|
||||
index 33f7134c..4c523090 100644
|
||||
--- a/lapack-netlib/SRC/slaed0.c
|
||||
+++ b/lapack-netlib/SRC/slaed0.c
|
||||
@@ -823,10 +823,10 @@ L10:
|
||||
|
||||
temp = log((real) (*n)) / log(2.f);
|
||||
lgn = (integer) temp;
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
diff --git a/lapack-netlib/SRC/slaed7.c b/lapack-netlib/SRC/slaed7.c
|
||||
index 210d796d..22fcaf76 100644
|
||||
--- a/lapack-netlib/SRC/slaed7.c
|
||||
+++ b/lapack-netlib/SRC/slaed7.c
|
||||
@@ -883,11 +883,11 @@ f"> */
|
||||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
- ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
+ ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
- ptr += pow_ii(&c__2, &i__2);
|
||||
+ ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
diff --git a/lapack-netlib/SRC/slaeda.c b/lapack-netlib/SRC/slaeda.c
|
||||
index 7edaf8a7..3806427c 100644
|
||||
--- a/lapack-netlib/SRC/slaeda.c
|
||||
+++ b/lapack-netlib/SRC/slaeda.c
|
||||
@@ -753,7 +753,7 @@ f"> */
|
||||
/* scheme */
|
||||
|
||||
i__1 = *curlvl - 1;
|
||||
- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
|
||||
+ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
|
||||
|
||||
/* Determine size of these matrices. We add HALF to the value of */
|
||||
/* the SQRT in case the machine underestimates one of these square */
|
||||
@@ -779,12 +779,12 @@ f"> */
|
||||
/* rotations and permutation and then multiplying the center matrices */
|
||||
/* against the current Z. */
|
||||
|
||||
- ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
+ ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (k = 1; k <= i__1; ++k) {
|
||||
i__2 = *curlvl - k;
|
||||
i__3 = *curlvl - k - 1;
|
||||
- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
|
||||
+ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
|
||||
1;
|
||||
psiz1 = prmptr[curr + 1] - prmptr[curr];
|
||||
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
|
||||
@@ -844,7 +844,7 @@ f"> */
|
||||
c__1);
|
||||
|
||||
i__2 = *tlvls - k;
|
||||
- ptr += pow_ii(&c__2, &i__2);
|
||||
+ ptr += pow_ii(c__2, i__2);
|
||||
/* L70: */
|
||||
}
|
||||
|
||||
diff --git a/lapack-netlib/SRC/slalsa.c b/lapack-netlib/SRC/slalsa.c
|
||||
index 53da2c7b..77a79b80 100644
|
||||
--- a/lapack-netlib/SRC/slalsa.c
|
||||
+++ b/lapack-netlib/SRC/slalsa.c
|
||||
@@ -946,7 +946,7 @@ f"> */
|
||||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
- j = pow_ii(&c__2, &nlvl);
|
||||
+ j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
@@ -960,7 +960,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
@@ -1005,7 +1005,7 @@ L50:
|
||||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__2);
|
||||
+ lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
diff --git a/lapack-netlib/SRC/slasd0.c b/lapack-netlib/SRC/slasd0.c
|
||||
index aa553579..be1a7419 100644
|
||||
--- a/lapack-netlib/SRC/slasd0.c
|
||||
+++ b/lapack-netlib/SRC/slasd0.c
|
||||
@@ -821,7 +821,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
diff --git a/lapack-netlib/SRC/slasda.c b/lapack-netlib/SRC/slasda.c
|
||||
index 71424c3f..1d336d1c 100644
|
||||
--- a/lapack-netlib/SRC/slasda.c
|
||||
+++ b/lapack-netlib/SRC/slasda.c
|
||||
@@ -1023,7 +1023,7 @@ f"> */
|
||||
|
||||
/* Now conquer each subproblem bottom-up. */
|
||||
|
||||
- j = pow_ii(&c__2, &nlvl);
|
||||
+ j = pow_ii(c__2, nlvl);
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
lvl2 = (lvl << 1) - 1;
|
||||
|
||||
@@ -1035,7 +1035,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
diff --git a/lapack-netlib/SRC/sstedc.c b/lapack-netlib/SRC/sstedc.c
|
||||
index 46ed15a1..61ad3dd3 100644
|
||||
--- a/lapack-netlib/SRC/sstedc.c
|
||||
+++ b/lapack-netlib/SRC/sstedc.c
|
||||
@@ -804,10 +804,10 @@ f"> */
|
||||
lwmin = *n - 1 << 1;
|
||||
} else {
|
||||
lgn = (integer) (log((real) (*n)) / log(2.f));
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
if (icompz == 1) {
|
||||
diff --git a/lapack-netlib/SRC/zlaed0.c b/lapack-netlib/SRC/zlaed0.c
|
||||
index 37bd12b0..2b25f6e4 100644
|
||||
--- a/lapack-netlib/SRC/zlaed0.c
|
||||
+++ b/lapack-netlib/SRC/zlaed0.c
|
||||
@@ -793,10 +793,10 @@ L10:
|
||||
|
||||
temp = log((doublereal) (*n)) / log(2.);
|
||||
lgn = (integer) temp;
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
iprmpt = indxq + *n + 1;
|
||||
diff --git a/lapack-netlib/SRC/zlaed7.c b/lapack-netlib/SRC/zlaed7.c
|
||||
index 09305191..8665ee12 100644
|
||||
--- a/lapack-netlib/SRC/zlaed7.c
|
||||
+++ b/lapack-netlib/SRC/zlaed7.c
|
||||
@@ -864,11 +864,11 @@ f"> */
|
||||
/* Form the z-vector which consists of the last row of Q_1 and the */
|
||||
/* first row of Q_2. */
|
||||
|
||||
- ptr = pow_ii(&c__2, tlvls) + 1;
|
||||
+ ptr = pow_ii(c__2, *tlvls) + 1;
|
||||
i__1 = *curlvl - 1;
|
||||
for (i__ = 1; i__ <= i__1; ++i__) {
|
||||
i__2 = *tlvls - i__;
|
||||
- ptr += pow_ii(&c__2, &i__2);
|
||||
+ ptr += pow_ii(c__2, i__2);
|
||||
/* L10: */
|
||||
}
|
||||
curr = ptr + *curpbm;
|
||||
diff --git a/lapack-netlib/SRC/zlalsa.c b/lapack-netlib/SRC/zlalsa.c
|
||||
index d17016e7..cd0819c3 100644
|
||||
--- a/lapack-netlib/SRC/zlalsa.c
|
||||
+++ b/lapack-netlib/SRC/zlalsa.c
|
||||
@@ -1051,7 +1051,7 @@ f"> */
|
||||
/* Finally go through the left singular vector matrices of all */
|
||||
/* the other subproblems bottom-up on the tree. */
|
||||
|
||||
- j = pow_ii(&c__2, &nlvl);
|
||||
+ j = pow_ii(c__2, nlvl);
|
||||
sqre = 0;
|
||||
|
||||
for (lvl = nlvl; lvl >= 1; --lvl) {
|
||||
@@ -1065,7 +1065,7 @@ f"> */
|
||||
ll = 1;
|
||||
} else {
|
||||
i__1 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__1);
|
||||
+ lf = pow_ii(c__2, i__1);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__1 = ll;
|
||||
@@ -1110,7 +1110,7 @@ L170:
|
||||
ll = 1;
|
||||
} else {
|
||||
i__2 = lvl - 1;
|
||||
- lf = pow_ii(&c__2, &i__2);
|
||||
+ lf = pow_ii(c__2, i__2);
|
||||
ll = (lf << 1) - 1;
|
||||
}
|
||||
i__2 = lf;
|
||||
diff --git a/lapack-netlib/SRC/zstedc.c b/lapack-netlib/SRC/zstedc.c
|
||||
index 4cfc4184..55baba2d 100644
|
||||
--- a/lapack-netlib/SRC/zstedc.c
|
||||
+++ b/lapack-netlib/SRC/zstedc.c
|
||||
@@ -836,10 +836,10 @@ f"> */
|
||||
lrwmin = *n - 1 << 1;
|
||||
} else if (icompz == 1) {
|
||||
lgn = (integer) (log((doublereal) (*n)) / log(2.));
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
- if (pow_ii(&c__2, &lgn) < *n) {
|
||||
+ if (pow_ii(c__2, lgn) < *n) {
|
||||
++lgn;
|
||||
}
|
||||
lwmin = *n * *n;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,26 +0,0 @@
|
||||
From 91110f92d218492d0efbdc1fdf34277ca45f4b36 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed, 14 Sep 2022 14:03:31 +0200
|
||||
Subject: [PATCH] fix missing return type in function declaration
|
||||
Content-type: text/plain
|
||||
|
||||
---
|
||||
ctest/c_sblat1c.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/ctest/c_sblat1c.c b/ctest/c_sblat1c.c
|
||||
index 4993d31b..57e4707a 100644
|
||||
--- a/ctest/c_sblat1c.c
|
||||
+++ b/ctest/c_sblat1c.c
|
||||
@@ -969,7 +969,7 @@ real *sfac;
|
||||
1.17 };
|
||||
|
||||
/* Local variables */
|
||||
- extern /* Subroutine */ srottest_();
|
||||
+ extern /* Subroutine */ void srottest_();
|
||||
static integer i__, k, ksize;
|
||||
extern /* Subroutine */ int stest_(), srotmtest_();
|
||||
static integer ki, kn;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,26 +0,0 @@
|
||||
From 9402df5604e69f86f58953e3883f33f98c930baf Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed, 14 Sep 2022 21:44:34 +0200
|
||||
Subject: [PATCH] Fix missing external declaration
|
||||
Content-type: text/plain
|
||||
|
||||
---
|
||||
driver/others/blas_server_omp.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c
|
||||
index 1a5fd06a..c158f92e 100644
|
||||
--- a/driver/others/blas_server_omp.c
|
||||
+++ b/driver/others/blas_server_omp.c
|
||||
@@ -69,6 +69,8 @@
|
||||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
+extern int openblas_omp_adaptive_env();
|
||||
+
|
||||
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
|
||||
#ifdef HAVE_C11
|
||||
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,62 +0,0 @@
|
||||
From 101a2c77c3f3610933f450cefca3e312edab2186 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Thu, 15 Sep 2022 09:19:19 +0200
|
||||
Subject: [PATCH] Fix warnings
|
||||
Content-type: text/plain
|
||||
|
||||
---
|
||||
kernel/x86_64/dgemm_ncopy_8_skylakex.c | 24 ++++++++++++------------
|
||||
kernel/x86_64/omatcopy_rt.c | 2 +-
|
||||
2 files changed, 13 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/kernel/x86_64/dgemm_ncopy_8_skylakex.c b/kernel/x86_64/dgemm_ncopy_8_skylakex.c
|
||||
index 74b336f3..874ef68d 100644
|
||||
--- a/kernel/x86_64/dgemm_ncopy_8_skylakex.c
|
||||
+++ b/kernel/x86_64/dgemm_ncopy_8_skylakex.c
|
||||
@@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __
|
||||
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
|
||||
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
|
||||
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
|
||||
- FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
|
||||
- FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
|
||||
- FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
|
||||
- FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
|
||||
- FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
|
||||
- FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
|
||||
- FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
|
||||
- FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
|
||||
- FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
|
||||
- FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
|
||||
- FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
|
||||
- FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
|
||||
+ FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ;
|
||||
+ FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24;
|
||||
+ FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ;
|
||||
+ FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32;
|
||||
+ FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ;
|
||||
+ FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40;
|
||||
+ FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ;
|
||||
+ FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48;
|
||||
+ FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ;
|
||||
+ FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56;
|
||||
+ FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ;
|
||||
+ FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64;
|
||||
|
||||
|
||||
aoffset = a;
|
||||
diff --git a/kernel/x86_64/omatcopy_rt.c b/kernel/x86_64/omatcopy_rt.c
|
||||
index e695f00c..b11893f5 100644
|
||||
--- a/kernel/x86_64/omatcopy_rt.c
|
||||
+++ b/kernel/x86_64/omatcopy_rt.c
|
||||
@@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\
|
||||
}
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){
|
||||
- float *src, *dst, *dst_tmp, *src_base, *dst_base;
|
||||
+ float *src, *dst, *dst_tmp=0, *src_base, *dst_base;
|
||||
uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0;
|
||||
BLASLONG cols_left, rows_done; float ALPHA = alpha;
|
||||
if(ALPHA==0.0){
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
Fixing FTBFS (failure to build from source) on POWER:
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=2120974
|
||||
|
||||
Upstream issue: https://github.com/xianyi/OpenBLAS/issues/3738
|
||||
Upstream fix: https://github.com/xianyi/OpenBLAS/pull/3718
|
||||
|
||||
commit d9dc015cfc78fc32f555995a89d6957ef0184ea2
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Mon Aug 8 14:52:10 2022 +0200
|
||||
|
||||
Use blasint for INTERFACE64 compatibility
|
||||
|
||||
diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c
|
||||
index a2c358cf..d4b59145 100644
|
||||
--- a/test/compare_sgemm_sbgemm.c
|
||||
+++ b/test/compare_sgemm_sbgemm.c
|
||||
@@ -76,9 +76,9 @@ float16to32 (bfloat16_bits f16)
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
- int m, n, k;
|
||||
+ blasint m, n, k;
|
||||
int i, j, l;
|
||||
- int x;
|
||||
+ blasint x;
|
||||
int ret = 0;
|
||||
int loop = 100;
|
||||
char transA = 'N', transB = 'N';
|
||||
|
||||
commit 3d338b57de1837f1e2264a1262a9ee9203f31c6c
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Mon Aug 8 17:09:45 2022 +0200
|
||||
|
||||
remove spurious loops
|
||||
|
||||
diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c
|
||||
index d4b59145..276fecae 100644
|
||||
--- a/test/compare_sgemm_sbgemm.c
|
||||
+++ b/test/compare_sgemm_sbgemm.c
|
||||
@@ -112,7 +112,6 @@ main (int argc, char *argv[])
|
||||
&m, BB, &k, &beta, CC, &m);
|
||||
for (i = 0; i < n; i++)
|
||||
for (j = 0; j < m; j++)
|
||||
- for (l = 0; l < k; l++)
|
||||
if (fabs (CC[i * m + j] - C[i * m + j]) > 1.0)
|
||||
ret++;
|
||||
if (transA == 'N' && transB == 'N')
|
||||
@@ -126,7 +125,6 @@ main (int argc, char *argv[])
|
||||
}
|
||||
for (i = 0; i < n; i++)
|
||||
for (j = 0; j < m; j++)
|
||||
- for (l = 0; l < k; l++)
|
||||
if (CC[i * m + j] != DD[i * m + j])
|
||||
ret++;
|
||||
}
|
752
openblas-0.3.26-incompatibletypes.patch
Normal file
752
openblas-0.3.26-incompatibletypes.patch
Normal file
@ -0,0 +1,752 @@
|
||||
This is a compilation of additional upstream commits related to:
|
||||
https://github.com/OpenMathLib/OpenBLAS/issues/4475
|
||||
|
||||
|
||||
From 63004fa5f76ef1058975271314bc4591e7878726 Mon Sep 17 00:00:00 2001
|
||||
From: Honza Horak <hhorak@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 09:49:41 +0100
|
||||
Subject: [PATCH 1/6] Fix incompatible pointer type in BFLOAT16 mode
|
||||
|
||||
Upstream commit:
|
||||
|
||||
commit 68d354814f9f846338e1988c4f609c8add419012
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Sun Feb 4 01:14:22 2024 +0100
|
||||
|
||||
Fix incompatible pointer type in BFLOAT16 mode
|
||||
---
|
||||
interface/gemmt.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/interface/gemmt.c b/interface/gemmt.c
|
||||
index 046432670..2fb9954ad 100644
|
||||
--- a/interface/gemmt.c
|
||||
+++ b/interface/gemmt.c
|
||||
@@ -478,7 +478,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||
#endif
|
||||
// for alignment
|
||||
buffer_size = (buffer_size + 3) & ~3;
|
||||
- STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||
+ STACK_ALLOC(buffer_size, IFLOAT, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
@@ -567,7 +567,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||
#endif
|
||||
// for alignment
|
||||
buffer_size = (buffer_size + 3) & ~3;
|
||||
- STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||
+ STACK_ALLOC(buffer_size, IFLOAT, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
||||
From edfd4f52f3f22344863c233411ae792fb12aa81b Mon Sep 17 00:00:00 2001
|
||||
From: Honza Horak <hhorak@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 09:53:40 +0100
|
||||
Subject: [PATCH 2/6] Separate the interface for SBGEMMT from GEMMT due to
|
||||
differences in GEMV arguments
|
||||
|
||||
Upstream commit:
|
||||
|
||||
commit d4db6a9f16a5c82bbe1860f591cc731c4d83d7c8
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Tue Feb 6 22:23:47 2024 +0100
|
||||
|
||||
Separate the interface for SBGEMMT from GEMMT due to differences in GEMV arguments
|
||||
---
|
||||
interface/CMakeLists.txt | 1 +
|
||||
interface/Makefile | 4 +-
|
||||
interface/sbgemmt.c | 447 +++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 450 insertions(+), 2 deletions(-)
|
||||
create mode 100644 interface/sbgemmt.c
|
||||
|
||||
diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt
|
||||
index 4e082928b..3110f2e90 100644
|
||||
--- a/interface/CMakeLists.txt
|
||||
+++ b/interface/CMakeLists.txt
|
||||
@@ -119,6 +119,7 @@ endif ()
|
||||
if (BUILD_BFLOAT16)
|
||||
GenerateNamedObjects("bf16dot.c" "" "sbdot" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("gemm.c" "" "sbgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
+ GenerateNamedObjects("gemmt.c" "" "sbgemmt" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("sbgemv.c" "" "sbgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("tobf16.c" "SINGLE_PREC" "sbstobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("tobf16.c" "DOUBLE_PREC" "sbdtobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
diff --git a/interface/Makefile b/interface/Makefile
|
||||
index 78335357b..d106ca568 100644
|
||||
--- a/interface/Makefile
|
||||
+++ b/interface/Makefile
|
||||
@@ -1301,7 +1301,7 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
-sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
+sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : sbgemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
endif
|
||||
|
||||
@@ -1932,7 +1932,7 @@ cblas_sgemmt.$(SUFFIX) cblas_sgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
-cblas_sbgemmt.$(SUFFIX) cblas_sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
+cblas_sbgemmt.$(SUFFIX) cblas_sbgemmt.$(PSUFFIX) : sbgemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
endif
|
||||
|
||||
diff --git a/interface/sbgemmt.c b/interface/sbgemmt.c
|
||||
new file mode 100644
|
||||
index 000000000..759af4bfb
|
||||
--- /dev/null
|
||||
+++ b/interface/sbgemmt.c
|
||||
@@ -0,0 +1,447 @@
|
||||
+/*********************************************************************/
|
||||
+/* Copyright 2024, The OpenBLAS Project. */
|
||||
+/* All rights reserved. */
|
||||
+/* */
|
||||
+/* Redistribution and use in source and binary forms, with or */
|
||||
+/* without modification, are permitted provided that the following */
|
||||
+/* conditions are met: */
|
||||
+/* */
|
||||
+/* 1. Redistributions of source code must retain the above */
|
||||
+/* copyright notice, this list of conditions and the following */
|
||||
+/* disclaimer. */
|
||||
+/* */
|
||||
+/* 2. Redistributions in binary form must reproduce the above */
|
||||
+/* copyright notice, this list of conditions and the following */
|
||||
+/* disclaimer in the documentation and/or other materials */
|
||||
+/* provided with the distribution. */
|
||||
+/* */
|
||||
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
+/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
+/* */
|
||||
+/*********************************************************************/
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include "common.h"
|
||||
+
|
||||
+#define SMP_THRESHOLD_MIN 65536.0
|
||||
+#define ERROR_NAME "SBGEMMT "
|
||||
+
|
||||
+#ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
+#define GEMM_MULTITHREAD_THRESHOLD 4
|
||||
+#endif
|
||||
+
|
||||
+#ifndef CBLAS
|
||||
+
|
||||
+void NAME(char *UPLO, char *TRANSA, char *TRANSB,
|
||||
+ blasint * M, blasint * K,
|
||||
+ FLOAT * Alpha,
|
||||
+ IFLOAT * a, blasint * ldA,
|
||||
+ IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
|
||||
+{
|
||||
+
|
||||
+ blasint m, k;
|
||||
+ blasint lda, ldb, ldc;
|
||||
+ int transa, transb, uplo;
|
||||
+ blasint info;
|
||||
+
|
||||
+ char transA, transB, Uplo;
|
||||
+ blasint nrowa, nrowb;
|
||||
+ IFLOAT *buffer;
|
||||
+ IFLOAT *aa, *bb;
|
||||
+ FLOAT *cc;
|
||||
+ FLOAT alpha, beta;
|
||||
+
|
||||
+ PRINT_DEBUG_NAME;
|
||||
+
|
||||
+ m = *M;
|
||||
+ k = *K;
|
||||
+
|
||||
+ alpha = *Alpha;
|
||||
+ beta = *Beta;
|
||||
+
|
||||
+ lda = *ldA;
|
||||
+ ldb = *ldB;
|
||||
+ ldc = *ldC;
|
||||
+
|
||||
+ transA = *TRANSA;
|
||||
+ transB = *TRANSB;
|
||||
+ Uplo = *UPLO;
|
||||
+ TOUPPER(transA);
|
||||
+ TOUPPER(transB);
|
||||
+ TOUPPER(Uplo);
|
||||
+
|
||||
+ transa = -1;
|
||||
+ transb = -1;
|
||||
+ uplo = -1;
|
||||
+
|
||||
+ if (transA == 'N')
|
||||
+ transa = 0;
|
||||
+ if (transA == 'T')
|
||||
+ transa = 1;
|
||||
+
|
||||
+ if (transA == 'R')
|
||||
+ transa = 0;
|
||||
+ if (transA == 'C')
|
||||
+ transa = 1;
|
||||
+
|
||||
+ if (transB == 'N')
|
||||
+ transb = 0;
|
||||
+ if (transB == 'T')
|
||||
+ transb = 1;
|
||||
+
|
||||
+ if (transB == 'R')
|
||||
+ transb = 0;
|
||||
+ if (transB == 'C')
|
||||
+ transb = 1;
|
||||
+
|
||||
+ if (Uplo == 'U')
|
||||
+ uplo = 0;
|
||||
+ if (Uplo == 'L')
|
||||
+ uplo = 1;
|
||||
+ nrowa = m;
|
||||
+ if (transa & 1) nrowa = k;
|
||||
+ nrowb = k;
|
||||
+ if (transb & 1) nrowb = m;
|
||||
+
|
||||
+ info = 0;
|
||||
+
|
||||
+ if (ldc < MAX(1, m))
|
||||
+ info = 13;
|
||||
+ if (ldb < MAX(1, nrowb))
|
||||
+ info = 10;
|
||||
+ if (lda < MAX(1, nrowa))
|
||||
+ info = 8;
|
||||
+ if (k < 0)
|
||||
+ info = 5;
|
||||
+ if (m < 0)
|
||||
+ info = 4;
|
||||
+ if (transb < 0)
|
||||
+ info = 3;
|
||||
+ if (transa < 0)
|
||||
+ info = 2;
|
||||
+ if (uplo < 0)
|
||||
+ info = 1;
|
||||
+
|
||||
+ if (info != 0) {
|
||||
+ BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
+ return;
|
||||
+ }
|
||||
+#else
|
||||
+
|
||||
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||
+ enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m,
|
||||
+ blasint k,
|
||||
+ FLOAT alpha,
|
||||
+ IFLOAT * A, blasint LDA,
|
||||
+ IFLOAT * B, blasint LDB, FLOAT beta, FLOAT * c, blasint ldc)
|
||||
+{
|
||||
+ IFLOAT *aa, *bb;
|
||||
+ FLOAT *cc;
|
||||
+
|
||||
+ int transa, transb, uplo;
|
||||
+ blasint info;
|
||||
+ blasint lda, ldb;
|
||||
+ IFLOAT *a, *b;
|
||||
+ XFLOAT *buffer;
|
||||
+
|
||||
+ PRINT_DEBUG_CNAME;
|
||||
+
|
||||
+ uplo = -1;
|
||||
+ transa = -1;
|
||||
+ transb = -1;
|
||||
+ info = 0;
|
||||
+
|
||||
+ if (order == CblasColMajor) {
|
||||
+ if (Uplo == CblasUpper) uplo = 0;
|
||||
+ if (Uplo == CblasLower) uplo = 1;
|
||||
+
|
||||
+ if (TransA == CblasNoTrans)
|
||||
+ transa = 0;
|
||||
+ if (TransA == CblasTrans)
|
||||
+ transa = 1;
|
||||
+
|
||||
+ if (TransA == CblasConjNoTrans)
|
||||
+ transa = 0;
|
||||
+ if (TransA == CblasConjTrans)
|
||||
+ transa = 1;
|
||||
+
|
||||
+ if (TransB == CblasNoTrans)
|
||||
+ transb = 0;
|
||||
+ if (TransB == CblasTrans)
|
||||
+ transb = 1;
|
||||
+
|
||||
+ if (TransB == CblasConjNoTrans)
|
||||
+ transb = 0;
|
||||
+ if (TransB == CblasConjTrans)
|
||||
+ transb = 1;
|
||||
+
|
||||
+ a = (void *)A;
|
||||
+ b = (void *)B;
|
||||
+ lda = LDA;
|
||||
+ ldb = LDB;
|
||||
+
|
||||
+ info = -1;
|
||||
+
|
||||
+ blasint nrowa;
|
||||
+ blasint nrowb;
|
||||
+ nrowa = m;
|
||||
+ if (transa & 1) nrowa = k;
|
||||
+ nrowb = k;
|
||||
+ if (transb & 1) nrowb = m;
|
||||
+
|
||||
+ if (ldc < MAX(1, m))
|
||||
+ info = 13;
|
||||
+ if (ldb < MAX(1, nrowb))
|
||||
+ info = 10;
|
||||
+ if (lda < MAX(1, nrowa))
|
||||
+ info = 8;
|
||||
+ if (k < 0)
|
||||
+ info = 5;
|
||||
+ if (m < 0)
|
||||
+ info = 4;
|
||||
+ if (transb < 0)
|
||||
+ info = 3;
|
||||
+ if (transa < 0)
|
||||
+ info = 2;
|
||||
+ if (uplo < 0)
|
||||
+ info = 1;
|
||||
+ }
|
||||
+
|
||||
+ if (order == CblasRowMajor) {
|
||||
+
|
||||
+ a = (void *)B;
|
||||
+ b = (void *)A;
|
||||
+
|
||||
+ lda = LDB;
|
||||
+ ldb = LDA;
|
||||
+
|
||||
+ if (Uplo == CblasUpper) uplo = 0;
|
||||
+ if (Uplo == CblasLower) uplo = 1;
|
||||
+
|
||||
+ if (TransB == CblasNoTrans)
|
||||
+ transa = 0;
|
||||
+ if (TransB == CblasTrans)
|
||||
+ transa = 1;
|
||||
+
|
||||
+ if (TransB == CblasConjNoTrans)
|
||||
+ transa = 0;
|
||||
+ if (TransB == CblasConjTrans)
|
||||
+ transa = 1;
|
||||
+
|
||||
+ if (TransA == CblasNoTrans)
|
||||
+ transb = 0;
|
||||
+ if (TransA == CblasTrans)
|
||||
+ transb = 1;
|
||||
+
|
||||
+ if (TransA == CblasConjNoTrans)
|
||||
+ transb = 0;
|
||||
+ if (TransA == CblasConjTrans)
|
||||
+ transb = 1;
|
||||
+
|
||||
+ info = -1;
|
||||
+
|
||||
+ blasint ncola;
|
||||
+ blasint ncolb;
|
||||
+
|
||||
+ ncola = m;
|
||||
+ if (transa & 1) ncola = k;
|
||||
+ ncolb = k;
|
||||
+
|
||||
+ if (transb & 1) {
|
||||
+ ncolb = m;
|
||||
+ }
|
||||
+
|
||||
+ if (ldc < MAX(1,m))
|
||||
+ info = 13;
|
||||
+ if (ldb < MAX(1, ncolb))
|
||||
+ info = 8;
|
||||
+ if (lda < MAX(1, ncola))
|
||||
+ info = 10;
|
||||
+ if (k < 0)
|
||||
+ info = 5;
|
||||
+ if (m < 0)
|
||||
+ info = 4;
|
||||
+ if (transb < 0)
|
||||
+ info = 2;
|
||||
+ if (transa < 0)
|
||||
+ info = 3;
|
||||
+ if (uplo < 0)
|
||||
+ info = 1;
|
||||
+ }
|
||||
+
|
||||
+ if (info >= 0) {
|
||||
+ BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+#endif
|
||||
+ int buffer_size;
|
||||
+ blasint i, j;
|
||||
+
|
||||
+#ifdef SMP
|
||||
+ int nthreads;
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
+#ifdef SMP
|
||||
+ static int (*gemv_thread[]) (BLASLONG, BLASLONG, FLOAT, IFLOAT *,
|
||||
+ BLASLONG, IFLOAT *, BLASLONG, FLOAT,
|
||||
+ FLOAT *, BLASLONG, int) = {
|
||||
+ sbgemv_thread_n, sbgemv_thread_t,
|
||||
+ };
|
||||
+#endif
|
||||
+ int (*gemv[]) (BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG,
|
||||
+ IFLOAT *, BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
|
||||
+ SBGEMV_N, SBGEMV_T,};
|
||||
+
|
||||
+
|
||||
+ if (m == 0)
|
||||
+ return;
|
||||
+
|
||||
+ IDEBUG_START;
|
||||
+
|
||||
+ const blasint incb = ((transb & 1) == 0) ? 1 : ldb;
|
||||
+
|
||||
+ if (uplo == 1) {
|
||||
+ for (i = 0; i < m; i++) {
|
||||
+ j = m - i;
|
||||
+
|
||||
+ aa = a + i;
|
||||
+ bb = b + i * ldb;
|
||||
+ if (transa & 1) {
|
||||
+ aa = a + lda * i;
|
||||
+ }
|
||||
+ if (transb & 1)
|
||||
+ bb = b + i;
|
||||
+ cc = c + i * ldc + i;
|
||||
+
|
||||
+#if 0
|
||||
+ if (beta != ONE)
|
||||
+ SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
|
||||
+
|
||||
+ if (alpha == ZERO)
|
||||
+ continue;
|
||||
+#endif
|
||||
+
|
||||
+ IDEBUG_START;
|
||||
+
|
||||
+ buffer_size = j + k + 128 / sizeof(FLOAT);
|
||||
+#ifdef WINDOWS_ABI
|
||||
+ buffer_size += 160 / sizeof(FLOAT);
|
||||
+#endif
|
||||
+ // for alignment
|
||||
+ buffer_size = (buffer_size + 3) & ~3;
|
||||
+ STACK_ALLOC(buffer_size, IFLOAT, buffer);
|
||||
+
|
||||
+#ifdef SMP
|
||||
+
|
||||
+ if (1L * j * k < 2304L * GEMM_MULTITHREAD_THRESHOLD)
|
||||
+ nthreads = 1;
|
||||
+ else
|
||||
+ nthreads = num_cpu_avail(2);
|
||||
+
|
||||
+ if (nthreads == 1) {
|
||||
+#endif
|
||||
+
|
||||
+ if (!(transa & 1))
|
||||
+ (gemv[(int)transa]) (j, k, alpha, aa, lda,
|
||||
+ bb, incb, beta, cc, 1);
|
||||
+ else
|
||||
+ (gemv[(int)transa]) (k, j, alpha, aa, lda,
|
||||
+ bb, incb, beta, cc, 1);
|
||||
+
|
||||
+#ifdef SMP
|
||||
+ } else {
|
||||
+ if (!(transa & 1))
|
||||
+ (gemv_thread[(int)transa]) (j, k, alpha, aa,
|
||||
+ lda, bb, incb, beta, cc,
|
||||
+ 1, nthreads);
|
||||
+ else
|
||||
+ (gemv_thread[(int)transa]) (k, j, alpha, aa,
|
||||
+ lda, bb, incb, beta, cc,
|
||||
+ 1, nthreads);
|
||||
+
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ STACK_FREE(buffer);
|
||||
+ }
|
||||
+ } else {
|
||||
+
|
||||
+ for (i = 0; i < m; i++) {
|
||||
+ j = i + 1;
|
||||
+
|
||||
+ bb = b + i * ldb;
|
||||
+ if (transb & 1) {
|
||||
+ bb = b + i;
|
||||
+ }
|
||||
+ cc = c + i * ldc;
|
||||
+
|
||||
+#if 0
|
||||
+ if (beta != ONE)
|
||||
+ SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
|
||||
+
|
||||
+ if (alpha == ZERO)
|
||||
+ continue;
|
||||
+#endif
|
||||
+ IDEBUG_START;
|
||||
+
|
||||
+ buffer_size = j + k + 128 / sizeof(FLOAT);
|
||||
+#ifdef WINDOWS_ABI
|
||||
+ buffer_size += 160 / sizeof(FLOAT);
|
||||
+#endif
|
||||
+ // for alignment
|
||||
+ buffer_size = (buffer_size + 3) & ~3;
|
||||
+ STACK_ALLOC(buffer_size, IFLOAT, buffer);
|
||||
+
|
||||
+#ifdef SMP
|
||||
+
|
||||
+ if (1L * j * k < 2304L * GEMM_MULTITHREAD_THRESHOLD)
|
||||
+ nthreads = 1;
|
||||
+ else
|
||||
+ nthreads = num_cpu_avail(2);
|
||||
+
|
||||
+ if (nthreads == 1) {
|
||||
+#endif
|
||||
+
|
||||
+ if (!(transa & 1))
|
||||
+ (gemv[(int)transa]) (j, k, alpha, a, lda, bb,
|
||||
+ incb, beta, cc, 1);
|
||||
+ else
|
||||
+ (gemv[(int)transa]) (k, j, alpha, a, lda, bb,
|
||||
+ incb, beta, cc, 1);
|
||||
+
|
||||
+#ifdef SMP
|
||||
+ } else {
|
||||
+ if (!(transa & 1))
|
||||
+ (gemv_thread[(int)transa]) (j, k, alpha, a, lda,
|
||||
+ bb, incb, beta, cc, 1,
|
||||
+ nthreads);
|
||||
+ else
|
||||
+ (gemv_thread[(int)transa]) (k, j, alpha, a, lda,
|
||||
+ bb, incb, beta, cc, 1,
|
||||
+ nthreads);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ STACK_FREE(buffer);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ IDEBUG_END;
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
--
|
||||
2.41.0
|
||||
|
||||
From 9a4c2d61a345866e4540f9d6da87eb881419b411 Mon Sep 17 00:00:00 2001
|
||||
From: Honza Horak <hhorak@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 09:54:52 +0100
|
||||
Subject: [PATCH 3/6] fix type conversion warnings
|
||||
|
||||
upstream commit:
|
||||
|
||||
commit fb99fc2e6e4ec8ecdcfffe1ca1aeb787464d2825
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed Feb 7 13:42:08 2024 +0100
|
||||
|
||||
fix type conversion warnings
|
||||
---
|
||||
test/compare_sgemm_sbgemm.c | 18 ++++++++++++++----
|
||||
1 file changed, 14 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c
|
||||
index cf808b56d..4afa8bf93 100644
|
||||
--- a/test/compare_sgemm_sbgemm.c
|
||||
+++ b/test/compare_sgemm_sbgemm.c
|
||||
@@ -81,6 +81,16 @@ float16to32 (bfloat16_bits f16)
|
||||
return f32.v;
|
||||
}
|
||||
|
||||
+float
|
||||
+float32to16 (float32_bits f32)
|
||||
+{
|
||||
+ bfloat16_bits f16;
|
||||
+ f16.bits.s = f32.bits.s;
|
||||
+ f16.bits.e = f32.bits.e;
|
||||
+ f16.bits.m = (uint32_t) f32.bits.m >> 16;
|
||||
+ return f32.v;
|
||||
+}
|
||||
+
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
@@ -108,16 +118,16 @@ main (int argc, char *argv[])
|
||||
A[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
B[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
C[j * k + i] = 0;
|
||||
- AA[j * k + i].v = *(uint32_t *) & A[j * k + i] >> 16;
|
||||
- BB[j * k + i].v = *(uint32_t *) & B[j * k + i] >> 16;
|
||||
+ AA[j * k + i].v = float32to16( A[j * k + i] );
|
||||
+ BB[j * k + i].v = float32to16( B[j * k + i] );
|
||||
CC[j * k + i] = 0;
|
||||
DD[j * k + i] = 0;
|
||||
}
|
||||
}
|
||||
SGEMM (&transA, &transB, &m, &n, &k, &alpha, A,
|
||||
&m, B, &k, &beta, C, &m);
|
||||
- SBGEMM (&transA, &transB, &m, &n, &k, &alpha, AA,
|
||||
- &m, BB, &k, &beta, CC, &m);
|
||||
+ SBGEMM (&transA, &transB, &m, &n, &k, &alpha, (bfloat16*) AA,
|
||||
+ &m, (bfloat16*)BB, &k, &beta, CC, &m);
|
||||
for (i = 0; i < n; i++)
|
||||
for (j = 0; j < m; j++)
|
||||
if (fabs (CC[i * m + j] - C[i * m + j]) > 1.0)
|
||||
--
|
||||
2.41.0
|
||||
|
||||
From 5593507ddbd5d35d088cd4db6285de6b9d84a405 Mon Sep 17 00:00:00 2001
|
||||
From: Honza Horak <hhorak@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 09:56:11 +0100
|
||||
Subject: [PATCH 4/6] fix prototype for c/zaxpby
|
||||
|
||||
Upstream commit:
|
||||
|
||||
commit b3fa16345d83b723b8984b78dc6a2bb5d9f3d479
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Thu Feb 8 13:15:34 2024 +0100
|
||||
|
||||
fix prototype for c/zaxpby
|
||||
---
|
||||
common_interface.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/common_interface.h b/common_interface.h
|
||||
index 318827920..1f6cb5f6d 100644
|
||||
--- a/common_interface.h
|
||||
+++ b/common_interface.h
|
||||
@@ -764,8 +764,8 @@ xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||
|
||||
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
-void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
-void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
+void BLASFUNC(caxpby) (blasint *, void *, float *, blasint *, void *, float *, blasint *);
|
||||
+void BLASFUNC(zaxpby) (blasint *, void *, double *, blasint *, void *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
--
|
||||
2.41.0
|
||||
|
||||
From 42b30ed2c54034b2b1dbb15bb9e3e705e704b6a9 Mon Sep 17 00:00:00 2001
|
||||
From: Honza Horak <hhorak@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 09:56:46 +0100
|
||||
Subject: [PATCH 5/6] fix incompatible pointer types
|
||||
|
||||
Upstream commit:
|
||||
|
||||
commit 500ac4de5e20596d5cd797d745db97dd0a62ff86
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Thu Feb 8 13:18:34 2024 +0100
|
||||
|
||||
fix incompatible pointer types
|
||||
---
|
||||
interface/zaxpby.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/interface/zaxpby.c b/interface/zaxpby.c
|
||||
index 3a4db7403..e5065270d 100644
|
||||
--- a/interface/zaxpby.c
|
||||
+++ b/interface/zaxpby.c
|
||||
@@ -39,12 +39,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
-void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
|
||||
+void NAME(blasint *N, void *VALPHA, FLOAT *x, blasint *INCX, void *VBETA, FLOAT *y, blasint *INCY)
|
||||
{
|
||||
|
||||
blasint n = *N;
|
||||
blasint incx = *INCX;
|
||||
blasint incy = *INCY;
|
||||
+ FLOAT* ALPHA = (FLOAT*) VALPHA;
|
||||
+ FLOAT* BETA = (FLOAT*) VBETA;
|
||||
|
||||
#else
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
||||
From 1c525b6e704523912a04fbd026300a2ff95341f3 Mon Sep 17 00:00:00 2001
|
||||
From: Honza Horak <hhorak@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 15:29:17 +0100
|
||||
Subject: [PATCH 6/6] fix sbgemm bfloat16 conversion errors introduced in PR
|
||||
4488
|
||||
|
||||
Upstream commit:
|
||||
|
||||
commit e9f480111e1d5b6f69c8053f79375b0a4242712f
|
||||
Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
||||
Date: Wed Feb 7 19:57:18 2024 +0100
|
||||
|
||||
fix sbgemm bfloat16 conversion errors introduced in PR 4488
|
||||
---
|
||||
test/compare_sgemm_sbgemm.c | 18 ++++++------------
|
||||
1 file changed, 6 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c
|
||||
index 4afa8bf93..bc74233ab 100644
|
||||
--- a/test/compare_sgemm_sbgemm.c
|
||||
+++ b/test/compare_sgemm_sbgemm.c
|
||||
@@ -81,16 +81,6 @@ float16to32 (bfloat16_bits f16)
|
||||
return f32.v;
|
||||
}
|
||||
|
||||
-float
|
||||
-float32to16 (float32_bits f32)
|
||||
-{
|
||||
- bfloat16_bits f16;
|
||||
- f16.bits.s = f32.bits.s;
|
||||
- f16.bits.e = f32.bits.e;
|
||||
- f16.bits.m = (uint32_t) f32.bits.m >> 16;
|
||||
- return f32.v;
|
||||
-}
|
||||
-
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
@@ -110,6 +100,8 @@ main (int argc, char *argv[])
|
||||
float C[m * n];
|
||||
bfloat16_bits AA[m * k], BB[k * n];
|
||||
float DD[m * n], CC[m * n];
|
||||
+ bfloat16 atmp,btmp;
|
||||
+ blasint one=1;
|
||||
|
||||
for (j = 0; j < m; j++)
|
||||
{
|
||||
@@ -118,8 +110,10 @@ main (int argc, char *argv[])
|
||||
A[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
B[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
C[j * k + i] = 0;
|
||||
- AA[j * k + i].v = float32to16( A[j * k + i] );
|
||||
- BB[j * k + i].v = float32to16( B[j * k + i] );
|
||||
+ sbstobf16_(&one, &A[j*k+i], &one, &atmp, &one);
|
||||
+ sbstobf16_(&one, &B[j*k+i], &one, &btmp, &one);
|
||||
+ AA[j * k + i].v = atmp;
|
||||
+ BB[j * k + i].v = btmp;
|
||||
CC[j * k + i] = 0;
|
||||
DD[j * k + i] = 0;
|
||||
}
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,6 +1,6 @@
|
||||
%bcond_with system_lapack
|
||||
# Version of bundled lapack
|
||||
%global lapackver 3.9.1
|
||||
%global lapackver 3.11.0
|
||||
|
||||
# DO NOT "CLEAN UP" OR MODIFY THIS SPEC FILE WITHOUT ASKING THE
|
||||
# MAINTAINER FIRST!
|
||||
@ -14,28 +14,24 @@
|
||||
# "obsoleted" features are still kept in the spec.
|
||||
|
||||
Name: openblas
|
||||
Version: 0.3.21
|
||||
Release: 5%{?dist}
|
||||
Version: 0.3.26
|
||||
Release: 5.0.riscv64%{?dist}
|
||||
Summary: An optimized BLAS library based on GotoBLAS2
|
||||
License: BSD
|
||||
URL: https://github.com/xianyi/OpenBLAS/
|
||||
Source0: https://github.com/xianyi/OpenBLAS/archive/v%{version}/openblas-%{version}.tar.gz
|
||||
|
||||
License: BSD-3-Clause
|
||||
URL: https://github.com/OpenMathLib/OpenBLAS
|
||||
Source0: %url/archive/v%{version}/OpenBLAS-%{version}.tar.gz
|
||||
|
||||
# Use system lapack
|
||||
Patch0: openblas-0.2.15-system_lapack.patch
|
||||
# Drop extra p from threaded library name
|
||||
Patch1: openblas-0.2.5-libname.patch
|
||||
# Don't use constructor priorities on too old architectures
|
||||
Patch2: openblas-0.2.15-constructor.patch
|
||||
# Fix SBGEMM test to work with INTERFACE64
|
||||
Patch3: openblas-0.3.21-sbgemm-test.patch
|
||||
# Supply the proper flags to the test makefile
|
||||
Patch4: openblas-0.3.11-tests.patch
|
||||
# C99 porting.
|
||||
Patch5: openblas-0.3.21-c99-1.patch
|
||||
Patch6: openblas-0.3.21-c99-2.patch
|
||||
Patch7: openblas-0.3.21-c99-3.patch
|
||||
Patch8: openblas-0.3.21-c99-4.patch
|
||||
Patch9: openblas-0.3.21-c99-5.patch
|
||||
Patch3: openblas-0.3.11-tests.patch
|
||||
# Fix incompatible pointer types (causes FTBFS on ppc64le)
|
||||
Patch4: openblas-0.3.26-incompatibletypes.patch
|
||||
|
||||
BuildRequires: make
|
||||
BuildRequires: gcc
|
||||
@ -248,13 +244,8 @@ cd OpenBLAS-%{version}
|
||||
%if 0%{?rhel} == 5
|
||||
%patch 2 -p1 -b .constructor
|
||||
%endif
|
||||
%patch3 -p1 -b .sbgemm
|
||||
%patch4 -p1 -b .tests
|
||||
%patch5 -p1 -b .c99-1
|
||||
%patch6 -p1 -b .c99-2
|
||||
%patch7 -p1 -b .c99-3
|
||||
%patch8 -p1 -b .c99-4
|
||||
%patch9 -p1 -b .c99-5
|
||||
%patch 3 -p1 -b .tests
|
||||
%patch 4 -p1 -b .incompatibletypes
|
||||
|
||||
# Fix source permissions
|
||||
find -name \*.f -exec chmod 644 {} \;
|
||||
@ -392,6 +383,9 @@ TARGET="TARGET=ARMV8 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1"
|
||||
%ifarch s390x
|
||||
TARGET="TARGET=ZARCH_GENERIC DYNAMIC_ARCH=1 DYNAMIC_OLDER=1"
|
||||
%endif
|
||||
%ifarch riscv64
|
||||
TARGET="TARGET=RISCV64_GENERIC DYNAMIC_ARCH=0"
|
||||
%endif
|
||||
|
||||
%if 0%{?rhel} == 5
|
||||
# Gfortran too old to recognize -frecursive
|
||||
@ -454,6 +448,9 @@ suffix=""
|
||||
%ifarch armv7hl
|
||||
suffix="_armv7"
|
||||
%endif
|
||||
%ifarch riscv64
|
||||
suffix="_riscv64_generic"
|
||||
%endif
|
||||
slibname=`basename %{buildroot}%{_libdir}/libopenblas${suffix}-*.so .so`
|
||||
mv %{buildroot}%{_libdir}/${slibname}.a %{buildroot}%{_libdir}/lib%{name}.a
|
||||
if [[ "$suffix" != "" ]]; then
|
||||
@ -660,6 +657,38 @@ rm -rf %{buildroot}%{_libdir}/pkgconfig
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Thu Nov 21 2024 David Abdurachmanov <davidlt@rivosinc.com> - 0.3.26-5.0.riscv64
|
||||
- Add support for riscv64
|
||||
|
||||
* Thu Jul 18 2024 Fedora Release Engineering <releng@fedoraproject.org> - 0.3.26-5
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_41_Mass_Rebuild
|
||||
|
||||
* Fri Feb 09 2024 Honza Horak <hhorak@redhat.com> - 0.3.26-4
|
||||
- Fix FTBFS on ppc64le and s390x
|
||||
Resolves: BZ#2261415
|
||||
Resolves: RHEL-24745
|
||||
|
||||
* Thu Jan 25 2024 Fedora Release Engineering <releng@fedoraproject.org> - 0.3.26-3
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild
|
||||
|
||||
* Sun Jan 21 2024 Fedora Release Engineering <releng@fedoraproject.org> - 0.3.26-2
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild
|
||||
|
||||
* Sun Jan 07 2024 İsmail Dönmez <ismail@i10z.com> - 0.3.26-1
|
||||
- Update to 0.3.26
|
||||
|
||||
* Thu Nov 30 2023 İsmail Dönmez <ismail@i10z.com> - 0.3.25-1
|
||||
- Update to 0.3.25
|
||||
|
||||
* Fri Oct 27 2023 Ali Erdinc Koroglu <aekoroglu@fedoraproject.org> - 0.3.24-1
|
||||
- Update to 0.3.24
|
||||
|
||||
* Tue Aug 01 2023 Pavel Šimovec <psimovec@redhat.com> - 0.3.23-1
|
||||
- Update to 0.3.23 (RHBZ #2182038)
|
||||
|
||||
* Thu Jul 20 2023 Fedora Release Engineering <releng@fedoraproject.org> - 0.3.21-6
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild
|
||||
|
||||
* Thu Jan 19 2023 Fedora Release Engineering <releng@fedoraproject.org> - 0.3.21-5
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_38_Mass_Rebuild
|
||||
|
||||
|
2
sources
2
sources
@ -1 +1 @@
|
||||
SHA512 (openblas-0.3.21.tar.gz) = 4625c8e6ccfa9120281fd714d3f6b7c3ba2265470c1be76121d6b25dc3dacb899d26e5d9a417ddc616d23909f1411495aa995ef8d8d6df8511cd5cefbabcb1c5
|
||||
SHA512 (OpenBLAS-0.3.26.tar.gz) = 01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373
|
||||
|
Loading…
Reference in New Issue
Block a user