ntl-9.9.1

This commit is contained in:
Jerry James 2016-06-02 13:06:01 -06:00
parent 0da5f48a1e
commit e69d8b7672
4 changed files with 71 additions and 66 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@
/ntl-9.6.4.tar.gz
/ntl-9.7.0.tar.gz
/ntl-9.8.0.tar.gz
/ntl-9.9.1.tar.gz

View File

@ -1,6 +1,6 @@
--- doc/config.txt.orig 2016-04-26 05:40:15.000000000 -0600
+++ doc/config.txt 2016-04-26 18:22:38.925719916 -0600
@@ -291,6 +291,7 @@ NTL_GF2X_NOINLINE=off
--- doc/config.txt.orig 2016-06-02 04:59:09.000000000 -0600
+++ doc/config.txt 2016-06-02 09:48:29.369273869 -0600
@@ -292,6 +292,7 @@ NTL_GF2X_NOINLINE=off
NTL_GF2X_ALTCODE=off
NTL_GF2X_ALTCODE1=off
NTL_PCLMUL=off
@ -8,7 +8,7 @@
GMP_INCDIR=$(GMP_PREFIX)/include
GMP_LIBDIR=$(GMP_PREFIX)/lib
@@ -638,6 +639,10 @@ NTL_PCLMUL=off
@@ -644,6 +645,10 @@ NTL_PCLMUL=off
# switch to enable the PCLMUL instruction on x86 machines for faster arithmetic
# over GF(2)[X] (without relying on the gf2x package)
@ -19,9 +19,9 @@
########## More GMP Options:
--- include/NTL/config.h.orig 2016-04-26 05:40:16.000000000 -0600
+++ include/NTL/config.h 2016-04-26 18:22:38.929719590 -0600
@@ -616,6 +616,23 @@ using the configure script.
--- include/NTL/config.h.orig 2016-06-02 04:59:09.000000000 -0600
+++ include/NTL/config.h 2016-06-02 09:48:29.374273466 -0600
@@ -625,6 +625,23 @@ using the configure script.
#endif
@ -45,8 +45,8 @@
--- include/NTL/ctools.h.orig 2016-04-26 05:40:15.000000000 -0600
+++ include/NTL/ctools.h 2016-04-26 18:22:38.930719509 -0600
--- include/NTL/ctools.h.orig 2016-06-02 04:59:09.000000000 -0600
+++ include/NTL/ctools.h 2016-06-02 09:48:29.376273304 -0600
@@ -422,6 +422,137 @@ void _ntl_swap(T*& a, T*& b)
// this should be big enough to satisfy any SIMD instructions,
// and it should also be as big as a cache line
@ -185,9 +185,9 @@
--- include/NTL/def_config.h.orig 2016-04-26 05:40:15.000000000 -0600
+++ include/NTL/def_config.h 2016-04-26 18:22:38.930719509 -0600
@@ -616,6 +616,22 @@ using the configure script.
--- include/NTL/def_config.h.orig 2016-06-02 04:59:09.000000000 -0600
+++ include/NTL/def_config.h 2016-06-02 09:48:29.377273224 -0600
@@ -625,6 +625,22 @@ using the configure script.
#endif
@ -210,9 +210,9 @@
--- src/cfile.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/cfile 2016-04-26 18:22:38.931719428 -0600
@@ -616,6 +616,23 @@ using the configure script.
--- src/cfile.orig 2016-06-02 04:59:09.000000000 -0600
+++ src/cfile 2016-06-02 09:48:29.377273224 -0600
@@ -625,6 +625,23 @@ using the configure script.
#endif
@ -236,20 +236,21 @@
@{WIZARD_HACK}
--- src/DispSettings.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/DispSettings.c 2016-04-26 18:22:38.931719428 -0600
@@ -186,6 +186,9 @@ cout << "Performance Options:\n";
--- src/DispSettings.c.orig 2016-06-02 04:59:09.000000000 -0600
+++ src/DispSettings.c 2016-06-02 09:50:13.512867963 -0600
@@ -191,6 +191,10 @@ cout << "Performance Options:\n";
cout << "NTL_PCLMUL\n";
#endif
+#ifdef NTL_LOADTIME_CPU
+ cout << "NTL_LOADTIME_CPU\n";
+#endif
+
cout << "***************************/\n";
cout << "\n\n";
--- src/DoConfig.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/DoConfig 2016-04-26 18:24:47.237292382 -0600
--- src/DoConfig.orig 2016-06-02 04:59:09.000000000 -0600
+++ src/DoConfig 2016-06-02 09:48:29.379273062 -0600
@@ -1,7 +1,7 @@
# This is a perl script, invoked from a shell
@ -259,7 +260,7 @@
%MakeFlag = (
@@ -82,6 +82,7 @@
@@ -83,6 +83,7 @@
'NTL_RANGE_CHECK' => 'off',
'NTL_FFT_BIGTAB' => 'off',
'NTL_FFT_LAZYMUL' => 'off',
@ -267,7 +268,7 @@
);
@@ -148,6 +149,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' &
@@ -149,6 +150,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' &
}
@ -283,8 +284,8 @@
# some special MakeVal values that are determined by SHARED
--- src/GF2X1.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/GF2X1.c 2016-04-26 18:22:38.933719265 -0600
--- src/GF2X1.c.orig 2016-06-02 04:59:08.000000000 -0600
+++ src/GF2X1.c 2016-06-02 09:48:29.381272901 -0600
@@ -19,7 +19,7 @@
// simple scaling factor for some crossover points:
// we use a lower crossover of the underlying multiplication
@ -294,8 +295,8 @@
#define XOVER_SCALE (1L)
#else
#define XOVER_SCALE (2L)
--- src/GF2X.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/GF2X.c 2016-04-26 18:22:38.933719265 -0600
--- src/GF2X.c.orig 2016-06-02 04:59:08.000000000 -0600
+++ src/GF2X.c 2016-06-02 09:48:29.382272820 -0600
@@ -31,6 +31,22 @@ pclmul_mul1 (unsigned long *c, unsigned
__m128i bb = _mm_setr_epi64( _mm_cvtsi64_m64(b), _mm_cvtsi64_m64(0));
_mm_storeu_si128((__m128i*)c, _mm_clmulepi64_si128(aa, bb, 0));
@ -639,9 +640,9 @@
void LeftShift(GF2X& c, const GF2X& a, long n)
--- src/InitSettings.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/InitSettings.c 2016-04-26 20:06:27.078557786 -0600
@@ -150,6 +150,11 @@ int main()
--- src/InitSettings.c.orig 2016-06-02 04:59:09.000000000 -0600
+++ src/InitSettings.c 2016-06-02 09:48:29.382272820 -0600
@@ -156,6 +156,11 @@ int main()
cout << "NTL_RANGE_CHECK=0\n";
#endif
@ -653,8 +654,8 @@
// the following is synthetically defined
#ifdef NTL_LONGLONG_SP_MULMOD
--- src/lzz_pX1.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/lzz_pX1.c 2016-04-26 18:22:38.934719184 -0600
--- src/lzz_pX1.c.orig 2016-06-02 04:59:08.000000000 -0600
+++ src/lzz_pX1.c 2016-06-02 09:48:29.383272740 -0600
@@ -4,6 +4,12 @@
#ifdef NTL_HAVE_AVX
@ -1083,8 +1084,8 @@
default:
LogicError("CompMod: bad strategy");
--- src/mat_lzz_p.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/mat_lzz_p.c 2016-04-26 18:22:38.937718940 -0600
--- src/mat_lzz_p.c.orig 2016-06-02 04:59:08.000000000 -0600
+++ src/mat_lzz_p.c 2016-06-02 09:48:29.385272578 -0600
@@ -10,6 +10,15 @@
#ifdef NTL_HAVE_AVX
@ -1101,7 +1102,7 @@
#endif
NTL_START_IMPL
@@ -632,7 +641,7 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
@@ -626,7 +635,7 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
#ifdef NTL_HAVE_LL_TYPE
@ -1110,7 +1111,7 @@
#define MAX_DBL_INT ((1L << NTL_DOUBLE_PRECISION)-1)
// max int representable exactly as a double
@@ -646,19 +655,120 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
@@ -640,19 +649,120 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
// MUL_ADD(a, b, c): a += b*c
@ -1237,7 +1238,7 @@
__m256d acc0=_mm256_load_pd(x + 0*4);
__m256d acc1=_mm256_load_pd(x + 1*4);
__m256d acc2=_mm256_load_pd(x + 2*4);
@@ -668,19 +778,82 @@ void muladd1_by_32(double *x, const doub
@@ -662,19 +772,82 @@ void muladd1_by_32(double *x, const doub
__m256d acc6=_mm256_load_pd(x + 6*4);
__m256d acc7=_mm256_load_pd(x + 7*4);
@ -1330,7 +1331,7 @@
}
@@ -694,6 +867,9 @@ void muladd1_by_32(double *x, const doub
@@ -688,6 +861,9 @@ void muladd1_by_32(double *x, const doub
_mm256_store_pd(x + 7*4, acc7);
}
@ -1340,7 +1341,7 @@
#else
static
@@ -800,7 +976,164 @@ void muladd1_by_32(double *x, const doub
@@ -794,7 +970,164 @@ void muladd1_by_32(double *x, const doub
#endif
// experiment: process two rows at a time
@ -1506,7 +1507,7 @@
static
void muladd2_by_32(double *x, const double *a, const double *b, long n)
{
@@ -876,96 +1209,217 @@ void muladd2_by_32(double *x, const doub
@@ -870,96 +1203,217 @@ void muladd2_by_32(double *x, const doub
_mm256_store_pd(x + 7*4 + 1*MAT_BLK_SZ, acc13);
}
@ -1788,7 +1789,7 @@
static
void muladd3_by_32(double *x, const double *a, const double *b, long n)
@@ -1066,6 +1520,32 @@ void muladd3_by_32(double *x, const doub
@@ -1060,6 +1514,32 @@ void muladd3_by_32(double *x, const doub
}
@ -1821,7 +1822,7 @@
static inline
void muladd_all_by_32(long first, long last, double *x, const double *a, const double *b, long n)
{
@@ -1085,8 +1565,79 @@ void muladd_all_by_32(long first, long l
@@ -1079,8 +1559,79 @@ void muladd_all_by_32(long first, long l
#endif
}
@ -1901,7 +1902,7 @@
static inline
void muladd_interval(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n)
{
@@ -1117,9 +1668,109 @@ void muladd_interval(double * NTL_RESTRI
@@ -1111,9 +1662,109 @@ void muladd_interval(double * NTL_RESTRI
_mm256_store_pd(x + 3*4, xvec3);
}
}
@ -2011,7 +2012,7 @@
static inline
void muladd_interval1(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n)
{
@@ -1165,11 +1816,74 @@ void muladd_interval1(double * NTL_RESTR
@@ -1159,11 +1810,74 @@ void muladd_interval1(double * NTL_RESTR
*x += (*y)*c;
}
}
@ -2087,7 +2088,7 @@
static inline
void muladd_interval2(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n)
{
@@ -1197,13 +1911,6 @@ void muladd_interval2(double * NTL_RESTR
@@ -1191,13 +1905,6 @@ void muladd_interval2(double * NTL_RESTR
}
}
@ -2101,7 +2102,7 @@
#endif
#endif
@@ -2245,10 +2952,10 @@ void alt_mul_LL(const mat_window_zz_p& X
@@ -2031,10 +2738,10 @@ void alt_mul_LL(const mat_window_zz_p& X
}
@ -2115,7 +2116,7 @@
const const_mat_window_zz_p& A, const const_mat_window_zz_p& B)
{
long n = A.NumRows();
@@ -2615,8 +3322,9 @@ void mul_base (const mat_window_zz_p& X,
@@ -2401,8 +3108,9 @@ void mul_base (const mat_window_zz_p& X,
long V = MAT_BLK_SZ*4;
@ -2127,7 +2128,7 @@
V <= (MAX_DBL_INT-(p-1))/(p-1) &&
V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
@@ -2696,7 +3404,8 @@ void mul_strassen(const mat_window_zz_p&
@@ -2482,7 +3190,8 @@ void mul_strassen(const mat_window_zz_p&
// this code determines if mul_base triggers blk_mul_DD,
// in which case a higher crossover is used
@ -2137,7 +2138,7 @@
{
long V = MAT_BLK_SZ*4;
long p = zz_p::modulus();
@@ -3196,10 +3905,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con
@@ -2982,10 +3691,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con
@ -2151,7 +2152,7 @@
{
long n = A.NumRows();
@@ -3365,10 +4074,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co
@@ -3151,10 +3860,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co
@ -2165,7 +2166,7 @@
{
long n = A.NumRows();
@@ -4126,8 +4835,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
@@ -3912,8 +4621,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
else if (n/MAT_BLK_SZ < 4) {
long V = 64;
@ -2177,7 +2178,7 @@
V <= (MAX_DBL_INT-(p-1))/(p-1) &&
V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
@@ -4152,8 +4862,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
@@ -3938,8 +4648,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
else {
long V = 4*MAT_BLK_SZ;
@ -2189,7 +2190,7 @@
V <= (MAX_DBL_INT-(p-1))/(p-1) &&
V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
@@ -4559,10 +5270,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p&
@@ -4345,10 +5056,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p&
@ -2203,7 +2204,7 @@
vec_zz_p *xp, bool trans, bool relax)
{
long n = A.NumRows();
@@ -4749,10 +5460,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p&
@@ -4535,10 +5246,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p&
@ -2217,7 +2218,7 @@
vec_zz_p *xp, bool trans, bool relax)
{
long n = A.NumRows();
@@ -5563,8 +6274,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
@@ -5349,8 +6060,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
else if (n/MAT_BLK_SZ < 4) {
long V = 64;
@ -2229,7 +2230,7 @@
V <= (MAX_DBL_INT-(p-1))/(p-1) &&
V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
@@ -5589,8 +6301,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
@@ -5375,8 +6087,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
else {
long V = 4*MAT_BLK_SZ;
@ -2241,7 +2242,7 @@
V <= (MAX_DBL_INT-(p-1))/(p-1) &&
V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
@@ -5836,7 +6549,7 @@ long elim_basic(const mat_zz_p& A, mat_z
@@ -5622,7 +6335,7 @@ long elim_basic(const mat_zz_p& A, mat_z
#ifdef NTL_HAVE_LL_TYPE
@ -2250,7 +2251,7 @@
static inline
@@ -7289,8 +8002,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i
@@ -7075,8 +7788,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i
else {
long V = 4*MAT_BLK_SZ;
@ -2262,9 +2263,9 @@
V <= (MAX_DBL_INT-(p-1))/(p-1) &&
V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
--- src/QuickTest.c.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/QuickTest.c 2016-04-26 18:22:38.938718859 -0600
@@ -335,6 +335,9 @@ cerr << "Performance Options:\n";
--- src/QuickTest.c.orig 2016-06-02 04:59:09.000000000 -0600
+++ src/QuickTest.c 2016-06-02 09:48:29.386272498 -0600
@@ -339,6 +339,9 @@ cerr << "Performance Options:\n";
cerr << "NTL_PCLMUL\n";
#endif
@ -2274,8 +2275,8 @@
cerr << "\n\n";
--- src/WizardAux.orig 2016-04-26 05:40:15.000000000 -0600
+++ src/WizardAux 2016-04-26 18:22:38.938718859 -0600
--- src/WizardAux.orig 2016-06-02 04:59:09.000000000 -0600
+++ src/WizardAux 2016-06-02 09:48:29.386272498 -0600
@@ -94,6 +94,7 @@ system("make InitSettings");
'NTL_PCLMUL' => 0,
'NTL_FFT_BIGTAB' => 0,

View File

@ -10,7 +10,7 @@
Summary: High-performance algorithms for vectors, matrices, and polynomials
Name: ntl
Version: 9.8.0
Version: 9.9.1
Release: 1%{?dist}
License: GPLv2+
@ -136,7 +136,7 @@ done
%files
%doc README
%license doc/copying.txt
%{_libdir}/libntl.so.24*
%{_libdir}/libntl.so.27*
%files devel
%doc doc/*
@ -150,6 +150,9 @@ done
%changelog
* Thu Jun 2 2016 Jerry James <loganjerry@gmail.com> - 9.9.1-1
- ntl-9.9.1
* Fri Apr 29 2016 Jerry James <loganjerry@gmail.com> - 9.8.0-1
- ntl-9.8.0
- Add -loadtime-cpu patch

View File

@ -1 +1 @@
a7e87d859511c15023169fa0fcf9903b ntl-9.8.0.tar.gz
2b189eb103e588d498ead4c6a8b09b3c ntl-9.9.1.tar.gz