From e69d8b7672604b080652fb0b54626db5b1f24991 Mon Sep 17 00:00:00 2001 From: Jerry James Date: Thu, 2 Jun 2016 13:06:01 -0600 Subject: [PATCH] ntl-9.9.1 --- .gitignore | 1 + ntl-loadtime-cpu.patch | 127 +++++++++++++++++++++-------------------- ntl.spec | 7 ++- sources | 2 +- 4 files changed, 71 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index 779591a..49a47af 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /ntl-9.6.4.tar.gz /ntl-9.7.0.tar.gz /ntl-9.8.0.tar.gz +/ntl-9.9.1.tar.gz diff --git a/ntl-loadtime-cpu.patch b/ntl-loadtime-cpu.patch index 117711f..3ee9724 100644 --- a/ntl-loadtime-cpu.patch +++ b/ntl-loadtime-cpu.patch @@ -1,6 +1,6 @@ ---- doc/config.txt.orig 2016-04-26 05:40:15.000000000 -0600 -+++ doc/config.txt 2016-04-26 18:22:38.925719916 -0600 -@@ -291,6 +291,7 @@ NTL_GF2X_NOINLINE=off +--- doc/config.txt.orig 2016-06-02 04:59:09.000000000 -0600 ++++ doc/config.txt 2016-06-02 09:48:29.369273869 -0600 +@@ -292,6 +292,7 @@ NTL_GF2X_NOINLINE=off NTL_GF2X_ALTCODE=off NTL_GF2X_ALTCODE1=off NTL_PCLMUL=off @@ -8,7 +8,7 @@ GMP_INCDIR=$(GMP_PREFIX)/include GMP_LIBDIR=$(GMP_PREFIX)/lib -@@ -638,6 +639,10 @@ NTL_PCLMUL=off +@@ -644,6 +645,10 @@ NTL_PCLMUL=off # switch to enable the PCLMUL instruction on x86 machines for faster arithmetic # over GF(2)[X] (without relying on the gf2x package) @@ -19,9 +19,9 @@ ########## More GMP Options: ---- include/NTL/config.h.orig 2016-04-26 05:40:16.000000000 -0600 -+++ include/NTL/config.h 2016-04-26 18:22:38.929719590 -0600 -@@ -616,6 +616,23 @@ using the configure script. +--- include/NTL/config.h.orig 2016-06-02 04:59:09.000000000 -0600 ++++ include/NTL/config.h 2016-06-02 09:48:29.374273466 -0600 +@@ -625,6 +625,23 @@ using the configure script. #endif @@ -45,8 +45,8 @@ ---- include/NTL/ctools.h.orig 2016-04-26 05:40:15.000000000 -0600 -+++ include/NTL/ctools.h 2016-04-26 18:22:38.930719509 -0600 +--- include/NTL/ctools.h.orig 2016-06-02 04:59:09.000000000 -0600 ++++ include/NTL/ctools.h 2016-06-02 09:48:29.376273304 -0600 @@ -422,6 +422,137 @@ void _ntl_swap(T*& a, T*& b) // this should be big enough to satisfy any SIMD instructions, // and it should also be as big as a cache line @@ -185,9 +185,9 @@ ---- include/NTL/def_config.h.orig 2016-04-26 05:40:15.000000000 -0600 -+++ include/NTL/def_config.h 2016-04-26 18:22:38.930719509 -0600 -@@ -616,6 +616,22 @@ using the configure script. +--- include/NTL/def_config.h.orig 2016-06-02 04:59:09.000000000 -0600 ++++ include/NTL/def_config.h 2016-06-02 09:48:29.377273224 -0600 +@@ -625,6 +625,22 @@ using the configure script. #endif @@ -210,9 +210,9 @@ ---- src/cfile.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/cfile 2016-04-26 18:22:38.931719428 -0600 -@@ -616,6 +616,23 @@ using the configure script. +--- src/cfile.orig 2016-06-02 04:59:09.000000000 -0600 ++++ src/cfile 2016-06-02 09:48:29.377273224 -0600 +@@ -625,6 +625,23 @@ using the configure script. #endif @@ -236,20 +236,21 @@ @{WIZARD_HACK} ---- src/DispSettings.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/DispSettings.c 2016-04-26 18:22:38.931719428 -0600 -@@ -186,6 +186,9 @@ cout << "Performance Options:\n"; +--- src/DispSettings.c.orig 2016-06-02 04:59:09.000000000 -0600 ++++ src/DispSettings.c 2016-06-02 09:50:13.512867963 -0600 +@@ -191,6 +191,10 @@ cout << "Performance Options:\n"; cout << "NTL_PCLMUL\n"; #endif +#ifdef NTL_LOADTIME_CPU + cout << "NTL_LOADTIME_CPU\n"; +#endif ++ + cout << "***************************/\n"; cout << "\n\n"; - ---- src/DoConfig.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/DoConfig 2016-04-26 18:24:47.237292382 -0600 +--- src/DoConfig.orig 2016-06-02 04:59:09.000000000 -0600 ++++ src/DoConfig 2016-06-02 09:48:29.379273062 -0600 @@ -1,7 +1,7 @@ # This is a perl script, invoked from a shell @@ -259,7 +260,7 @@ %MakeFlag = ( -@@ -82,6 +82,7 @@ +@@ -83,6 +83,7 @@ 'NTL_RANGE_CHECK' => 'off', 'NTL_FFT_BIGTAB' => 'off', 'NTL_FFT_LAZYMUL' => 'off', @@ -267,7 +268,7 @@ ); -@@ -148,6 +149,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' & +@@ -149,6 +150,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' & } @@ -283,8 +284,8 @@ # some special MakeVal values that are determined by SHARED ---- src/GF2X1.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/GF2X1.c 2016-04-26 18:22:38.933719265 -0600 +--- src/GF2X1.c.orig 2016-06-02 04:59:08.000000000 -0600 ++++ src/GF2X1.c 2016-06-02 09:48:29.381272901 -0600 @@ -19,7 +19,7 @@ // simple scaling factor for some crossover points: // we use a lower crossover of the underlying multiplication @@ -294,8 +295,8 @@ #define XOVER_SCALE (1L) #else #define XOVER_SCALE (2L) ---- src/GF2X.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/GF2X.c 2016-04-26 18:22:38.933719265 -0600 +--- src/GF2X.c.orig 2016-06-02 04:59:08.000000000 -0600 ++++ src/GF2X.c 2016-06-02 09:48:29.382272820 -0600 @@ -31,6 +31,22 @@ pclmul_mul1 (unsigned long *c, unsigned __m128i bb = _mm_setr_epi64( _mm_cvtsi64_m64(b), _mm_cvtsi64_m64(0)); _mm_storeu_si128((__m128i*)c, _mm_clmulepi64_si128(aa, bb, 0)); @@ -639,9 +640,9 @@ void LeftShift(GF2X& c, const GF2X& a, long n) ---- src/InitSettings.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/InitSettings.c 2016-04-26 20:06:27.078557786 -0600 -@@ -150,6 +150,11 @@ int main() +--- src/InitSettings.c.orig 2016-06-02 04:59:09.000000000 -0600 ++++ src/InitSettings.c 2016-06-02 09:48:29.382272820 -0600 +@@ -156,6 +156,11 @@ int main() cout << "NTL_RANGE_CHECK=0\n"; #endif @@ -653,8 +654,8 @@ // the following is synthetically defined #ifdef NTL_LONGLONG_SP_MULMOD ---- src/lzz_pX1.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/lzz_pX1.c 2016-04-26 18:22:38.934719184 -0600 +--- src/lzz_pX1.c.orig 2016-06-02 04:59:08.000000000 -0600 ++++ src/lzz_pX1.c 2016-06-02 09:48:29.383272740 -0600 @@ -4,6 +4,12 @@ #ifdef NTL_HAVE_AVX @@ -1083,8 +1084,8 @@ default: LogicError("CompMod: bad strategy"); ---- src/mat_lzz_p.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/mat_lzz_p.c 2016-04-26 18:22:38.937718940 -0600 +--- src/mat_lzz_p.c.orig 2016-06-02 04:59:08.000000000 -0600 ++++ src/mat_lzz_p.c 2016-06-02 09:48:29.385272578 -0600 @@ -10,6 +10,15 @@ #ifdef NTL_HAVE_AVX @@ -1101,7 +1102,7 @@ #endif NTL_START_IMPL -@@ -632,7 +641,7 @@ void mul(mat_zz_p& X, const mat_zz_p& A, +@@ -626,7 +635,7 @@ void mul(mat_zz_p& X, const mat_zz_p& A, #ifdef NTL_HAVE_LL_TYPE @@ -1110,7 +1111,7 @@ #define MAX_DBL_INT ((1L << NTL_DOUBLE_PRECISION)-1) // max int representable exactly as a double -@@ -646,19 +655,120 @@ void mul(mat_zz_p& X, const mat_zz_p& A, +@@ -640,19 +649,120 @@ void mul(mat_zz_p& X, const mat_zz_p& A, // MUL_ADD(a, b, c): a += b*c @@ -1237,7 +1238,7 @@ __m256d acc0=_mm256_load_pd(x + 0*4); __m256d acc1=_mm256_load_pd(x + 1*4); __m256d acc2=_mm256_load_pd(x + 2*4); -@@ -668,19 +778,82 @@ void muladd1_by_32(double *x, const doub +@@ -662,19 +772,82 @@ void muladd1_by_32(double *x, const doub __m256d acc6=_mm256_load_pd(x + 6*4); __m256d acc7=_mm256_load_pd(x + 7*4); @@ -1330,7 +1331,7 @@ } -@@ -694,6 +867,9 @@ void muladd1_by_32(double *x, const doub +@@ -688,6 +861,9 @@ void muladd1_by_32(double *x, const doub _mm256_store_pd(x + 7*4, acc7); } @@ -1340,7 +1341,7 @@ #else static -@@ -800,7 +976,164 @@ void muladd1_by_32(double *x, const doub +@@ -794,7 +970,164 @@ void muladd1_by_32(double *x, const doub #endif // experiment: process two rows at a time @@ -1506,7 +1507,7 @@ static void muladd2_by_32(double *x, const double *a, const double *b, long n) { -@@ -876,96 +1209,217 @@ void muladd2_by_32(double *x, const doub +@@ -870,96 +1203,217 @@ void muladd2_by_32(double *x, const doub _mm256_store_pd(x + 7*4 + 1*MAT_BLK_SZ, acc13); } @@ -1788,7 +1789,7 @@ static void muladd3_by_32(double *x, const double *a, const double *b, long n) -@@ -1066,6 +1520,32 @@ void muladd3_by_32(double *x, const doub +@@ -1060,6 +1514,32 @@ void muladd3_by_32(double *x, const doub } @@ -1821,7 +1822,7 @@ static inline void muladd_all_by_32(long first, long last, double *x, const double *a, const double *b, long n) { -@@ -1085,8 +1565,79 @@ void muladd_all_by_32(long first, long l +@@ -1079,8 +1559,79 @@ void muladd_all_by_32(long first, long l #endif } @@ -1901,7 +1902,7 @@ static inline void muladd_interval(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) { -@@ -1117,9 +1668,109 @@ void muladd_interval(double * NTL_RESTRI +@@ -1111,9 +1662,109 @@ void muladd_interval(double * NTL_RESTRI _mm256_store_pd(x + 3*4, xvec3); } } @@ -2011,7 +2012,7 @@ static inline void muladd_interval1(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) { -@@ -1165,11 +1816,74 @@ void muladd_interval1(double * NTL_RESTR +@@ -1159,11 +1810,74 @@ void muladd_interval1(double * NTL_RESTR *x += (*y)*c; } } @@ -2087,7 +2088,7 @@ static inline void muladd_interval2(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n) { -@@ -1197,13 +1911,6 @@ void muladd_interval2(double * NTL_RESTR +@@ -1191,13 +1905,6 @@ void muladd_interval2(double * NTL_RESTR } } @@ -2101,7 +2102,7 @@ #endif #endif -@@ -2245,10 +2952,10 @@ void alt_mul_LL(const mat_window_zz_p& X +@@ -2031,10 +2738,10 @@ void alt_mul_LL(const mat_window_zz_p& X } @@ -2115,7 +2116,7 @@ const const_mat_window_zz_p& A, const const_mat_window_zz_p& B) { long n = A.NumRows(); -@@ -2615,8 +3322,9 @@ void mul_base (const mat_window_zz_p& X, +@@ -2401,8 +3108,9 @@ void mul_base (const mat_window_zz_p& X, long V = MAT_BLK_SZ*4; @@ -2127,7 +2128,7 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { -@@ -2696,7 +3404,8 @@ void mul_strassen(const mat_window_zz_p& +@@ -2482,7 +3190,8 @@ void mul_strassen(const mat_window_zz_p& // this code determines if mul_base triggers blk_mul_DD, // in which case a higher crossover is used @@ -2137,7 +2138,7 @@ { long V = MAT_BLK_SZ*4; long p = zz_p::modulus(); -@@ -3196,10 +3905,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con +@@ -2982,10 +3691,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con @@ -2151,7 +2152,7 @@ { long n = A.NumRows(); -@@ -3365,10 +4074,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co +@@ -3151,10 +3860,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co @@ -2165,7 +2166,7 @@ { long n = A.NumRows(); -@@ -4126,8 +4835,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c +@@ -3912,8 +4621,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c else if (n/MAT_BLK_SZ < 4) { long V = 64; @@ -2177,7 +2178,7 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { -@@ -4152,8 +4862,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c +@@ -3938,8 +4648,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c else { long V = 4*MAT_BLK_SZ; @@ -2189,7 +2190,7 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { -@@ -4559,10 +5270,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p& +@@ -4345,10 +5056,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p& @@ -2203,7 +2204,7 @@ vec_zz_p *xp, bool trans, bool relax) { long n = A.NumRows(); -@@ -4749,10 +5460,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p& +@@ -4535,10 +5246,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p& @@ -2217,7 +2218,7 @@ vec_zz_p *xp, bool trans, bool relax) { long n = A.NumRows(); -@@ -5563,8 +6274,9 @@ void tri(zz_p& d, const mat_zz_p& A, con +@@ -5349,8 +6060,9 @@ void tri(zz_p& d, const mat_zz_p& A, con else if (n/MAT_BLK_SZ < 4) { long V = 64; @@ -2229,7 +2230,7 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { -@@ -5589,8 +6301,9 @@ void tri(zz_p& d, const mat_zz_p& A, con +@@ -5375,8 +6087,9 @@ void tri(zz_p& d, const mat_zz_p& A, con else { long V = 4*MAT_BLK_SZ; @@ -2241,7 +2242,7 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { -@@ -5836,7 +6549,7 @@ long elim_basic(const mat_zz_p& A, mat_z +@@ -5622,7 +6335,7 @@ long elim_basic(const mat_zz_p& A, mat_z #ifdef NTL_HAVE_LL_TYPE @@ -2250,7 +2251,7 @@ static inline -@@ -7289,8 +8002,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i +@@ -7075,8 +7788,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i else { long V = 4*MAT_BLK_SZ; @@ -2262,9 +2263,9 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { ---- src/QuickTest.c.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/QuickTest.c 2016-04-26 18:22:38.938718859 -0600 -@@ -335,6 +335,9 @@ cerr << "Performance Options:\n"; +--- src/QuickTest.c.orig 2016-06-02 04:59:09.000000000 -0600 ++++ src/QuickTest.c 2016-06-02 09:48:29.386272498 -0600 +@@ -339,6 +339,9 @@ cerr << "Performance Options:\n"; cerr << "NTL_PCLMUL\n"; #endif @@ -2274,8 +2275,8 @@ cerr << "\n\n"; ---- src/WizardAux.orig 2016-04-26 05:40:15.000000000 -0600 -+++ src/WizardAux 2016-04-26 18:22:38.938718859 -0600 +--- src/WizardAux.orig 2016-06-02 04:59:09.000000000 -0600 ++++ src/WizardAux 2016-06-02 09:48:29.386272498 -0600 @@ -94,6 +94,7 @@ system("make InitSettings"); 'NTL_PCLMUL' => 0, 'NTL_FFT_BIGTAB' => 0, diff --git a/ntl.spec b/ntl.spec index 89f1902..fa67172 100644 --- a/ntl.spec +++ b/ntl.spec @@ -10,7 +10,7 @@ Summary: High-performance algorithms for vectors, matrices, and polynomials Name: ntl -Version: 9.8.0 +Version: 9.9.1 Release: 1%{?dist} License: GPLv2+ @@ -136,7 +136,7 @@ done %files %doc README %license doc/copying.txt -%{_libdir}/libntl.so.24* +%{_libdir}/libntl.so.27* %files devel %doc doc/* @@ -150,6 +150,9 @@ done %changelog +* Thu Jun 2 2016 Jerry James - 9.9.1-1 +- ntl-9.9.1 + * Fri Apr 29 2016 Jerry James - 9.8.0-1 - ntl-9.8.0 - Add -loadtime-cpu patch diff --git a/sources b/sources index d04a3a8..df9dc70 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -a7e87d859511c15023169fa0fcf9903b ntl-9.8.0.tar.gz +2b189eb103e588d498ead4c6a8b09b3c ntl-9.9.1.tar.gz