ntl-9.9.1

2016-06-02 13:06:01 -06:00 · 2016-06-02 13:06:01 -06:00 · e69d8b7672
commit e69d8b7672
parent 0da5f48a1e
4 changed files with 71 additions and 66 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,4 @@
 /ntl-9.6.4.tar.gz
 /ntl-9.7.0.tar.gz
 /ntl-9.8.0.tar.gz
+/ntl-9.9.1.tar.gz
--- a/ntl-loadtime-cpu.patch
+++ b/ntl-loadtime-cpu.patch
@ -1,6 +1,6 @@
--- doc/config.txt.orig	2016-04-26 05:40:15.000000000 -0600
-+++ doc/config.txt	2016-04-26 18:22:38.925719916 -0600
-@@ -291,6 +291,7 @@ NTL_GF2X_NOINLINE=off
+--- doc/config.txt.orig	2016-06-02 04:59:09.000000000 -0600
+++ doc/config.txt	2016-06-02 09:48:29.369273869 -0600
+@@ -292,6 +292,7 @@ NTL_GF2X_NOINLINE=off
 NTL_GF2X_ALTCODE=off
 NTL_GF2X_ALTCODE1=off
 NTL_PCLMUL=off
@ -8,7 +8,7 @@
 
 GMP_INCDIR=$(GMP_PREFIX)/include
 GMP_LIBDIR=$(GMP_PREFIX)/lib
-@@ -638,6 +639,10 @@ NTL_PCLMUL=off
+@@ -644,6 +645,10 @@ NTL_PCLMUL=off
 # switch to enable the PCLMUL instruction on x86 machines for faster arithmetic
 # over GF(2)[X] (without relying on the gf2x package)
 
@ -19,9 +19,9 @@
 
 
 ########## More GMP Options:
--- include/NTL/config.h.orig	2016-04-26 05:40:16.000000000 -0600
-+++ include/NTL/config.h	2016-04-26 18:22:38.929719590 -0600
-@@ -616,6 +616,23 @@ using the configure script.
+--- include/NTL/config.h.orig	2016-06-02 04:59:09.000000000 -0600
+++ include/NTL/config.h	2016-06-02 09:48:29.374273466 -0600
+@@ -625,6 +625,23 @@ using the configure script.
 #endif
 
 
@ -45,8 +45,8 @@
 
 
 
--- include/NTL/ctools.h.orig	2016-04-26 05:40:15.000000000 -0600
-+++ include/NTL/ctools.h	2016-04-26 18:22:38.930719509 -0600
+--- include/NTL/ctools.h.orig	2016-06-02 04:59:09.000000000 -0600
+++ include/NTL/ctools.h	2016-06-02 09:48:29.376273304 -0600
@@ -422,6 +422,137 @@ void _ntl_swap(T*& a, T*& b)
 // this should be big enough to satisfy any SIMD instructions,
 // and it should also be as big as a cache line
@ -185,9 +185,9 @@
 
    
 
--- include/NTL/def_config.h.orig	2016-04-26 05:40:15.000000000 -0600
-+++ include/NTL/def_config.h	2016-04-26 18:22:38.930719509 -0600
-@@ -616,6 +616,22 @@ using the configure script.
+--- include/NTL/def_config.h.orig	2016-06-02 04:59:09.000000000 -0600
+++ include/NTL/def_config.h	2016-06-02 09:48:29.377273224 -0600
+@@ -625,6 +625,22 @@ using the configure script.
 #endif
 
 
@ -210,9 +210,9 @@
 
 
 
--- src/cfile.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/cfile	2016-04-26 18:22:38.931719428 -0600
-@@ -616,6 +616,23 @@ using the configure script.
+--- src/cfile.orig	2016-06-02 04:59:09.000000000 -0600
+++ src/cfile	2016-06-02 09:48:29.377273224 -0600
+@@ -625,6 +625,23 @@ using the configure script.
 #endif
 
 
@ -236,20 +236,21 @@
 
 @{WIZARD_HACK}
 
--- src/DispSettings.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/DispSettings.c	2016-04-26 18:22:38.931719428 -0600
-@@ -186,6 +186,9 @@ cout << "Performance Options:\n";
+--- src/DispSettings.c.orig	2016-06-02 04:59:09.000000000 -0600
+++ src/DispSettings.c	2016-06-02 09:50:13.512867963 -0600
+@@ -191,6 +191,10 @@ cout << "Performance Options:\n";
    cout << "NTL_PCLMUL\n";
 #endif
 
 +#ifdef NTL_LOADTIME_CPU
 +   cout << "NTL_LOADTIME_CPU\n";
 +#endif
+
 
+    cout << "***************************/\n";
    cout << "\n\n";
- 
--- src/DoConfig.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/DoConfig	2016-04-26 18:24:47.237292382 -0600
+--- src/DoConfig.orig	2016-06-02 04:59:09.000000000 -0600
+++ src/DoConfig	2016-06-02 09:48:29.379273062 -0600
@@ -1,7 +1,7 @@
 # This is a perl script, invoked from a shell
 
@ -259,7 +260,7 @@
 
 
 %MakeFlag = (
-@@ -82,6 +82,7 @@
+@@ -83,6 +83,7 @@
 'NTL_RANGE_CHECK'         => 'off',
 'NTL_FFT_BIGTAB'          => 'off',
 'NTL_FFT_LAZYMUL'         => 'off',
@ -267,7 +268,7 @@
 
 );
 
-@@ -148,6 +149,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' &
+@@ -149,6 +150,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' &
 }
 
 
@ -283,8 +284,8 @@
 
 # some special MakeVal values that are determined by SHARED
 
--- src/GF2X1.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/GF2X1.c	2016-04-26 18:22:38.933719265 -0600
+--- src/GF2X1.c.orig	2016-06-02 04:59:08.000000000 -0600
+++ src/GF2X1.c	2016-06-02 09:48:29.381272901 -0600
@@ -19,7 +19,7 @@
 // simple scaling factor for some crossover points:
 // we use a lower crossover of the underlying multiplication
@ -294,8 +295,8 @@
 #define XOVER_SCALE (1L)
 #else
 #define XOVER_SCALE (2L)
--- src/GF2X.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/GF2X.c	2016-04-26 18:22:38.933719265 -0600
+--- src/GF2X.c.orig	2016-06-02 04:59:08.000000000 -0600
+++ src/GF2X.c	2016-06-02 09:48:29.382272820 -0600
@@ -31,6 +31,22 @@ pclmul_mul1 (unsigned long *c, unsigned
    __m128i bb = _mm_setr_epi64( _mm_cvtsi64_m64(b), _mm_cvtsi64_m64(0));
    _mm_storeu_si128((__m128i*)c, _mm_clmulepi64_si128(aa, bb, 0));
@ -639,9 +640,9 @@
 
 
 void LeftShift(GF2X& c, const GF2X& a, long n)
--- src/InitSettings.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/InitSettings.c	2016-04-26 20:06:27.078557786 -0600
-@@ -150,6 +150,11 @@ int main()
+--- src/InitSettings.c.orig	2016-06-02 04:59:09.000000000 -0600
+++ src/InitSettings.c	2016-06-02 09:48:29.382272820 -0600
+@@ -156,6 +156,11 @@ int main()
    cout << "NTL_RANGE_CHECK=0\n";
 #endif
 
@ -653,8 +654,8 @@
 
 // the following is synthetically defined
 #ifdef NTL_LONGLONG_SP_MULMOD
--- src/lzz_pX1.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/lzz_pX1.c	2016-04-26 18:22:38.934719184 -0600
+--- src/lzz_pX1.c.orig	2016-06-02 04:59:08.000000000 -0600
+++ src/lzz_pX1.c	2016-06-02 09:48:29.383272740 -0600
@@ -4,6 +4,12 @@
 
 #ifdef NTL_HAVE_AVX
@ -1083,8 +1084,8 @@
 
    default:
       LogicError("CompMod: bad strategy");
--- src/mat_lzz_p.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/mat_lzz_p.c	2016-04-26 18:22:38.937718940 -0600
+--- src/mat_lzz_p.c.orig	2016-06-02 04:59:08.000000000 -0600
+++ src/mat_lzz_p.c	2016-06-02 09:48:29.385272578 -0600
@@ -10,6 +10,15 @@
 
 #ifdef NTL_HAVE_AVX
@ -1101,7 +1102,7 @@
 #endif
 
 NTL_START_IMPL
-@@ -632,7 +641,7 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
+@@ -626,7 +635,7 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
 
 #ifdef NTL_HAVE_LL_TYPE
 
@ -1110,7 +1111,7 @@
 
 #define MAX_DBL_INT ((1L << NTL_DOUBLE_PRECISION)-1)
 // max int representable exactly as a double
-@@ -646,19 +655,120 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
+@@ -640,19 +649,120 @@ void mul(mat_zz_p& X, const mat_zz_p& A,
 
 
 // MUL_ADD(a, b, c): a += b*c
@ -1237,7 +1238,7 @@
    __m256d acc0=_mm256_load_pd(x + 0*4);
    __m256d acc1=_mm256_load_pd(x + 1*4);
    __m256d acc2=_mm256_load_pd(x + 2*4);
-@@ -668,19 +778,82 @@ void muladd1_by_32(double *x, const doub
+@@ -662,19 +772,82 @@ void muladd1_by_32(double *x, const doub
    __m256d acc6=_mm256_load_pd(x + 6*4);
    __m256d acc7=_mm256_load_pd(x + 7*4);
 
@ -1330,7 +1331,7 @@
    }
 
 
-@@ -694,6 +867,9 @@ void muladd1_by_32(double *x, const doub
+@@ -688,6 +861,9 @@ void muladd1_by_32(double *x, const doub
    _mm256_store_pd(x + 7*4, acc7);
 }
 
@ -1340,7 +1341,7 @@
 #else
 
 static
-@@ -800,7 +976,164 @@ void muladd1_by_32(double *x, const doub
+@@ -794,7 +970,164 @@ void muladd1_by_32(double *x, const doub
 #endif
 
 // experiment: process two rows at a time
@ -1506,7 +1507,7 @@
 static
 void muladd2_by_32(double *x, const double *a, const double *b, long n)
 {
-@@ -876,96 +1209,217 @@ void muladd2_by_32(double *x, const doub
+@@ -870,96 +1203,217 @@ void muladd2_by_32(double *x, const doub
    _mm256_store_pd(x + 7*4 + 1*MAT_BLK_SZ, acc13);
 
 }
@ -1788,7 +1789,7 @@
 
 static
 void muladd3_by_32(double *x, const double *a, const double *b, long n)
-@@ -1066,6 +1520,32 @@ void muladd3_by_32(double *x, const doub
+@@ -1060,6 +1514,32 @@ void muladd3_by_32(double *x, const doub
 
 }
 
@ -1821,7 +1822,7 @@
 static inline
 void muladd_all_by_32(long first, long last, double *x, const double *a, const double *b, long n)
 {
-@@ -1085,8 +1565,79 @@ void muladd_all_by_32(long first, long l
+@@ -1079,8 +1559,79 @@ void muladd_all_by_32(long first, long l
 #endif
 }
 
@ -1901,7 +1902,7 @@
 static inline
 void muladd_interval(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n)
 {
-@@ -1117,9 +1668,109 @@ void muladd_interval(double * NTL_RESTRI
+@@ -1111,9 +1662,109 @@ void muladd_interval(double * NTL_RESTRI
       _mm256_store_pd(x + 3*4, xvec3);
    }
 }
@ -2011,7 +2012,7 @@
 static inline
 void muladd_interval1(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n)
 {
-@@ -1165,11 +1816,74 @@ void muladd_interval1(double * NTL_RESTR
+@@ -1159,11 +1810,74 @@ void muladd_interval1(double * NTL_RESTR
       *x += (*y)*c;
    }
 }
@ -2087,7 +2088,7 @@
 static inline
 void muladd_interval2(double * NTL_RESTRICT x, double * NTL_RESTRICT y, double c, long n)
 {
-@@ -1197,13 +1911,6 @@ void muladd_interval2(double * NTL_RESTR
+@@ -1191,13 +1905,6 @@ void muladd_interval2(double * NTL_RESTR
    }
    
 }
@ -2101,7 +2102,7 @@
 #endif
 
 #endif
-@@ -2245,10 +2952,10 @@ void alt_mul_LL(const mat_window_zz_p& X
+@@ -2031,10 +2738,10 @@ void alt_mul_LL(const mat_window_zz_p& X
 }  
 
 
@ -2115,7 +2116,7 @@
                 const const_mat_window_zz_p& A, const const_mat_window_zz_p& B)  
 {  
    long n = A.NumRows();  
-@@ -2615,8 +3322,9 @@ void mul_base (const mat_window_zz_p& X,
+@@ -2401,8 +3108,9 @@ void mul_base (const mat_window_zz_p& X,
 
       long V = MAT_BLK_SZ*4;
 
@ -2127,7 +2128,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -2696,7 +3404,8 @@ void mul_strassen(const mat_window_zz_p&
+@@ -2482,7 +3190,8 @@ void mul_strassen(const mat_window_zz_p&
     // this code determines if mul_base triggers blk_mul_DD,
     // in which case a higher crossover is used
 
@ -2137,7 +2138,7 @@
     {
        long V = MAT_BLK_SZ*4;
        long p = zz_p::modulus();
-@@ -3196,10 +3905,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con
+@@ -2982,10 +3691,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con
 
 
 
@ -2151,7 +2152,7 @@
 {
    long n = A.NumRows();
 
-@@ -3365,10 +4074,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co
+@@ -3151,10 +3860,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co
 
 
 
@ -2165,7 +2166,7 @@
 {
    long n = A.NumRows();
 
-@@ -4126,8 +4835,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
+@@ -3912,8 +4621,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
    else if (n/MAT_BLK_SZ < 4) {
       long V = 64;
 
@ -2177,7 +2178,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -4152,8 +4862,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
+@@ -3938,8 +4648,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
    else {
       long V = 4*MAT_BLK_SZ;
 
@ -2189,7 +2190,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -4559,10 +5270,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p&
+@@ -4345,10 +5056,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p&
 
 
 
@ -2203,7 +2204,7 @@
                vec_zz_p *xp, bool trans, bool relax)
 {
    long n = A.NumRows();
-@@ -4749,10 +5460,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p&
+@@ -4535,10 +5246,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p&
 
 
 
@ -2217,7 +2218,7 @@
                vec_zz_p *xp, bool trans, bool relax)
 {
    long n = A.NumRows();
-@@ -5563,8 +6274,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
+@@ -5349,8 +6060,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
    else if (n/MAT_BLK_SZ < 4) {
       long V = 64;
 
@ -2229,7 +2230,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -5589,8 +6301,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
+@@ -5375,8 +6087,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
    else {
       long V = 4*MAT_BLK_SZ;
 
@ -2241,7 +2242,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -5836,7 +6549,7 @@ long elim_basic(const mat_zz_p& A, mat_z
+@@ -5622,7 +6335,7 @@ long elim_basic(const mat_zz_p& A, mat_z
 #ifdef NTL_HAVE_LL_TYPE
 
 
@ -2250,7 +2251,7 @@
 
 
 static inline
-@@ -7289,8 +8002,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i
+@@ -7075,8 +7788,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i
    else {
       long V = 4*MAT_BLK_SZ;
 
@ -2262,9 +2263,9 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
--- src/QuickTest.c.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/QuickTest.c	2016-04-26 18:22:38.938718859 -0600
-@@ -335,6 +335,9 @@ cerr << "Performance Options:\n";
+--- src/QuickTest.c.orig	2016-06-02 04:59:09.000000000 -0600
+++ src/QuickTest.c	2016-06-02 09:48:29.386272498 -0600
+@@ -339,6 +339,9 @@ cerr << "Performance Options:\n";
    cerr << "NTL_PCLMUL\n";
 #endif
 
@ -2274,8 +2275,8 @@
 
    cerr << "\n\n";
 
--- src/WizardAux.orig	2016-04-26 05:40:15.000000000 -0600
-+++ src/WizardAux	2016-04-26 18:22:38.938718859 -0600
+--- src/WizardAux.orig	2016-06-02 04:59:09.000000000 -0600
+++ src/WizardAux	2016-06-02 09:48:29.386272498 -0600
@@ -94,6 +94,7 @@ system("make InitSettings");
 'NTL_PCLMUL'           => 0,
 'NTL_FFT_BIGTAB'       => 0,
--- a/ntl.spec
+++ b/ntl.spec
@ -10,7 +10,7 @@

 Summary: High-performance algorithms for vectors, matrices, and polynomials 
 Name:    ntl 
-Version: 9.8.0
+Version: 9.9.1
 Release: 1%{?dist}

 License: GPLv2+
@ -136,7 +136,7 @@ done
 %files
 %doc README
 %license doc/copying.txt
-%{_libdir}/libntl.so.24*
+%{_libdir}/libntl.so.27*

 %files devel 
 %doc doc/*
@ -150,6 +150,9 @@ done


 %changelog
+* Thu Jun  2 2016 Jerry James <loganjerry@gmail.com> - 9.9.1-1
+- ntl-9.9.1
+
 * Fri Apr 29 2016 Jerry James <loganjerry@gmail.com> - 9.8.0-1
 - ntl-9.8.0
 - Add -loadtime-cpu patch
--- a/2
+++ b/2
@ -1 +1 @@
-a7e87d859511c15023169fa0fcf9903b  ntl-9.8.0.tar.gz
+2b189eb103e588d498ead4c6a8b09b3c  ntl-9.9.1.tar.gz