ntl-10.1.0.

2016-10-20 22:54:40 -06:00 · 2016-10-20 22:54:40 -06:00 · a5419949fa
parent 14995b0b61
commit a5419949fa
4 changed files with 101 additions and 105 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 /ntl-9.9.1.tar.gz
 /ntl-9.10.0.tar.gz
 /ntl-9.11.0.tar.gz
+/ntl-10.1.0.tar.gz
--- a/ntl-loadtime-cpu.patch
+++ b/ntl-loadtime-cpu.patch
@@ -1,16 +1,16 @@
--- doc/config.txt.orig	2016-06-21 12:46:44.000000000 -0600
-+++ doc/config.txt	2016-07-20 19:57:16.756611904 -0600
-@@ -292,6 +292,7 @@ NTL_GF2X_NOINLINE=off
+--- doc/config.txt.orig	2016-10-14 07:53:33.000000000 -0600
+++ doc/config.txt	2016-10-19 19:06:48.837854327 -0600
+@@ -300,6 +300,7 @@ NTL_AVOID_BRANCHING=off
+ NTL_GF2X_NOINLINE=off
 NTL_GF2X_ALTCODE=off
 NTL_GF2X_ALTCODE1=off
- NTL_PCLMUL=off
 +NTL_LOADTIME_CPU=off
 
 GMP_INCDIR=$(GMP_PREFIX)/include
 GMP_LIBDIR=$(GMP_PREFIX)/lib
-@@ -644,6 +645,10 @@ NTL_PCLMUL=off
- # switch to enable the PCLMUL instruction on x86 machines for faster arithmetic
- # over GF(2)[X] (without relying on the gf2x package)
+@@ -597,6 +598,10 @@ NTL_GF2X_ALTCODE1=off
+ 
+ # Yet another alternative implementation for GF2X multiplication.
 
 +NTL_LOADTIME_CPU=off
 +
@@ -19,9 +19,9 @@
 
 
 ########## More GMP Options:
--- include/NTL/config.h.orig	2016-06-21 12:46:44.000000000 -0600
-+++ include/NTL/config.h	2016-07-20 19:57:16.766611105 -0600
-@@ -625,6 +625,23 @@ using the configure script.
+--- include/NTL/config.h.orig	2016-10-14 07:53:33.000000000 -0600
+++ include/NTL/config.h	2016-10-19 19:03:46.635500859 -0600
+@@ -475,6 +475,20 @@ using the configure script.
 #endif
 
 
@@ -35,9 +35,6 @@
 + * CPUs.
 + *
 + * This flag is useful only on x86_64 platforms with gcc 4.8 or later.
-+ *
-+ * To re-build after changing this flag:
-+ * rm GF2X.o GF2X1.o lzz_pX1.o mat_lzz_p.o; make ntl.a
 + */
 +
 +#endif
@@ -45,12 +42,12 @@
 
 
 
--- include/NTL/ctools.h.orig	2016-06-21 12:46:44.000000000 -0600
-+++ include/NTL/ctools.h	2016-07-20 19:57:16.767611025 -0600
-@@ -473,6 +473,137 @@ char *_ntl_make_aligned(char *p, long al
- // this should be big enough to satisfy any SIMD instructions,
+--- include/NTL/ctools.h.orig	2016-10-14 07:53:32.000000000 -0600
+++ include/NTL/ctools.h	2016-10-19 19:03:46.636500779 -0600
+@@ -447,6 +447,136 @@ char *_ntl_make_aligned(char *p, long al
 // and it should also be as big as a cache line
 
+ 
 +/* Determine CPU characteristics at runtime */
 +#ifdef NTL_LOADTIME_CPU
 +#if !defined(__x86_64__)
@@ -174,7 +171,6 @@
 +               have_fma = 0;                                            \
 +            }                                                           \
 +         }                                                              \
-+         unsigned int eax, ebx, ecx, edx;                               \
 +         return have_avx2                                               \
 +               ? (void (*)(void))&name##_avx2                           \
 +               : (void (*)(void))&name##_fma;                           \
@@ -183,11 +179,11 @@
 +   type __attribute__((ifunc ("resolve_" #name))) name params
 +#endif
 
-    
+ #ifdef NTL_HAVE_BUILTIN_CLZL
 
--- include/NTL/def_config.h.orig	2016-06-21 12:46:44.000000000 -0600
-+++ include/NTL/def_config.h	2016-07-20 19:57:16.767611025 -0600
-@@ -625,6 +625,22 @@ using the configure script.
+--- include/NTL/def_config.h.orig	2016-10-14 07:53:33.000000000 -0600
+++ include/NTL/def_config.h	2016-10-19 19:03:46.637500698 -0600
+@@ -475,6 +475,19 @@ using the configure script.
 #endif
 
 
@@ -201,18 +197,15 @@
 + * CPUs.
 + *
 + * This flag is useful only on x86_64 platforms with gcc 4.8 or later.
-+ *
-+ * To re-build after changing this flag:
-+ * rm GF2X.o GF2X1.o lzz_pX1.o mat_lzz_p.o; make ntl.a
 + */
 +
 +#endif
 
 
 
--- src/cfile.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/cfile	2016-07-20 19:57:16.768610945 -0600
-@@ -625,6 +625,23 @@ using the configure script.
+--- src/cfile.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/cfile	2016-10-19 19:03:46.637500698 -0600
+@@ -475,6 +475,20 @@ using the configure script.
 #endif
 
 
@@ -226,9 +219,6 @@
 + * CPUs.
 + *
 + * This flag is useful only on x86_64 platforms with gcc 4.8 or later.
-+ *
-+ * To re-build after changing this flag:
-+ * rm GF2X.o GF2X1.o lzz_pX1.o mat_lzz_p.o; make ntl.a
 + */
 +
 +#endif
@@ -236,10 +226,10 @@
 
 @{WIZARD_HACK}
 
--- src/DispSettings.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/DispSettings.c	2016-07-20 19:57:16.768610945 -0600
-@@ -191,6 +191,10 @@ cout << "Performance Options:\n";
-    cout << "NTL_PCLMUL\n";
+--- src/DispSettings.cpp.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/DispSettings.cpp	2016-10-19 19:03:46.637500698 -0600
+@@ -164,6 +164,10 @@ cout << "Performance Options:\n";
+    cout << "NTL_GF2X_NOINLINE\n";
 #endif
 
 +#ifdef NTL_LOADTIME_CPU
@@ -249,8 +239,8 @@
 
    cout << "***************************/\n";
    cout << "\n\n";
--- src/DoConfig.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/DoConfig	2016-07-20 19:57:16.769610865 -0600
+--- src/DoConfig.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/DoConfig	2016-10-19 19:09:03.299035983 -0600
@@ -1,7 +1,7 @@
 # This is a perl script, invoked from a shell
 
@@ -260,15 +250,15 @@
 
 
 %MakeFlag = (
-@@ -83,6 +83,7 @@
- 'NTL_RANGE_CHECK'         => 'off',
- 'NTL_FFT_BIGTAB'          => 'off',
- 'NTL_FFT_LAZYMUL'         => 'off',
+@@ -82,6 +82,7 @@
+ 'NTL_GF2X_NOINLINE'       => 'off',
+ 'NTL_GF2X_ALTCODE'        => 'off',
+ 'NTL_GF2X_ALTCODE1'       => 'off',
 +'NTL_LOADTIME_CPU'        => 'off',
 
- );
 
-@@ -149,6 +150,15 @@ if ($ConfigFlag{'NTL_THREADS'} eq 'on' &
+ );
+@@ -191,6 +192,15 @@ if ($ConfigFlag{'NTL_THREAD_BOOST'} eq '
 }
 
 
@@ -284,23 +274,23 @@
 
 # some special MakeVal values that are determined by SHARED
 
--- src/GF2X1.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/GF2X1.c	2016-07-20 19:57:16.770610785 -0600
+--- src/GF2X1.cpp.orig	2016-10-19 19:03:46.640500457 -0600
+++ src/GF2X1.cpp	2016-10-19 19:10:34.740694338 -0600
@@ -19,7 +19,7 @@
 // simple scaling factor for some crossover points:
 // we use a lower crossover of the underlying multiplication
 // is faster  
-#if (defined(NTL_GF2X_LIB) || defined(NTL_PCLMUL))
-+#if (defined(NTL_GF2X_LIB) || defined(NTL_PCLMUL) || defined (NTL_LOADTIME_CPU))
+-#if (defined(NTL_GF2X_LIB) || defined(NTL_HAVE_PCLMUL))
+#if (defined(NTL_GF2X_LIB) || defined(NTL_HAVE_PCLMUL) || defined(NTL_LOADTIME_CPU))
 #define XOVER_SCALE (1L)
 #else
 #define XOVER_SCALE (2L)
--- src/GF2X.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/GF2X.c	2016-07-20 19:57:16.771610706 -0600
-@@ -31,6 +31,22 @@ pclmul_mul1 (unsigned long *c, unsigned
-    __m128i bb = _mm_setr_epi64( _mm_cvtsi64_m64(b), _mm_cvtsi64_m64(0));
+--- src/GF2X.cpp.orig	2016-10-14 07:53:31.000000000 -0600
+++ src/GF2X.cpp	2016-10-19 19:46:20.799482817 -0600
+@@ -28,6 +28,22 @@ pclmul_mul1 (unsigned long *c, unsigned
    _mm_storeu_si128((__m128i*)c, _mm_clmulepi64_si128(aa, bb, 0));
 }
+ 
 +#elif defined (NTL_LOADTIME_CPU)
 +
 +#include <wmmintrin.h>
@@ -311,16 +301,16 @@
 +
 +#define NTL_INLINE inline
 +
-+#define pclmul_mul1(c,a,b) do {											\
+#define pclmul_mul1(c,a,b) do {                                          \
 +   __m128i aa = _mm_setr_epi64( _mm_cvtsi64_m64(a), _mm_cvtsi64_m64(0)); \
 +   __m128i bb = _mm_setr_epi64( _mm_cvtsi64_m64(b), _mm_cvtsi64_m64(0)); \
-+   _mm_storeu_si128((__m128i*)(c), _mm_clmulepi64_si128(aa, bb, 0));	\
+   _mm_storeu_si128((__m128i*)(c), _mm_clmulepi64_si128(aa, bb, 0));     \
 +} while (0)
 +
 #else
 
 
-@@ -579,6 +595,27 @@ void add(GF2X& x, const GF2X& a, const G
+@@ -576,6 +592,27 @@ void add(GF2X& x, const GF2X& a, const G
 
 
 
@@ -345,10 +335,10 @@
 +
 +#else
 +
- static 
+ static NTL_INLINE
 void mul1(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
 {
-@@ -592,6 +629,7 @@ NTL_EFF_BB_MUL_CODE0
+@@ -588,6 +625,7 @@ NTL_EFF_BB_MUL_CODE0
 
 }
 
@@ -356,7 +346,7 @@
 
 #ifdef NTL_GF2X_NOINLINE
 
-@@ -616,6 +654,51 @@ NTL_EFF_BB_MUL_CODE0
+@@ -612,6 +650,51 @@ NTL_EFF_BB_MUL_CODE0
 #endif
 
 
@@ -408,7 +398,7 @@
 static 
 void Mul1(_ntl_ulong *cp, const _ntl_ulong *bp, long sb, _ntl_ulong a)
 {
-@@ -643,6 +726,53 @@ NTL_EFF_BB_MUL_CODE1
+@@ -639,6 +722,53 @@ NTL_EFF_BB_MUL_CODE1
 
 }
 
@@ -462,7 +452,7 @@
 static 
 void AddMul1(_ntl_ulong *cp, const _ntl_ulong* bp, long sb, _ntl_ulong a)
 {
-@@ -671,6 +801,52 @@ NTL_EFF_BB_MUL_CODE2
+@@ -667,6 +797,52 @@ NTL_EFF_BB_MUL_CODE2
 
 }
 
@@ -515,15 +505,16 @@
 
 static 
 void Mul1_short(_ntl_ulong *cp, const _ntl_ulong *bp, long sb, _ntl_ulong a)
-@@ -699,9 +875,29 @@ NTL_EFF_SHORT_BB_MUL_CODE1
+@@ -695,10 +871,31 @@ NTL_EFF_SHORT_BB_MUL_CODE1
 
 }
 
 +#endif
-+
-+
+ 
+ 
+ 
 +#ifdef NTL_LOADTIME_CPU
-+
+ 
 +BASE_FUNC(void,mul_half)(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
 +{
 +   NTL_EFF_HALF_BB_MUL_CODE0
@@ -533,28 +524,28 @@
 +{
 +   pclmul_mul1(c, a, b);
 +}
- 
+ 
 +AVX_FUNC(void,mul_half)(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
 +{
 +   pclmul_mul1(c, a, b);
 +}
- 
+ 
 +PCLMUL_RESOLVER(static void,mul_half,(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b));
- 
+ 
 +#else
- 
+ 
 static 
 void mul_half(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
-@@ -716,6 +912,8 @@ NTL_EFF_HALF_BB_MUL_CODE0
+ {
+@@ -712,6 +909,7 @@ NTL_EFF_HALF_BB_MUL_CODE0
 
 }
 
 +#endif
-+
 
 // mul2...mul8 hard-code 2x2...8x8 word multiplies.
 // I adapted these routines from LiDIA (except mul3, see below).
-@@ -1627,6 +1825,77 @@ static const _ntl_ulong sqrtab[256] = {
+@@ -1623,6 +1821,77 @@ static const _ntl_ulong sqrtab[256] = {
 
 
 
@@ -632,7 +623,7 @@
 static inline
 void sqr1(_ntl_ulong *c, _ntl_ulong a)
 {
-@@ -1667,6 +1936,7 @@ void sqr(GF2X& c, const GF2X& a)
+@@ -1663,6 +1932,7 @@ void sqr(GF2X& c, const GF2X& a)
    return;
 }
 
@@ -640,9 +631,9 @@
 
 
 void LeftShift(GF2X& c, const GF2X& a, long n)
--- src/InitSettings.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/InitSettings.c	2016-07-20 19:57:16.772610626 -0600
-@@ -156,6 +156,11 @@ int main()
+--- src/InitSettings.cpp.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/InitSettings.cpp	2016-10-19 19:03:46.643500216 -0600
+@@ -148,6 +148,11 @@ int main()
    cout << "NTL_RANGE_CHECK=0\n";
 #endif
 
@@ -652,10 +643,10 @@
 +   cout << "NTL_LOADTIME_CPU=0\n";
 +#endif
 
- // the following is synthetically defined
- #ifdef NTL_LONGLONG_SP_MULMOD
--- src/lzz_pX1.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/lzz_pX1.c	2016-07-20 19:57:16.773610546 -0600
+ // the following are not actual config flags, but help
+ // in the Wizard logic
+--- src/lzz_pX1.cpp.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/lzz_pX1.cpp	2016-10-19 19:03:46.644500136 -0600
@@ -4,6 +4,12 @@
 
 #ifdef NTL_HAVE_AVX
@@ -1084,8 +1075,8 @@
 
    default:
       LogicError("CompMod: bad strategy");
--- src/mat_lzz_p.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/mat_lzz_p.c	2016-07-20 19:57:16.776610306 -0600
+--- src/mat_lzz_p.cpp.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/mat_lzz_p.cpp	2016-10-19 19:03:46.647499895 -0600
@@ -10,6 +10,15 @@
 
 #ifdef NTL_HAVE_AVX
@@ -2128,7 +2119,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -2482,7 +3190,8 @@ void mul_strassen(const mat_window_zz_p&
+@@ -2466,7 +3174,8 @@ void mul_strassen(const mat_window_zz_p&
     // this code determines if mul_base triggers blk_mul_DD,
     // in which case a higher crossover is used
 
@@ -2138,7 +2129,7 @@
     {
        long V = MAT_BLK_SZ*4;
        long p = zz_p::modulus();
-@@ -2982,10 +3691,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con
+@@ -2966,10 +3675,10 @@ void alt_inv_L(zz_p& d, mat_zz_p& X, con
 
 
 
@@ -2152,7 +2143,7 @@
 {
    long n = A.NumRows();
 
-@@ -3151,10 +3860,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co
+@@ -3135,10 +3844,10 @@ void alt_inv_DD(zz_p& d, mat_zz_p& X, co
 
 
 
@@ -2166,7 +2157,7 @@
 {
    long n = A.NumRows();
 
-@@ -3912,8 +4621,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
+@@ -3896,8 +4605,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
    else if (n/MAT_BLK_SZ < 4) {
       long V = 64;
 
@@ -2178,7 +2169,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -3938,8 +4648,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
+@@ -3922,8 +4632,9 @@ void relaxed_inv(zz_p& d, mat_zz_p& X, c
    else {
       long V = 4*MAT_BLK_SZ;
 
@@ -2190,7 +2181,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -4345,10 +5056,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p&
+@@ -4329,10 +5040,10 @@ void alt_tri_L(zz_p& d, const mat_zz_p&
 
 
 
@@ -2204,7 +2195,7 @@
                vec_zz_p *xp, bool trans, bool relax)
 {
    long n = A.NumRows();
-@@ -4535,10 +5246,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p&
+@@ -4519,10 +5230,10 @@ void alt_tri_DD(zz_p& d, const mat_zz_p&
 
 
 
@@ -2218,7 +2209,7 @@
                vec_zz_p *xp, bool trans, bool relax)
 {
    long n = A.NumRows();
-@@ -5349,8 +6060,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
+@@ -5333,8 +6044,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
    else if (n/MAT_BLK_SZ < 4) {
       long V = 64;
 
@@ -2230,7 +2221,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -5375,8 +6087,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
+@@ -5359,8 +6071,9 @@ void tri(zz_p& d, const mat_zz_p& A, con
    else {
       long V = 4*MAT_BLK_SZ;
 
@@ -2242,7 +2233,7 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
-@@ -5622,7 +6335,7 @@ long elim_basic(const mat_zz_p& A, mat_z
+@@ -5606,7 +6319,7 @@ long elim_basic(const mat_zz_p& A, mat_z
 #ifdef NTL_HAVE_LL_TYPE
 
 
@@ -2251,7 +2242,7 @@
 
 
 static inline
-@@ -7075,8 +7788,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i
+@@ -7059,8 +7772,9 @@ long elim(const mat_zz_p& A, mat_zz_p *i
    else {
       long V = 4*MAT_BLK_SZ;
 
@@ -2263,10 +2254,10 @@
           V <= (MAX_DBL_INT-(p-1))/(p-1) &&
           V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) {
 
--- src/QuickTest.c.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/QuickTest.c	2016-07-20 19:57:16.777610226 -0600
-@@ -339,6 +339,9 @@ cerr << "Performance Options:\n";
-    cerr << "NTL_PCLMUL\n";
+--- src/QuickTest.cpp.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/QuickTest.cpp	2016-10-19 19:03:46.647499895 -0600
+@@ -316,6 +316,9 @@ cerr << "Performance Options:\n";
+    cerr << "NTL_GF2X_NOINLINE\n";
 #endif
 
 +#ifdef NTL_LOADTIME_CPU
@@ -2275,10 +2266,10 @@
 
    cerr << "\n\n";
 
--- src/WizardAux.orig	2016-06-21 12:46:44.000000000 -0600
-+++ src/WizardAux	2016-07-20 19:57:16.777610226 -0600
-@@ -94,6 +94,7 @@ system("make InitSettings");
- 'NTL_PCLMUL'           => 0,
+--- src/WizardAux.orig	2016-10-14 07:53:32.000000000 -0600
+++ src/WizardAux	2016-10-19 19:03:46.647499895 -0600
+@@ -88,6 +88,7 @@ system("$ARGV[0] InitSettings");
+ 'NTL_GF2X_NOINLINE'    => 0,
 'NTL_FFT_BIGTAB'       => 0,
 'NTL_FFT_LAZYMUL'      => 0,
 +'NTL_LOADTIME_CPU'     => 0,
--- a/ntl.spec
+++ b/ntl.spec
@@ -10,10 +10,10 @@

 Summary: High-performance algorithms for vectors, matrices, and polynomials 
 Name:    ntl 
-Version: 9.11.0
+Version: 10.1.0
 Release: 1%{?dist}

-License: GPLv2+
+License: LGPLv2+
 URL:     http://shoup.net/ntl/ 

 Source0: http://shoup.net/ntl/%{name}-%{version}.tar.gz
@@ -79,11 +79,12 @@ pushd src
  LIBDIR=%{_libdir} \
  NATIVE=off \
  %{?gf2x:NTL_GF2X_LIB=on} \
-  NTL_PCLMUL=off \
  NTL_DISABLE_TLS_HACK=on \
 %ifarch x86_64
  NTL_LOADTIME_CPU=on \
-  WIZARD=off \
+  TUNE=x86 \
+%else
+  TUNE=generic \
 %endif
  SHARED=on
 popd
@@ -136,7 +137,7 @@ done
 %files
 %doc README
 %license doc/copying.txt
-%{_libdir}/libntl.so.29*
+%{_libdir}/libntl.so.31*

 %files devel 
 %doc doc/*
@@ -150,6 +151,9 @@ done


 %changelog
+* Thu Oct 20 2016 Jerry James <loganjerry@gmail.com> - 10.1.0-1
+- ntl-10.1.0
+
 * Mon Sep  5 2016 Jerry James <loganjerry@gmail.com> - 9.11.0-1
 - ntl-9.11.0

--- a/2
+++ b/2
@@ -1 +1 @@
-e87daf6ca33fbbb628df1984303f3e2c  ntl-9.11.0.tar.gz
+16b3449335163a753d45b5f1231bee23  ntl-10.1.0.tar.gz