elpa/elpa-simd.patch

78 lines
2.8 KiB
Diff

# configure.ac: scope the SIMD compiler flags to the intrinsics checks and
# export them for use by the Makefile.
#
#  * SSE3 check: CFLAGS is saved in save_CFLAGS and "-msse3" is appended
#    before the `need_sse` test; SSE3CFLAGS="-msse3" is set inside that
#    branch after HAVE_SSE_INTRINSICS is defined; CFLAGS is restored after
#    the closing `fi`.
#  * AVX check: inside the `need_avx` branch CFLAGS is saved and "-mavx" is
#    appended before AC_COMPILE_IFELSE; after HAVE_AVX is defined,
#    AVXCFLAGS="-mavx" is set and CFLAGS is restored.
#  * AVX2 check: same pattern with "-mavx2 -mfma" and AVX2CFLAGS.
#  * SSE3CFLAGS, AVXCFLAGS and AVX2CFLAGS are passed to AC_SUBST.
#
# NOTE(review): the first hunk header declares 6 old lines but fewer context
# lines are present, and context lines have no leading space -- this copy
# looks whitespace-stripped; verify against the pristine patch before
# applying.
diff -up mpich/configure.ac.simd mpich/configure.ac
--- mpich/configure.ac.simd 2017-10-30 14:41:22.320909606 +0100
+++ mpich/configure.ac 2017-10-30 14:41:22.322909621 +0100
@@ -668,6 +668,8 @@ m4_foreach_w([elpa_m4_kind],[real comple
AC_LANG_PUSH([C])
+save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -msse3"
if test x"${need_sse}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SSE3 with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
@@ -686,7 +688,9 @@ int main(int argc, char **argv) {
AC_MSG_ERROR([Could not compile test program, try with --disable-sse, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_SSE_INTRINSICS],[1],[gcc intrinsics SSE is supported on this CPU])
+ SSE3CFLAGS="-msse3"
fi
+CFLAGS="$save_CFLAGS"
if test x"${need_sse_assembly}" = x"yes"; then
@@ -728,6 +732,8 @@ fi
if test x"${need_avx}" = x"yes"; then
dnl check whether one can compile AVX gcc intrinsics
AC_MSG_CHECKING([whether we can compile AVX gcc intrinsics in C])
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -mavx"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
@@ -744,11 +750,15 @@ if test x"${need_avx}" = x"yes"; then
AC_MSG_ERROR([Could not compile a test program with AVX, try with --disable-avx, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
+ AVXCFLAGS="-mavx"
+ CFLAGS="$save_CFLAGS"
fi
if test x"${need_avx2}" = x"yes"; then
AC_MSG_CHECKING([whether we can compile AVX2 gcc intrinsics in C])
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -mavx2 -mfma"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
@@ -766,6 +776,8 @@ if test x"${need_avx2}" = x"yes"; then
AC_MSG_ERROR([Could not compile a test program with AVX2, try with --disable-avx2, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_AVX2],[1],[AVX2 is supported on this CPU])
+ AVX2CFLAGS="-mavx2 -mfma"
+ CFLAGS="$save_CFLAGS"
fi
@@ -923,6 +935,9 @@ AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
AC_SUBST([FC_MODINC])
AC_SUBST([FC_MODOUT])
+AC_SUBST([SSE3CFLAGS])
+AC_SUBST([AVXCFLAGS])
+AC_SUBST([AVX2CFLAGS])
AC_SUBST([OPENMP_CFLAGS])
AC_SUBST([OPENMP_FCFLAGS])
AC_SUBST([OPENMP_LDFLAGS])
# Makefile.am: give libelpa@SUFFIX@_private.la a per-target CFLAGS made of
# $(SSE3CFLAGS) $(AVXCFLAGS) $(AVX2CFLAGS) followed by $(AM_CFLAGS).
# The three SIMD variables are the ones AC_SUBST'ed by the configure.ac
# changes in this patch.
# NOTE(review): Automake per-target <target>_CFLAGS replaces AM_CFLAGS for
# that target, which is presumably why $(AM_CFLAGS) is listed explicitly --
# confirm against the Automake manual.
# NOTE(review): the SIMD flags apply to every C source of this library, not
# only to individual kernel files -- confirm that is intended.
diff -up mpich/Makefile.am.simd mpich/Makefile.am
--- mpich/Makefile.am.simd 2017-09-01 08:54:37.000000000 +0200
+++ mpich/Makefile.am 2017-10-30 14:41:22.322909621 +0100
@@ -33,6 +33,7 @@ endif
# internal parts
noinst_LTLIBRARIES += libelpa@SUFFIX@_private.la
libelpa@SUFFIX@_private_la_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
+libelpa@SUFFIX@_private_la_CFLAGS = $(SSE3CFLAGS) $(AVXCFLAGS) $(AVX2CFLAGS) $(AM_CFLAGS)
libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa_impl.F90 \
src/elpa_abstract_impl.F90 \