45 lines
1.9 KiB
Diff
45 lines
1.9 KiB
Diff
From 7792b1e909a98703181aecb8810b4b654004c25d Mon Sep 17 00:00:00 2001
|
|
From: Antonio Sanchez <cantonios@google.com>
|
|
Date: Fri, 3 Sep 2021 10:41:35 -0700
|
|
Subject: [PATCH] Fix AVX2 PacketMath.h.
|
|
|
|
There were a couple of typos (`_ps` intrinsics used where `_epi32` was intended) and an unaligned load issue in `ploaddup<Packet8i>`.
|
|
---
|
|
Eigen/src/Core/arch/AVX/PacketMath.h | 8 ++++----
|
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h
|
|
index 8da9031dc..41cb7af9c 100644
|
|
--- a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h
|
|
+++ b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h
|
|
@@ -663,7 +663,7 @@ template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
|
|
template<> EIGEN_STRONG_INLINE Packet8i ploaddup<Packet8i>(const int* from)
|
|
{
|
|
#ifdef EIGEN_VECTORIZE_AVX2
|
|
- const Packet8i a = _mm256_castsi128_si256(pload<Packet4i>(from));
|
|
+ const Packet8i a = _mm256_castsi128_si256(ploadu<Packet4i>(from));
|
|
return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3));
|
|
#else
|
|
__m256 tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
|
|
@@ -1015,14 +1015,14 @@ ptranspose(PacketBlock<Packet8f,4>& kernel) {
|
|
#define MM256_SHUFFLE_EPI32(A, B, M) \
|
|
_mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M))
|
|
|
|
-#ifdef EIGEN_VECTORIZE_AVX2
|
|
+#ifndef EIGEN_VECTORIZE_AVX2
|
|
#define MM256_UNPACKLO_EPI32(A, B) \
|
|
_mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
|
|
#define MM256_UNPACKHI_EPI32(A, B) \
|
|
_mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
|
|
#else
|
|
-#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_ps(A, B)
|
|
-#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_ps(A, B)
|
|
+#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_epi32(A, B)
|
|
+#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_epi32(A, B)
|
|
#endif
|
|
|
|
|
|
--
|
|
GitLab
|
|
|