chromium/chromium-95-eigen-avx-3.patch
Tom spot Callaway 9de3cc7e1f 95.0.4638.69
2021-11-15 14:34:13 -05:00

45 lines
1.9 KiB
Diff

From 7792b1e909a98703181aecb8810b4b654004c25d Mon Sep 17 00:00:00 2001
From: Antonio Sanchez <cantonios@google.com>
Date: Fri, 3 Sep 2021 10:41:35 -0700
Subject: [PATCH] Fix AVX2 PacketMath.h.
There were a couple typos ps -> epi32, and an unaligned load issue.
---
Eigen/src/Core/arch/AVX/PacketMath.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h
index 8da9031dc..41cb7af9c 100644
--- a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -663,7 +663,7 @@ template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
template<> EIGEN_STRONG_INLINE Packet8i ploaddup<Packet8i>(const int* from)
{
#ifdef EIGEN_VECTORIZE_AVX2
- const Packet8i a = _mm256_castsi128_si256(pload<Packet4i>(from));
+ const Packet8i a = _mm256_castsi128_si256(ploadu<Packet4i>(from));
return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3));
#else
__m256 tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
@@ -1015,14 +1015,14 @@ ptranspose(PacketBlock<Packet8f,4>& kernel) {
#define MM256_SHUFFLE_EPI32(A, B, M) \
_mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M))
-#ifdef EIGEN_VECTORIZE_AVX2
+#ifndef EIGEN_VECTORIZE_AVX2
#define MM256_UNPACKLO_EPI32(A, B) \
_mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
#define MM256_UNPACKHI_EPI32(A, B) \
_mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B)))
#else
-#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_ps(A, B)
-#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_ps(A, B)
+#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_epi32(A, B)
+#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_epi32(A, B)
#endif
--
GitLab