From 0fc6592cf8867f0cd6d8d41b43392fb52d359649 Mon Sep 17 00:00:00 2001 From: Jose Dapena Paz Date: Tue, 7 Jun 2022 15:44:35 +0200 Subject: [PATCH] GCC: fix compilation of NEON64 extract_first_nonzero_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC fails to compile extract_first_nonzero_index because of the signedness type mismatch in the NEON intrinsics. Bug: chromium:819294 Change-Id: I9b73e5fa1d5fbf161740ab1b5d77f5c494369dfa Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3693709 Reviewed-by: Toon Verwaest Commit-Queue: José Dapena Paz Cr-Commit-Position: refs/heads/main@{#81063} --- v8/src/objects/simd.cc | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/v8/src/objects/simd.cc b/v8/src/objects/simd.cc index d3cedfe3302..0a73b9c686d 100644 --- a/v8/src/objects/simd.cc +++ b/v8/src/objects/simd.cc @@ -95,24 +95,21 @@ inline int extract_first_nonzero_index(T v) { } template <> -inline int extract_first_nonzero_index(int32x4_t v) { - int32x4_t mask = {4, 3, 2, 1}; +inline int extract_first_nonzero_index(uint32x4_t v) { + uint32x4_t mask = {4, 3, 2, 1}; mask = vandq_u32(mask, v); return 4 - vmaxvq_u32(mask); } template <> -inline int extract_first_nonzero_index(int64x2_t v) { - int32x4_t mask = {2, 0, 1, 0}; // Could also be {2,2,1,1} or {0,2,0,1} - mask = vandq_u32(mask, vreinterpretq_s32_s64(v)); +inline int extract_first_nonzero_index(uint64x2_t v) { + uint32x4_t mask = {2, 0, 1, 0}; // Could also be {2,2,1,1} or {0,2,0,1} + mask = vandq_u32(mask, vreinterpretq_u32_u64(v)); return 2 - vmaxvq_u32(mask); } -template <> -inline int extract_first_nonzero_index(float64x2_t v) { - int32x4_t mask = {2, 0, 1, 0}; // Could also be {2,2,1,1} or {0,2,0,1} - mask = vandq_u32(mask, vreinterpretq_s32_f64(v)); - return 2 - vmaxvq_u32(mask); +inline int32_t reinterpret_vmaxvq_u64(uint64x2_t v) { + return vmaxvq_u32(vreinterpretq_u32_u64(v)); } #endif @@ -204,14 +201,14 
@@ inline uintptr_t fast_search_noavx(T* array, uintptr_t array_len, } #elif defined(NEON64) if constexpr (std::is_same<T, uint32_t>::value) { - VECTORIZED_LOOP_Neon(int32x4_t, int32x4_t, vdupq_n_u32, vceqq_u32, + VECTORIZED_LOOP_Neon(uint32x4_t, uint32x4_t, vdupq_n_u32, vceqq_u32, vmaxvq_u32) } else if constexpr (std::is_same<T, uint64_t>::value) { - VECTORIZED_LOOP_Neon(int64x2_t, int64x2_t, vdupq_n_u64, vceqq_u64, - vmaxvq_u32) + VECTORIZED_LOOP_Neon(uint64x2_t, uint64x2_t, vdupq_n_u64, vceqq_u64, + reinterpret_vmaxvq_u64) } else if constexpr (std::is_same<T, double>::value) { - VECTORIZED_LOOP_Neon(float64x2_t, float64x2_t, vdupq_n_f64, vceqq_f64, - vmaxvq_f64) + VECTORIZED_LOOP_Neon(float64x2_t, uint64x2_t, vdupq_n_f64, vceqq_f64, + reinterpret_vmaxvq_u64) } #else UNREACHABLE();