Add upstream patch to fix SIMD crash on aarch64 (rhbz #1368569)

This commit is contained in:
Peter Robinson 2016-09-13 20:33:52 +01:00
parent 5af0418032
commit 81c58f0640
2 changed files with 168 additions and 2 deletions

161
libjpeg-turbo-aarch64.patch Normal file
View File

@ -0,0 +1,161 @@
From 1120ff29a178ee666504f0067e7c079a6b792296 Mon Sep 17 00:00:00 2001
From: DRC <information@libjpeg-turbo.org>
Date: Wed, 13 Jul 2016 12:15:02 -0500
Subject: [PATCH] Fix AArch64 ABI conformance issue in SIMD code
In the AArch64 ABI, the high (unused) DWORD of a 32-bit argument's
register is undefined, so it was incorrect to use 64-bit
instructions to transfer a JDIMENSION argument in the 64-bit NEON SIMD
functions. The code worked thus far only because the existing compiler
optimizers weren't smart enough to do anything else with the register in
question, so the upper 32 bits happened to be all zeroes.
The latest builds of Clang/LLVM have a smarter optimizer, and under
certain circumstances, it will attempt to load-combine adjacent 32-bit
integers from one of the libjpeg structures into a single 64-bit integer
and pass that 64-bit integer as a 32-bit argument to one of the SIMD
functions (which is allowed by the ABI, since the upper 32 bits of the
32-bit argument's register are undefined.) This caused the
libjpeg-turbo regression tests to crash.
This patch tries to use the Wn registers whenever possible. Otherwise,
it uses a zero-extend instruction to avoid using the upper 32 bits of
the 64-bit registers, which are not guaranteed to be valid for 32-bit
arguments.
Based on https://github.com/sebpop/libjpeg-turbo/commit/1fbae13021eb98f6fffdfaf8678fcdb00b0b04d9
Closes #91. Refer also to android-ndk/ndk#110 and
https://llvm.org/bugs/show_bug.cgi?id=28393
---
simd/jsimd_arm64_neon.S | 50 ++++++++++++++++++++++++++++++++++++-------------
1 file changed, 37 insertions(+), 13 deletions(-)
diff --git a/simd/jsimd_arm64_neon.S b/simd/jsimd_arm64_neon.S
index 74d6c76..6c1a959 100644
--- a/simd/jsimd_arm64_neon.S
+++ b/simd/jsimd_arm64_neon.S
@@ -210,6 +210,11 @@ asm_function jsimd_idct_islow_neon
TMP7 .req x13
TMP8 .req x14
+ /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+ guarantee that the upper (unused) 32 bits of x3 are valid. This
+ instruction ensures that those bits are set to zero. */
+ uxtw x3, w3
+
sub sp, sp, #64
adr x15, Ljsimd_idct_islow_neon_consts
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32
@@ -807,6 +812,11 @@ asm_function jsimd_idct_ifast_neon
TMP7 .req x13
TMP8 .req x14
+ /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+ guarantee that the upper (unused) 32 bits of x3 are valid. This
+ instruction ensures that those bits are set to zero. */
+ uxtw x3, w3
+
/* Load and dequantize coefficients into NEON registers
* with the following allocation:
* 0 1 2 3 | 4 5 6 7
@@ -1101,6 +1111,11 @@ asm_function jsimd_idct_4x4_neon
TMP3 .req x2
TMP4 .req x15
+ /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+ guarantee that the upper (unused) 32 bits of x3 are valid. This
+ instruction ensures that those bits are set to zero. */
+ uxtw x3, w3
+
/* Save all used NEON registers */
sub sp, sp, 272
str x15, [sp], 16
@@ -1299,6 +1314,11 @@ asm_function jsimd_idct_2x2_neon
TMP1 .req x0
TMP2 .req x15
+ /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+ guarantee that the upper (unused) 32 bits of x3 are valid. This
+ instruction ensures that those bits are set to zero. */
+ uxtw x3, w3
+
/* vpush {v8.4h - v15.4h} ; not available */
sub sp, sp, 208
str x15, [sp], 16
@@ -1688,11 +1708,11 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
.else
asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
.endif
- OUTPUT_WIDTH .req x0
+ OUTPUT_WIDTH .req w0
INPUT_BUF .req x1
- INPUT_ROW .req x2
+ INPUT_ROW .req w2
OUTPUT_BUF .req x3
- NUM_ROWS .req x4
+ NUM_ROWS .req w4
INPUT_BUF0 .req x5
INPUT_BUF1 .req x6
@@ -1702,7 +1722,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
Y .req x8
U .req x9
V .req x10
- N .req x15
+ N .req w15
sub sp, sp, 336
str x15, [sp], 16
@@ -1745,11 +1765,10 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
cmp NUM_ROWS, #1
b.lt 9f
0:
- lsl x16, INPUT_ROW, #3
- ldr Y, [INPUT_BUF0, x16]
- ldr U, [INPUT_BUF1, x16]
+ ldr Y, [INPUT_BUF0, INPUT_ROW, uxtw #3]
+ ldr U, [INPUT_BUF1, INPUT_ROW, uxtw #3]
mov N, OUTPUT_WIDTH
- ldr V, [INPUT_BUF2, x16]
+ ldr V, [INPUT_BUF2, INPUT_ROW, uxtw #3]
add INPUT_ROW, INPUT_ROW, #1
ldr RGB, [OUTPUT_BUF], #8
@@ -2054,8 +2073,8 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
OUTPUT_WIDTH .req w0
INPUT_BUF .req x1
OUTPUT_BUF .req x2
- OUTPUT_ROW .req x3
- NUM_ROWS .req x4
+ OUTPUT_ROW .req w3
+ NUM_ROWS .req w4
OUTPUT_BUF0 .req x5
OUTPUT_BUF1 .req x6
@@ -2089,10 +2108,10 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
cmp NUM_ROWS, #1
b.lt 9f
0:
- ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, lsl #3]
- ldr U, [OUTPUT_BUF1, OUTPUT_ROW, lsl #3]
+ ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, uxtw #3]
+ ldr U, [OUTPUT_BUF1, OUTPUT_ROW, uxtw #3]
mov N, OUTPUT_WIDTH
- ldr V, [OUTPUT_BUF2, OUTPUT_ROW, lsl #3]
+ ldr V, [OUTPUT_BUF2, OUTPUT_ROW, uxtw #3]
add OUTPUT_ROW, OUTPUT_ROW, #1
ldr RGB, [INPUT_BUF], #8
@@ -2199,6 +2218,11 @@ asm_function jsimd_convsamp_neon
TMP8 .req x4
TMPDUP .req w3
+ /* START_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
+ guarantee that the upper (unused) 32 bits of x1 are valid. This
+ instruction ensures that those bits are set to zero. */
+ uxtw x1, w1
+
mov TMPDUP, #128
ldp TMP1, TMP2, [SAMPLE_DATA], 16
ldp TMP3, TMP4, [SAMPLE_DATA], 16

View File

@ -1,13 +1,14 @@
Name: libjpeg-turbo
Version: 1.5.0
Release: 1%{?dist}
Summary: A MMX/SSE2 accelerated library for manipulating JPEG image files
Release: 2%{?dist}
Summary: A MMX/SSE2/SIMD accelerated library for manipulating JPEG image files
License: IJG
URL: http://sourceforge.net/projects/libjpeg-turbo
Source0: http://downloads.sourceforge.net/%{name}/%{name}-%{version}.tar.gz
Patch0: libjpeg-turbo14-noinst.patch
Patch1: libjpeg-turbo-header-files.patch
Patch2: libjpeg-turbo-aarch64.patch
BuildRequires: autoconf
BuildRequires: automake
@ -71,6 +72,7 @@ manipulate JPEG files using the TurboJPEG library.
%setup -q
%patch0 -p1 -b .noinst
%patch1 -p1 -b .header-files
%patch2 -p1 -b .aarch64
%build
autoreconf -vif
@ -167,6 +169,9 @@ make test %{?_smp_mflags}
%{_libdir}/pkgconfig/libturbojpeg.pc
%changelog
* Tue Sep 13 2016 Peter Robinson <pbrobinson@fedoraproject.org> - 1.5.0-2
- Add upstream patch to fix SIMD crash on aarch64 (rhbz #1368569)
* Tue Jun 21 2016 Petr Hracek <phracek@redhat.com> - 1.5.0-1
- New upstream release 1.5.0 (#1343786)