From eb16415e7a221a59e23165b3548398f69ce34e8f Mon Sep 17 00:00:00 2001
From: Kyle McMartin
Date: Wed, 3 Dec 2014 13:12:37 -0500
Subject: [PATCH] aarch64: revert optimized strchrnul.S implementation
 (rhbz#1167501)

---
 glibc-aarch64-strchrnul-revert.patch | 144 +++++++++++++++++++++++++++
 glibc.spec                           |   8 +-
 2 files changed, 151 insertions(+), 1 deletion(-)
 create mode 100644 glibc-aarch64-strchrnul-revert.patch

diff --git a/glibc-aarch64-strchrnul-revert.patch b/glibc-aarch64-strchrnul-revert.patch
new file mode 100644
index 0000000..d448de0
--- /dev/null
+++ b/glibc-aarch64-strchrnul-revert.patch
@@ -0,0 +1,144 @@
+commit 3ec7d8a6b30659b34693730a374c0265a191c4ba
+Author: Kyle McMartin
+Date:   Wed Dec 3 12:26:23 2014 -0500
+
+    Revert "[AArch64] Add optimized strchrnul."
+
+    This reverts commit be9d4ccc7fe62751db1a5fdcb31958561dbbda9a.
+
+diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
+deleted file mode 100644
+index b98c2e9..0000000
+--- a/sysdeps/aarch64/strchrnul.S
++++ /dev/null
+@@ -1,130 +0,0 @@
+-/* strchrnul - find a character or nul in a string
+-
+-   Copyright (C) 2014 Free Software Foundation, Inc.
+-
+-   This file is part of the GNU C Library.
+-
+-   The GNU C Library is free software; you can redistribute it and/or
+-   modify it under the terms of the GNU Lesser General Public
+-   License as published by the Free Software Foundation; either
+-   version 2.1 of the License, or (at your option) any later version.
+-
+-   The GNU C Library is distributed in the hope that it will be useful,
+-   but WITHOUT ANY WARRANTY; without even the implied warranty of
+-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-   Lesser General Public License for more details.
+-
+-   You should have received a copy of the GNU Lesser General Public
+-   License along with the GNU C Library.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-#include <sysdep.h>
+-
+-/* Assumptions:
+- *
+- * ARMv8-a, AArch64
+- * Neon Available.
+- */
+-
+-/* Arguments and results.  */
+-#define srcin           x0
+-#define chrin           w1
+-
+-#define result          x0
+-
+-/* Locals and temporaries.  */
+-
+-#define src             x2
+-#define tmp1            x3
+-#define wtmp2           w4
+-#define tmp3            x5
+-
+-#define vrepchr         v0
+-#define vdata1          v1
+-#define vdata2          v2
+-#define vhas_nul1       v3
+-#define vhas_nul2       v4
+-#define vhas_chr1       v5
+-#define vhas_chr2       v6
+-#define vrepmask        v15
+-#define vend1           v16
+-
+-/* Core algorithm.
+-
+-   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+-   two bits per byte (LSB is always in bits 0 and 1, for both big
+-   and little-endian systems).  For each tuple, bit 0 is set iff
+-   the relevant byte matched the requested character or nul.  Since the
+-   bits in the syndrome reflect exactly the order in which things occur
+-   in the original string a count_trailing_zeros() operation will
+-   identify exactly which byte is causing the termination.  */
+-
+-ENTRY (__strchrnul)
+-        /* Magic constant 0x40100401 to allow us to identify which lane
+-           matches the termination condition.  */
+-        mov     wtmp2, #0x0401
+-        movk    wtmp2, #0x4010, lsl #16
+-        dup     vrepchr.16b, chrin
+-        bic     src, srcin, #31         /* Work with aligned 32-byte hunks.  */
+-        dup     vrepmask.4s, wtmp2
+-        ands    tmp1, srcin, #31
+-        b.eq    L(loop)
+-
+-        /* Input string is not 32-byte aligned.  Rather than forcing
+-           the padding bytes to a safe value, we calculate the syndrome
+-           for all the bytes, but then mask off those bits of the
+-           syndrome that are related to the padding.  */
+-        ld1     {vdata1.16b, vdata2.16b}, [src], #32
+-        neg     tmp1, tmp1
+-        cmeq    vhas_nul1.16b, vdata1.16b, #0
+-        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
+-        cmeq    vhas_nul2.16b, vdata2.16b, #0
+-        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
+-        orr     vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
+-        orr     vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
+-        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+-        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+-        lsl     tmp1, tmp1, #1
+-        addp    vend1.16b, vhas_chr1.16b, vhas_chr2.16b         // 256->128
+-        mov     tmp3, #~0
+-        addp    vend1.16b, vend1.16b, vend1.16b                 // 128->64
+-        lsr     tmp1, tmp3, tmp1
+-
+-        mov     tmp3, vend1.2d[0]
+-        bic     tmp1, tmp3, tmp1        // Mask padding bits.
+-        cbnz    tmp1, L(tail)
+-
+-L(loop):
+-        ld1     {vdata1.16b, vdata2.16b}, [src], #32
+-        cmeq    vhas_nul1.16b, vdata1.16b, #0
+-        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
+-        cmeq    vhas_nul2.16b, vdata2.16b, #0
+-        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
+-        /* Use a fast check for the termination condition.  */
+-        orr     vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
+-        orr     vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
+-        orr     vend1.16b, vhas_chr1.16b, vhas_chr2.16b
+-        addp    vend1.2d, vend1.2d, vend1.2d
+-        mov     tmp1, vend1.2d[0]
+-        cbz     tmp1, L(loop)
+-
+-        /* Termination condition found.  Now need to establish exactly why
+-           we terminated.  */
+-        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+-        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+-        addp    vend1.16b, vhas_chr1.16b, vhas_chr2.16b         // 256->128
+-        addp    vend1.16b, vend1.16b, vend1.16b                 // 128->64
+-
+-        mov     tmp1, vend1.2d[0]
+-L(tail):
+-        /* Count the trailing zeros, by bit reversing...  */
+-        rbit    tmp1, tmp1
+-        /* Re-bias source.  */
+-        sub     src, src, #32
+-        clz     tmp1, tmp1      /* ... and counting the leading zeros.  */
+-        /* tmp1 is twice the offset into the fragment.  */
+-        add     result, src, tmp1, lsr #1
+-        ret
+-
+-END(__strchrnul)
+-weak_alias (__strchrnul, strchrnul)
diff --git a/glibc.spec b/glibc.spec
index dcfe355..5bf7e68 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -1,6 +1,6 @@
 %define glibcsrcdir glibc-2.20-276-g0e7e69b
 %define glibcversion 2.20.90
-%define glibcrelease 10%{?dist}
+%define glibcrelease 11%{?dist}
 # Pre-release tarballs are pulled in from git using a command that is
 # effectively:
 #
@@ -237,6 +237,7 @@ Patch2031: %{name}-rh1070416.patch
 
 Patch2033: %{name}-aarch64-tls-fixes.patch
 Patch2034: %{name}-aarch64-workaround-nzcv-clobber-in-tlsdesc.patch
+Patch2035: %{name}-aarch64-strchrnul-revert.patch
 
 ##############################################################################
 # End of glibc patches.
@@ -569,6 +570,7 @@ package or when debugging this package.
 %patch0047 -p1
 %patch2033 -p1
 %patch2034 -p1
+%patch2035 -p1
 %patch0050 -p1
 %patch0052 -p1
 %patch0053 -p1
@@ -1725,6 +1727,10 @@ rm -f *.filelist*
 %endif
 
 %changelog
+* Wed Dec 03 2014 Kyle McMartin - 2.20.90-11
+- aarch64: revert optimized strchrnul.S implementation (rhbz#1167501)
+  until it can be debugged.
+
 * Fri Nov 28 2014 Carlos O'Donell - 2.20.90-10
 - Auto-sync with upstream master.
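
For reference, the syndrome scheme described in the removed file's header
comment can be modelled in scalar C.  This is a minimal sketch under stated
assumptions, not the reverted code or any glibc API: chunk_syndrome,
first_match and padding_mask are hypothetical names, it packs one bit per
byte where the NEON version packs two, and it relies on the GCC/Clang
__builtin_ctz builtin.

#include <stdint.h>

/* Build a syndrome for one 32-byte chunk: bit i is set iff byte i
   matches the target character or is NUL.  (Hypothetical one-bit-per-
   byte variant of the two-bits-per-byte NEON syndrome above.)  */
static inline uint32_t
chunk_syndrome (const unsigned char *chunk, unsigned char c)
{
  uint32_t syndrome = 0;
  for (int i = 0; i < 32; i++)
    if (chunk[i] == c || chunk[i] == '\0')
      syndrome |= (uint32_t) 1 << i;
  return syndrome;
}

/* Byte offset of the first match, assuming syndrome != 0.  The rbit+clz
   pair in the assembly computes the same trailing-zero count.  */
static inline unsigned int
first_match (uint32_t syndrome)
{
  return (unsigned int) __builtin_ctz (syndrome);
}

/* Keep-mask for a chunk loaded from an address rounded down by
   `misalign` bytes (0 < misalign < 32): clears syndrome bits that
   belong to padding bytes before the real start of the string, as the
   lsr/bic sequence does in the assembly.  */
static inline uint32_t
padding_mask (unsigned int misalign)
{
  return ~(((uint32_t) 1 << misalign) - 1);
}

A full strchrnul built on this model would walk 32-byte chunks until
chunk_syndrome (masked with padding_mask on the first, unaligned chunk)
returns nonzero, then return the chunk base plus first_match.  In the
two-bit NEON syndrome the trailing-zero count is twice the byte offset,
which is why the assembly finishes with "add result, src, tmp1, lsr #1".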