Resolves: #1355780 - fix 'sort -h -k' in locales that use blank as thousands separator
This commit is contained in:
parent
6cf6cd48dd
commit
8b01f2371c
332
coreutils-8.25-sort-thousands-sep.patch
Normal file
332
coreutils-8.25-sort-thousands-sep.patch
Normal file
@ -0,0 +1,332 @@
|
||||
From c479153d77b419a6cae4551b63d2b73096c1130e Mon Sep 17 00:00:00 2001
|
||||
From: Kamil Dudka <kdudka@redhat.com>
|
||||
Date: Mon, 18 Jul 2016 19:04:43 +0200
|
||||
Subject: [PATCH 1/3] maint: sort.c: deduplicate code for traversing numbers
|
||||
|
||||
* src/sort.c (traverse_raw_number): New function for traversing numbers.
|
||||
(find_unit_order): Use traverse_raw_number() instead of open-coding it.
|
||||
(debug_key): Likewise.
|
||||
---
|
||||
src/sort.c | 63 ++++++++++++++++++++++++++++++++++----------------------------
|
||||
1 file changed, 35 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/src/sort.c b/src/sort.c
|
||||
index 5b02343..e28bb6c 100644
|
||||
--- a/src/sort.c
|
||||
+++ b/src/sort.c
|
||||
@@ -2231,18 +2231,16 @@ static char const unit_order[UCHAR_LIM] =
|
||||
#endif
|
||||
};
|
||||
|
||||
-/* Return an integer that represents the order of magnitude of the
|
||||
- unit following the number. The number may contain thousands
|
||||
- separators and a decimal point, but it may not contain leading blanks.
|
||||
- Negative numbers get negative orders; zero numbers have a zero order. */
|
||||
-
|
||||
-static int _GL_ATTRIBUTE_PURE
|
||||
-find_unit_order (char const *number)
|
||||
+/* Traverse number given as *number consisting of digits, thousands_sep, and
|
||||
+ decimal_point chars only. Returns the highest digit found in the number,
|
||||
+ or '\0' if no digit has been found. Upon return *number points at the
|
||||
+ character that immediately follows after the given number. */
|
||||
+static unsigned char
|
||||
+traverse_raw_number (char const **number)
|
||||
{
|
||||
- bool minus_sign = (*number == '-');
|
||||
- char const *p = number + minus_sign;
|
||||
- int nonzero = 0;
|
||||
+ char const *p = *number;
|
||||
unsigned char ch;
|
||||
+ unsigned char max_digit = '\0';
|
||||
|
||||
/* Scan to end of number.
|
||||
Decimals or separators not followed by digits stop the scan.
|
||||
@@ -2253,16 +2251,34 @@ find_unit_order (char const *number)
|
||||
do
|
||||
{
|
||||
while (ISDIGIT (ch = *p++))
|
||||
- nonzero |= ch - '0';
|
||||
+ if (max_digit < ch)
|
||||
+ max_digit = ch;
|
||||
}
|
||||
while (ch == thousands_sep);
|
||||
|
||||
if (ch == decimal_point)
|
||||
while (ISDIGIT (ch = *p++))
|
||||
- nonzero |= ch - '0';
|
||||
+ if (max_digit < ch)
|
||||
+ max_digit = ch;
|
||||
+
|
||||
+ *number = p - 1;
|
||||
+ return max_digit;
|
||||
+}
|
||||
+
|
||||
+/* Return an integer that represents the order of magnitude of the
|
||||
+ unit following the number. The number may contain thousands
|
||||
+ separators and a decimal point, but it may not contain leading blanks.
|
||||
+ Negative numbers get negative orders; zero numbers have a zero order. */
|
||||
|
||||
- if (nonzero)
|
||||
+static int _GL_ATTRIBUTE_PURE
|
||||
+find_unit_order (char const *number)
|
||||
+{
|
||||
+ bool minus_sign = (*number == '-');
|
||||
+ char const *p = number + minus_sign;
|
||||
+ unsigned char max_digit = traverse_raw_number (&p);
|
||||
+ if ('0' < max_digit)
|
||||
{
|
||||
+ unsigned char ch = *p;
|
||||
int order = unit_order[ch];
|
||||
return (minus_sign ? -order : order);
|
||||
}
|
||||
@@ -2655,23 +2671,14 @@ debug_key (struct line const *line, struct keyfield const *key)
|
||||
ignore_value (strtold (beg, &tighter_lim));
|
||||
else if (key->numeric || key->human_numeric)
|
||||
{
|
||||
- char *p = beg + (beg < lim && *beg == '-');
|
||||
- bool found_digit = false;
|
||||
- unsigned char ch;
|
||||
-
|
||||
- do
|
||||
+ char const *p = beg + (beg < lim && *beg == '-');
|
||||
+ unsigned char max_digit = traverse_raw_number (&p);
|
||||
+ if ('0' <= max_digit)
|
||||
{
|
||||
- while (ISDIGIT (ch = *p++))
|
||||
- found_digit = true;
|
||||
+ unsigned char ch = *p;
|
||||
+ tighter_lim = (char *) p
|
||||
+ + (key->human_numeric && unit_order[ch]);
|
||||
}
|
||||
- while (ch == thousands_sep);
|
||||
-
|
||||
- if (ch == decimal_point)
|
||||
- while (ISDIGIT (ch = *p++))
|
||||
- found_digit = true;
|
||||
-
|
||||
- if (found_digit)
|
||||
- tighter_lim = p - ! (key->human_numeric && unit_order[ch]);
|
||||
}
|
||||
else
|
||||
tighter_lim = lim;
|
||||
--
|
||||
2.5.5
|
||||
|
||||
|
||||
From 8c39465a5b0343ff7a21286dd69ed5430685d2f7 Mon Sep 17 00:00:00 2001
|
||||
From: Kamil Dudka <kdudka@redhat.com>
|
||||
Date: Mon, 18 Jul 2016 19:04:44 +0200
|
||||
Subject: [PATCH 2/3] sort: make -h work with -k and blank used as thousands
|
||||
separator
|
||||
|
||||
* src/sort.c (traverse_raw_number): Allow to skip only one occurrence
|
||||
of thousands_sep to avoid finding the unit in the next column in case
|
||||
thousands_sep matches as blank and is used as column delimiter.
|
||||
* tests/misc/sort-h-thousands-sep.sh: Add regression test for this bug.
|
||||
* tests/local.mk: Reference the test.
|
||||
* NEWS: Mention the bug fix.
|
||||
Reported at https://bugzilla.redhat.com/1355780
|
||||
Fixes http://bugs.gnu.org/24015
|
||||
---
|
||||
src/sort.c | 14 ++++++++----
|
||||
tests/local.mk | 1 +
|
||||
tests/misc/sort-h-thousands-sep.sh | 47 ++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 57 insertions(+), 5 deletions(-)
|
||||
create mode 100755 tests/misc/sort-h-thousands-sep.sh
|
||||
|
||||
diff --git a/src/sort.c b/src/sort.c
|
||||
index e28bb6c..dd3ba58 100644
|
||||
--- a/src/sort.c
|
||||
+++ b/src/sort.c
|
||||
@@ -2248,13 +2248,17 @@ traverse_raw_number (char const **number)
|
||||
to be lacking in units.
|
||||
FIXME: add support for multibyte thousands_sep and decimal_point. */
|
||||
|
||||
- do
|
||||
+ while (ISDIGIT (ch = *p++))
|
||||
{
|
||||
- while (ISDIGIT (ch = *p++))
|
||||
- if (max_digit < ch)
|
||||
- max_digit = ch;
|
||||
+ if (max_digit < ch)
|
||||
+ max_digit = ch;
|
||||
+
|
||||
+ /* Allow to skip only one occurrence of thousands_sep to avoid finding
|
||||
+ the unit in the next column in case thousands_sep matches as blank
|
||||
+ and is used as column delimiter. */
|
||||
+ if (*p == thousands_sep)
|
||||
+ ++p;
|
||||
}
|
||||
- while (ch == thousands_sep);
|
||||
|
||||
if (ch == decimal_point)
|
||||
while (ISDIGIT (ch = *p++))
|
||||
diff --git a/tests/local.mk b/tests/local.mk
|
||||
index 42d39f2..dccff8d 100644
|
||||
--- a/tests/local.mk
|
||||
+++ b/tests/local.mk
|
||||
@@ -344,6 +344,7 @@ all_tests = \
|
||||
tests/misc/sort-discrim.sh \
|
||||
tests/misc/sort-files0-from.pl \
|
||||
tests/misc/sort-float.sh \
|
||||
+ tests/misc/sort-h-thousands-sep.sh \
|
||||
tests/misc/sort-mb-tests.sh \
|
||||
tests/i18n/sort.sh \
|
||||
tests/misc/sort-merge.pl \
|
||||
diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh
|
||||
new file mode 100755
|
||||
index 0000000..17f1b6c
|
||||
--- /dev/null
|
||||
+++ b/tests/misc/sort-h-thousands-sep.sh
|
||||
@@ -0,0 +1,47 @@
|
||||
+#!/bin/sh
|
||||
+# exercise 'sort -h' in locales where thousands separator is blank
|
||||
+
|
||||
+# Copyright (C) 2016 Free Software Foundation, Inc.
|
||||
+
|
||||
+# This program is free software: you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation, either version 3 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+
|
||||
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
|
||||
+print_ver_ sort
|
||||
+test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \
|
||||
+ || skip_ 'The Swedish locale with blank thousands separator is unavailable.'
|
||||
+
|
||||
+tee exp1 > in << _EOF_
|
||||
+1 1k 4 003 1M
|
||||
+2k 2M 4 002 2
|
||||
+3M 3 4 001 3k
|
||||
+_EOF_
|
||||
+
|
||||
+cat > exp2 << _EOF_
|
||||
+3M 3 4 001 3k
|
||||
+1 1k 4 003 1M
|
||||
+2k 2M 4 002 2
|
||||
+_EOF_
|
||||
+
|
||||
+cat > exp3 << _EOF_
|
||||
+3M 3 4 001 3k
|
||||
+2k 2M 4 002 2
|
||||
+1 1k 4 003 1M
|
||||
+_EOF_
|
||||
+
|
||||
+for i in 1 2 3; do
|
||||
+ LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1
|
||||
+ compare "exp${i}" "out${i}" || fail=1
|
||||
+done
|
||||
+
|
||||
+Exit $fail
|
||||
--
|
||||
2.5.5
|
||||
|
||||
|
||||
From 46ef53f558e7bc1c0bc0abd62a86b40b4141e058 Mon Sep 17 00:00:00 2001
|
||||
From: Kamil Dudka <kdudka@redhat.com>
|
||||
Date: Mon, 18 Jul 2016 19:04:45 +0200
|
||||
Subject: [PATCH 3/3] sort: with -h, disallow thousands separator between
|
||||
number and unit
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
* src/sort.c (traverse_raw_number): Accept thousands separator only
|
||||
if it is immediately followed by a digit.
|
||||
* tests/misc/sort-h-thousands-sep.sh: Cover the fix for this bug.
|
||||
|
||||
Suggested by Pádraig Brady in http://bugs.gnu.org/24015
|
||||
---
|
||||
src/sort.c | 11 ++++++++++-
|
||||
tests/misc/sort-h-thousands-sep.sh | 25 +++++++++++++------------
|
||||
2 files changed, 23 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/src/sort.c b/src/sort.c
|
||||
index dd3ba58..69ef75f 100644
|
||||
--- a/src/sort.c
|
||||
+++ b/src/sort.c
|
||||
@@ -2241,6 +2241,7 @@ traverse_raw_number (char const **number)
|
||||
char const *p = *number;
|
||||
unsigned char ch;
|
||||
unsigned char max_digit = '\0';
|
||||
+ bool ends_with_thousands_sep = false;
|
||||
|
||||
/* Scan to end of number.
|
||||
Decimals or separators not followed by digits stop the scan.
|
||||
@@ -2256,10 +2257,18 @@ traverse_raw_number (char const **number)
|
||||
/* Allow to skip only one occurrence of thousands_sep to avoid finding
|
||||
the unit in the next column in case thousands_sep matches as blank
|
||||
and is used as column delimiter. */
|
||||
- if (*p == thousands_sep)
|
||||
+ ends_with_thousands_sep = (*p == thousands_sep);
|
||||
+ if (ends_with_thousands_sep)
|
||||
++p;
|
||||
}
|
||||
|
||||
+ if (ends_with_thousands_sep)
|
||||
+ {
|
||||
+ /* thousands_sep not followed by digit is not allowed. */
|
||||
+ *number = p - 2;
|
||||
+ return max_digit;
|
||||
+ }
|
||||
+
|
||||
if (ch == decimal_point)
|
||||
while (ISDIGIT (ch = *p++))
|
||||
if (max_digit < ch)
|
||||
diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh
|
||||
index 17f1b6c..3ffa89e 100755
|
||||
--- a/tests/misc/sort-h-thousands-sep.sh
|
||||
+++ b/tests/misc/sort-h-thousands-sep.sh
|
||||
@@ -18,28 +18,29 @@
|
||||
|
||||
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
|
||||
print_ver_ sort
|
||||
+
|
||||
test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \
|
||||
|| skip_ 'The Swedish locale with blank thousands separator is unavailable.'
|
||||
|
||||
-tee exp1 > in << _EOF_
|
||||
-1 1k 4 003 1M
|
||||
-2k 2M 4 002 2
|
||||
-3M 3 4 001 3k
|
||||
+tee exp1 exp3 > in << _EOF_
|
||||
+1 1k 1 M 4 003 1M
|
||||
+2k 2M 2 k 4 002 2
|
||||
+3M 3 3 G 4 001 3k
|
||||
_EOF_
|
||||
|
||||
cat > exp2 << _EOF_
|
||||
-3M 3 4 001 3k
|
||||
-1 1k 4 003 1M
|
||||
-2k 2M 4 002 2
|
||||
+3M 3 3 G 4 001 3k
|
||||
+1 1k 1 M 4 003 1M
|
||||
+2k 2M 2 k 4 002 2
|
||||
_EOF_
|
||||
|
||||
-cat > exp3 << _EOF_
|
||||
-3M 3 4 001 3k
|
||||
-2k 2M 4 002 2
|
||||
-1 1k 4 003 1M
|
||||
+cat > exp5 << _EOF_
|
||||
+3M 3 3 G 4 001 3k
|
||||
+2k 2M 2 k 4 002 2
|
||||
+1 1k 1 M 4 003 1M
|
||||
_EOF_
|
||||
|
||||
-for i in 1 2 3; do
|
||||
+for i in 1 2 3 5; do
|
||||
LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1
|
||||
compare "exp${i}" "out${i}" || fail=1
|
||||
done
|
||||
--
|
||||
2.5.5
|
||||
|
@ -1,7 +1,7 @@
|
||||
Summary: A set of basic GNU tools commonly used in shell scripts
|
||||
Name: coreutils
|
||||
Version: 8.25
|
||||
Release: 13%{?dist}
|
||||
Release: 14%{?dist}
|
||||
License: GPLv3+
|
||||
Group: System Environment/Base
|
||||
Url: http://www.gnu.org/software/coreutils/
|
||||
@ -19,6 +19,8 @@ Source10: coreutils-find-requires.sh
|
||||
|
||||
# From upstream
|
||||
Patch952: coreutils-8.25-intall-Z-selinux.patch
|
||||
# fix 'sort -h -k' in locales that use blank as thousands separator (#1355780)
|
||||
Patch953: coreutils-8.25-sort-thousands-sep.patch
|
||||
|
||||
# Our patches
|
||||
#general patch to workaround koji build system issues
|
||||
@ -209,9 +211,13 @@ tee DIR_COLORS{,.256color,.lightbgcolor} <src/dircolors.hin >/dev/null
|
||||
%patch951 -p1 -b .selinuxman
|
||||
%patch952 -p1
|
||||
|
||||
# upstream patches
|
||||
%patch953 -p1
|
||||
|
||||
chmod a+x \
|
||||
tests/df/direct.sh \
|
||||
tests/install/install-Z-selinux.sh \
|
||||
tests/misc/sort-h-thousands-sep.sh \
|
||||
tests/misc/sort-mb-tests.sh \
|
||||
|| :
|
||||
|
||||
@ -352,6 +358,9 @@ fi
|
||||
%license COPYING
|
||||
|
||||
%changelog
|
||||
* Tue Jul 19 2016 Kamil Dudka <kdudka@redhat.com> - 8.25-14
|
||||
- fix 'sort -h -k' in locales that use blank as thousands separator (#1355780)
|
||||
|
||||
* Thu Jul 14 2016 Kamil Dudka <kdudka@redhat.com> - 8.25-13
|
||||
- make 'sort -h' work for arbitrary column even when using UTF-8 locales
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user