333 lines
11 KiB
Diff
333 lines
11 KiB
Diff
From c479153d77b419a6cae4551b63d2b73096c1130e Mon Sep 17 00:00:00 2001
|
|
From: Kamil Dudka <kdudka@redhat.com>
|
|
Date: Mon, 18 Jul 2016 19:04:43 +0200
|
|
Subject: [PATCH 1/3] maint: sort.c: deduplicate code for traversing numbers
|
|
|
|
* src/sort.c (traverse_raw_number): New function for traversing numbers.
|
|
(find_unit_order): Use traverse_raw_number() instead of open-coding it.
|
|
(debug_key): Likewise.
|
|
---
|
|
src/sort.c | 63 ++++++++++++++++++++++++++++++++++----------------------------
|
|
1 file changed, 35 insertions(+), 28 deletions(-)
|
|
|
|
diff --git a/src/sort.c b/src/sort.c
|
|
index 5b02343..e28bb6c 100644
|
|
--- a/src/sort.c
|
|
+++ b/src/sort.c
|
|
@@ -2231,18 +2231,16 @@ static char const unit_order[UCHAR_LIM] =
|
|
#endif
|
|
};
|
|
|
|
-/* Return an integer that represents the order of magnitude of the
|
|
- unit following the number. The number may contain thousands
|
|
- separators and a decimal point, but it may not contain leading blanks.
|
|
- Negative numbers get negative orders; zero numbers have a zero order. */
|
|
-
|
|
-static int _GL_ATTRIBUTE_PURE
|
|
-find_unit_order (char const *number)
|
|
+/* Traverse number given as *number consisting of digits, thousands_sep, and
|
|
+ decimal_point chars only. Returns the highest digit found in the number,
|
|
+ or '\0' if no digit has been found. Upon return *number points at the
|
|
+ character that immediately follows after the given number. */
|
|
+static unsigned char
|
|
+traverse_raw_number (char const **number)
|
|
{
|
|
- bool minus_sign = (*number == '-');
|
|
- char const *p = number + minus_sign;
|
|
- int nonzero = 0;
|
|
+ char const *p = *number;
|
|
unsigned char ch;
|
|
+ unsigned char max_digit = '\0';
|
|
|
|
/* Scan to end of number.
|
|
Decimals or separators not followed by digits stop the scan.
|
|
@@ -2253,16 +2251,34 @@ find_unit_order (char const *number)
|
|
do
|
|
{
|
|
while (ISDIGIT (ch = *p++))
|
|
- nonzero |= ch - '0';
|
|
+ if (max_digit < ch)
|
|
+ max_digit = ch;
|
|
}
|
|
while (ch == thousands_sep);
|
|
|
|
if (ch == decimal_point)
|
|
while (ISDIGIT (ch = *p++))
|
|
- nonzero |= ch - '0';
|
|
+ if (max_digit < ch)
|
|
+ max_digit = ch;
|
|
+
|
|
+ *number = p - 1;
|
|
+ return max_digit;
|
|
+}
|
|
+
|
|
+/* Return an integer that represents the order of magnitude of the
|
|
+ unit following the number. The number may contain thousands
|
|
+ separators and a decimal point, but it may not contain leading blanks.
|
|
+ Negative numbers get negative orders; zero numbers have a zero order. */
|
|
|
|
- if (nonzero)
|
|
+static int _GL_ATTRIBUTE_PURE
|
|
+find_unit_order (char const *number)
|
|
+{
|
|
+ bool minus_sign = (*number == '-');
|
|
+ char const *p = number + minus_sign;
|
|
+ unsigned char max_digit = traverse_raw_number (&p);
|
|
+ if ('0' < max_digit)
|
|
{
|
|
+ unsigned char ch = *p;
|
|
int order = unit_order[ch];
|
|
return (minus_sign ? -order : order);
|
|
}
|
|
@@ -2655,23 +2671,14 @@ debug_key (struct line const *line, struct keyfield const *key)
|
|
ignore_value (strtold (beg, &tighter_lim));
|
|
else if (key->numeric || key->human_numeric)
|
|
{
|
|
- char *p = beg + (beg < lim && *beg == '-');
|
|
- bool found_digit = false;
|
|
- unsigned char ch;
|
|
-
|
|
- do
|
|
+ char const *p = beg + (beg < lim && *beg == '-');
|
|
+ unsigned char max_digit = traverse_raw_number (&p);
|
|
+ if ('0' <= max_digit)
|
|
{
|
|
- while (ISDIGIT (ch = *p++))
|
|
- found_digit = true;
|
|
+ unsigned char ch = *p;
|
|
+ tighter_lim = (char *) p
|
|
+ + (key->human_numeric && unit_order[ch]);
|
|
}
|
|
- while (ch == thousands_sep);
|
|
-
|
|
- if (ch == decimal_point)
|
|
- while (ISDIGIT (ch = *p++))
|
|
- found_digit = true;
|
|
-
|
|
- if (found_digit)
|
|
- tighter_lim = p - ! (key->human_numeric && unit_order[ch]);
|
|
}
|
|
else
|
|
tighter_lim = lim;
|
|
--
|
|
2.5.5
|
|
|
|
|
|
From 8c39465a5b0343ff7a21286dd69ed5430685d2f7 Mon Sep 17 00:00:00 2001
|
|
From: Kamil Dudka <kdudka@redhat.com>
|
|
Date: Mon, 18 Jul 2016 19:04:44 +0200
|
|
Subject: [PATCH 2/3] sort: make -h work with -k and blank used as thousands
|
|
separator
|
|
|
|
* src/sort.c (traverse_raw_number): Allow to skip only one occurrence
|
|
of thousands_sep to avoid finding the unit in the next column in case
|
|
thousands_sep matches as blank and is used as column delimiter.
|
|
* tests/misc/sort-h-thousands-sep.sh: Add regression test for this bug.
|
|
* tests/local.mk: Reference the test.
|
|
* NEWS: Mention the bug fix.
|
|
Reported at https://bugzilla.redhat.com/1355780
|
|
Fixes http://bugs.gnu.org/24015
|
|
---
|
|
src/sort.c | 14 ++++++++----
|
|
tests/local.mk | 1 +
|
|
tests/misc/sort-h-thousands-sep.sh | 47 ++++++++++++++++++++++++++++++++++++++
|
|
3 files changed, 57 insertions(+), 5 deletions(-)
|
|
create mode 100755 tests/misc/sort-h-thousands-sep.sh
|
|
|
|
diff --git a/src/sort.c b/src/sort.c
|
|
index e28bb6c..dd3ba58 100644
|
|
--- a/src/sort.c
|
|
+++ b/src/sort.c
|
|
@@ -2248,13 +2248,17 @@ traverse_raw_number (char const **number)
|
|
to be lacking in units.
|
|
FIXME: add support for multibyte thousands_sep and decimal_point. */
|
|
|
|
- do
|
|
+ while (ISDIGIT (ch = *p++))
|
|
{
|
|
- while (ISDIGIT (ch = *p++))
|
|
- if (max_digit < ch)
|
|
- max_digit = ch;
|
|
+ if (max_digit < ch)
|
|
+ max_digit = ch;
|
|
+
|
|
+ /* Allow to skip only one occurrence of thousands_sep to avoid finding
|
|
+ the unit in the next column in case thousands_sep matches as blank
|
|
+ and is used as column delimiter. */
|
|
+ if (*p == thousands_sep)
|
|
+ ++p;
|
|
}
|
|
- while (ch == thousands_sep);
|
|
|
|
if (ch == decimal_point)
|
|
while (ISDIGIT (ch = *p++))
|
|
diff --git a/tests/local.mk b/tests/local.mk
|
|
index 42d39f2..dccff8d 100644
|
|
--- a/tests/local.mk
|
|
+++ b/tests/local.mk
|
|
@@ -344,6 +344,7 @@ all_tests = \
|
|
tests/misc/sort-discrim.sh \
|
|
tests/misc/sort-files0-from.pl \
|
|
tests/misc/sort-float.sh \
|
|
+ tests/misc/sort-h-thousands-sep.sh \
|
|
tests/misc/sort-mb-tests.sh \
|
|
tests/i18n/sort.sh \
|
|
tests/misc/sort-merge.pl \
|
|
diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh
|
|
new file mode 100755
|
|
index 0000000..17f1b6c
|
|
--- /dev/null
|
|
+++ b/tests/misc/sort-h-thousands-sep.sh
|
|
@@ -0,0 +1,47 @@
|
|
+#!/bin/sh
|
|
+# exercise 'sort -h' in locales where thousands separator is blank
|
|
+
|
|
+# Copyright (C) 2016 Free Software Foundation, Inc.
|
|
+
|
|
+# This program is free software: you can redistribute it and/or modify
|
|
+# it under the terms of the GNU General Public License as published by
|
|
+# the Free Software Foundation, either version 3 of the License, or
|
|
+# (at your option) any later version.
|
|
+
|
|
+# This program is distributed in the hope that it will be useful,
|
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+# GNU General Public License for more details.
|
|
+
|
|
+# You should have received a copy of the GNU General Public License
|
|
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
+
|
|
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
|
|
+print_ver_ sort
|
|
+test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \
|
|
+ || skip_ 'The Swedish locale with blank thousands separator is unavailable.'
|
|
+
|
|
+tee exp1 > in << _EOF_
|
|
+1 1k 4 003 1M
|
|
+2k 2M 4 002 2
|
|
+3M 3 4 001 3k
|
|
+_EOF_
|
|
+
|
|
+cat > exp2 << _EOF_
|
|
+3M 3 4 001 3k
|
|
+1 1k 4 003 1M
|
|
+2k 2M 4 002 2
|
|
+_EOF_
|
|
+
|
|
+cat > exp3 << _EOF_
|
|
+3M 3 4 001 3k
|
|
+2k 2M 4 002 2
|
|
+1 1k 4 003 1M
|
|
+_EOF_
|
|
+
|
|
+for i in 1 2 3; do
|
|
+ LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1
|
|
+ compare "exp${i}" "out${i}" || fail=1
|
|
+done
|
|
+
|
|
+Exit $fail
|
|
--
|
|
2.5.5
|
|
|
|
|
|
From 46ef53f558e7bc1c0bc0abd62a86b40b4141e058 Mon Sep 17 00:00:00 2001
|
|
From: Kamil Dudka <kdudka@redhat.com>
|
|
Date: Mon, 18 Jul 2016 19:04:45 +0200
|
|
Subject: [PATCH 3/3] sort: with -h, disallow thousands separator between
|
|
number and unit
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
* src/sort.c (traverse_raw_number): Accept thousands separator only
|
|
if it is immediately followed by a digit.
|
|
* tests/misc/sort-h-thousands-sep.sh: Cover the fix for this bug.
|
|
|
|
Suggested by Pádraig Brady in http://bugs.gnu.org/24015
|
|
---
|
|
src/sort.c | 11 ++++++++++-
|
|
tests/misc/sort-h-thousands-sep.sh | 25 +++++++++++++------------
|
|
2 files changed, 23 insertions(+), 13 deletions(-)
|
|
|
|
diff --git a/src/sort.c b/src/sort.c
|
|
index dd3ba58..69ef75f 100644
|
|
--- a/src/sort.c
|
|
+++ b/src/sort.c
|
|
@@ -2241,6 +2241,7 @@ traverse_raw_number (char const **number)
|
|
char const *p = *number;
|
|
unsigned char ch;
|
|
unsigned char max_digit = '\0';
|
|
+ bool ends_with_thousands_sep = false;
|
|
|
|
/* Scan to end of number.
|
|
Decimals or separators not followed by digits stop the scan.
|
|
@@ -2256,10 +2257,18 @@ traverse_raw_number (char const **number)
|
|
/* Allow to skip only one occurrence of thousands_sep to avoid finding
|
|
the unit in the next column in case thousands_sep matches as blank
|
|
and is used as column delimiter. */
|
|
- if (*p == thousands_sep)
|
|
+ ends_with_thousands_sep = (*p == thousands_sep);
|
|
+ if (ends_with_thousands_sep)
|
|
++p;
|
|
}
|
|
|
|
+ if (ends_with_thousands_sep)
|
|
+ {
|
|
+ /* thousands_sep not followed by digit is not allowed. */
|
|
+ *number = p - 2;
|
|
+ return max_digit;
|
|
+ }
|
|
+
|
|
if (ch == decimal_point)
|
|
while (ISDIGIT (ch = *p++))
|
|
if (max_digit < ch)
|
|
diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh
|
|
index 17f1b6c..3ffa89e 100755
|
|
--- a/tests/misc/sort-h-thousands-sep.sh
|
|
+++ b/tests/misc/sort-h-thousands-sep.sh
|
|
@@ -18,28 +18,29 @@
|
|
|
|
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
|
|
print_ver_ sort
|
|
+
|
|
test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \
|
|
|| skip_ 'The Swedish locale with blank thousands separator is unavailable.'
|
|
|
|
-tee exp1 > in << _EOF_
|
|
-1 1k 4 003 1M
|
|
-2k 2M 4 002 2
|
|
-3M 3 4 001 3k
|
|
+tee exp1 exp3 > in << _EOF_
|
|
+1 1k 1 M 4 003 1M
|
|
+2k 2M 2 k 4 002 2
|
|
+3M 3 3 G 4 001 3k
|
|
_EOF_
|
|
|
|
cat > exp2 << _EOF_
|
|
-3M 3 4 001 3k
|
|
-1 1k 4 003 1M
|
|
-2k 2M 4 002 2
|
|
+3M 3 3 G 4 001 3k
|
|
+1 1k 1 M 4 003 1M
|
|
+2k 2M 2 k 4 002 2
|
|
_EOF_
|
|
|
|
-cat > exp3 << _EOF_
|
|
-3M 3 4 001 3k
|
|
-2k 2M 4 002 2
|
|
-1 1k 4 003 1M
|
|
+cat > exp5 << _EOF_
|
|
+3M 3 3 G 4 001 3k
|
|
+2k 2M 2 k 4 002 2
|
|
+1 1k 1 M 4 003 1M
|
|
_EOF_
|
|
|
|
-for i in 1 2 3; do
|
|
+for i in 1 2 3 5; do
|
|
LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1
|
|
compare "exp${i}" "out${i}" || fail=1
|
|
done
|
|
--
|
|
2.5.5
|
|
|