fix sort bugs (including #485715) for multibyte locales

This commit is contained in:
Ondrej Vasik 2009-03-02 12:42:55 +00:00
parent c39fe6a1cc
commit bfff2133f5
3 changed files with 44 additions and 19 deletions

View File

@ -64,7 +64,18 @@ diff -urNp coreutils-7.1-orig/src/sort.c coreutils-7.1/src/sort.c
diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
--- coreutils-7.1-orig/tests/misc/sort 2009-01-27 22:11:25.000000000 +0100
+++ coreutils-7.1/tests/misc/sort 2009-02-25 16:21:48.000000000 +0100
@@ -110,6 +110,8 @@ my @Tests =
@@ -24,6 +24,10 @@ my $prog = 'sort';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+my $mb_locale = $ENV{LOCALE_FR_UTF8};
+! defined $mb_locale || $mb_locale eq 'none'
+ and $mb_locale = 'C';
+
# Since each test is run with a file name and with redirected stdin,
# the name in the diagnostic is either the file name or "-".
# Normalize each diagnostic to use '-'.
@@ -110,6 +114,8 @@ my @Tests =
["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
@ -73,7 +84,7 @@ diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
#
# report an error for `.' without following char spec
["08a", '-k 2.,3', {EXIT=>2},
@@ -210,6 +212,10 @@ my @Tests =
@@ -210,6 +216,15 @@ my @Tests =
# key start and key end.
["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
@ -81,6 +92,11 @@ diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
+# next field are not included in the sort. I.E. order should not change here.
+["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}],
+
+# When ignoring leading blanks for start position, ensure blanks from
+# next field are not included in the sort. I.E. order should not change here.
+# This was noticed as an issue on Fedora 8 (only in multibyte locales).
+["18g", '-k1b,1', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
# This looks odd, but works properly -- 2nd keyspec is never
# used because all lines are different.
["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],

View File

@ -1938,7 +1938,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
if (newlim)
lim = newlim;
}
@@ -1384,6 +1570,107 @@
@@ -1384,6 +1570,113 @@
return ptr;
}
@ -1952,6 +1952,9 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
+ size_t mblength;
+ mbstate_t state;
+
+ if (echar == 0)
+ eword++; /* skip all of end field. */
+
+ memset (&state, '\0', sizeof(mbstate_t));
+
+ if (tab_length)
@ -2020,24 +2023,27 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
+ }
+# endif
+
+ /* If we're skipping leading blanks, don't start counting characters
+ * until after skipping past any leading blanks. */
+ if (key->skipsblanks)
+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
+ ptr += mblength;
+ if (echar != 0)
+ {
+ /* If we're skipping leading blanks, don't start counting characters
+ * until after skipping past any leading blanks. */
+ if (key->skipsblanks)
+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
+ ptr += mblength;
+
+ memset (&state, '\0', sizeof(mbstate_t));
+ memset (&state, '\0', sizeof(mbstate_t));
+
+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */
+ for (i = 0; i < echar; i++)
+ {
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */
+ for (i = 0; i < echar; i++)
+ {
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
+
+ if (ptr + mblength > lim)
+ break;
+ else
+ ptr += mblength;
+ }
+ if (ptr + mblength > lim)
+ break;
+ else
+ ptr += mblength;
+ }
+ }
+
+ return ptr;
+}

View File

@ -1,7 +1,7 @@
Summary: A set of basic GNU tools commonly used in shell scripts
Name: coreutils
Version: 7.1
Release: 5%{?dist}
Release: 6%{?dist}
License: GPLv3+
Group: System Environment/Base
Url: http://www.gnu.org/software/coreutils/
@ -313,6 +313,9 @@ fi
/sbin/runuser
%changelog
* Mon Mar 02 2009 Ondrej Vasik <ovasik@redhat.com> 7.1-6
- fix sort bugs (including #485715) for multibyte locales
as well
* Fri Feb 27 2009 Ondrej Vasik <ovasik@redhat.com> 7.1-5
- fix infinite loop in recursive cp (upstream, introduced
by 7.1)