Fix memory leak in sort/I18N (patches written by Pádraig, #1259942); use the new i18n implementation for expand/unexpand

This commit is contained in:
Ondřej Vašík 2016-01-13 12:56:44 +01:00
parent 5efe9439ad
commit 4e1172402b
2 changed files with 1492 additions and 466 deletions

File diff suppressed because it is too large Load Diff

View File

@ -596,201 +596,6 @@ diff -urNp coreutils-8.24-orig/src/cut.c coreutils-8.24/src/cut.c
}
if (optind == argc)
diff -urNp coreutils-8.24-orig/src/expand.c coreutils-8.24/src/expand.c
--- coreutils-8.24-orig/src/expand.c 2015-06-26 19:05:22.000000000 +0200
+++ coreutils-8.24/src/expand.c 2015-07-05 09:04:33.028546950 +0200
@@ -37,12 +37,34 @@
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
+
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+/* Get iswblank(). */
+#if HAVE_WCTYPE_H
+# include <wctype.h>
+#endif
+
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "quote.h"
#include "xstrndup.h"
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+ installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "expand"
@@ -357,6 +379,142 @@ expand (void)
}
}
+#if HAVE_MBRTOWC
+/* Multibyte-aware variant of expand ().
+
+   Reads every input stream handed out by next_file (), decodes it one
+   character at a time with mbrtowc (), and writes the result to stdout.
+   While CONVERT is nonzero, each tab is replaced by spaces up to the next
+   tab stop and the column counter is advanced by wcwidth () of every other
+   character.  When convert_entire_line is 0, conversion stops at the first
+   non-blank character of a line and resumes after the next newline.
+
+   A byte that mbrtowc () cannot decode (return value -1 or -2) is copied
+   through unchanged and counted as one column.  */
+static void
+expand_multibyte (void)
+{
+  FILE *fp;                      /* Input stream. */
+  mbstate_t i_state;             /* Current shift state of the input stream. */
+  mbstate_t i_state_bak;         /* Back up the I_STATE. */
+  char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
+  char *bufpos = buf;            /* Next read position of BUF. */
+  size_t buflen = 0;             /* The length of the byte sequence in buf. */
+  wchar_t wc;                    /* A gotten wide character. */
+  size_t mblength;               /* The byte size of a multibyte character
+                                    which shows as same character as WC. */
+  int tab_index = 0;             /* Index in `tab_list' of next tabstop. */
+  int column = 0;                /* Column on screen of the next char. */
+  int next_tab_column;           /* Column the next tab stop is on. */
+  int convert = 1;               /* If nonzero, perform translations. */
+
+  fp = next_file ((FILE *) NULL);
+  if (fp == NULL)
+    return;
+
+  memset (&i_state, '\0', sizeof (mbstate_t));
+
+  for (;;)
+    {
+      /* Refill the buffer BUF.  At most MB_LEN_MAX - 1 bytes are carried
+         over, so reading BUFSIZ more bytes cannot overrun BUF. */
+      if (buflen < MB_LEN_MAX && !feof (fp) && !ferror (fp))
+        {
+          memmove (buf, bufpos, buflen);
+          buflen += fread (buf + buflen, sizeof (char), BUFSIZ, fp);
+          bufpos = buf;
+        }
+
+      /* No character is left in BUF. */
+      if (buflen < 1)
+        {
+          fp = next_file (fp);
+
+          if (fp == NULL)
+            break;              /* No more files. */
+          else
+            {
+              /* Each file starts in the initial shift state. */
+              memset (&i_state, '\0', sizeof (mbstate_t));
+              continue;
+            }
+        }
+
+      /* Get a wide character. */
+      i_state_bak = i_state;
+      mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
+
+      switch (mblength)
+        {
+        case (size_t) -1:       /* illegal byte sequence. */
+        case (size_t) -2:       /* incomplete byte sequence. */
+          /* Pass the offending byte through and retry from the next one
+             with the shift state that was valid before the failure. */
+          mblength = 1;
+          i_state = i_state_bak;
+          if (convert)
+            {
+              ++column;
+              /* Undecodable bytes usually have the high bit set; convert to
+                 unsigned char so isblank () never sees a negative value. */
+              if (convert_entire_line == 0
+                  && !isblank ((unsigned char) *bufpos))
+                convert = 0;
+            }
+          putchar (*bufpos);
+          break;
+
+        case 0:                 /* null. */
+          /* mbrtowc () reports 0 for a null character; it occupies one
+             byte of input. */
+          mblength = 1;
+          if (convert && convert_entire_line == 0)
+            convert = 0;
+          putchar ('\0');
+          break;
+
+        default:
+          if (wc == L'\n')      /* LF. */
+            {
+              tab_index = 0;
+              column = 0;
+              convert = 1;
+              putchar ('\n');
+            }
+          else if (wc == L'\t' && convert)      /* Tab. */
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let tab_index == first_free_tab;
+                     stop when it is 1 less. */
+                  while (tab_index < first_free_tab - 1
+                         && column >= tab_list[tab_index])
+                    tab_index++;
+                  next_tab_column = tab_list[tab_index];
+                  if (tab_index < first_free_tab - 1)
+                    tab_index++;
+                  /* Past the last tab stop: a tab becomes one space. */
+                  if (column >= next_tab_column)
+                    next_tab_column = column + 1;
+                }
+              else
+                next_tab_column = column + tab_size - column % tab_size;
+
+              while (column < next_tab_column)
+                {
+                  putchar (' ');
+                  ++column;
+                }
+            }
+          else                  /* Others. */
+            {
+              if (convert)
+                {
+                  if (wc == L'\b')
+                    {
+                      if (column > 0)
+                        --column;
+                    }
+                  else
+                    {
+                      int width;        /* The width of WC. */
+
+                      /* wcwidth () is -1 for non-printable characters;
+                         those do not advance the column. */
+                      width = wcwidth (wc);
+                      column += (width > 0) ? width : 0;
+                      if (convert_entire_line == 0 && !iswblank (wc))
+                        convert = 0;
+                    }
+                }
+              /* Emit the original bytes, not a re-encoding of WC. */
+              fwrite (bufpos, sizeof (char), mblength, stdout);
+            }
+        }
+      buflen -= mblength;
+      bufpos += mblength;
+    }
+}
+#endif
+
int
main (int argc, char **argv)
{
@@ -421,7 +579,12 @@ main (int argc, char **argv)
file_list = (optind < argc ? &argv[optind] : stdin_argv);
- expand ();
+#if HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1)
+ expand_multibyte ();
+ else
+#endif
+ expand ();
if (have_read_stdin && fclose (stdin) != 0)
error (EXIT_FAILURE, errno, "-");
diff -urNp coreutils-8.24-orig/src/fold.c coreutils-8.24/src/fold.c
--- coreutils-8.24-orig/src/fold.c 2015-06-26 19:05:22.000000000 +0200
+++ coreutils-8.24/src/fold.c 2015-07-05 09:04:33.029546958 +0200
@ -3046,8 +2851,8 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
+ register int lo = 0, hi = MONTHS_PER_YEAR, result;
+ char *tmp;
+ size_t wclength, mblength;
+ const char **pp;
+ const wchar_t **wpp;
+ const char *pp;
+ const wchar_t *wpp;
+ wchar_t *month_wcs;
+ mbstate_t state;
+
@ -3060,17 +2865,19 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
+ if (len == 0)
+ return 0;
+
+ month = (char *) xmalloc (len + 1);
+ if (SIZE_MAX - len < 1)
+ xalloc_die ();
+
+ tmp = (char *) xmalloc (len + 1);
+ month = (char *) xnmalloc (len + 1, MB_CUR_MAX);
+
+ pp = tmp = (char *) xnmalloc (len + 1, MB_CUR_MAX);
+ memcpy (tmp, s, len);
+ tmp[len] = '\0';
+ pp = (const char **)&tmp;
+ month_wcs = (wchar_t *) xmalloc ((len + 1) * sizeof (wchar_t));
+ memset (&state, '\0', sizeof(mbstate_t));
+ wpp = month_wcs = (wchar_t *) xnmalloc (len + 1, sizeof (wchar_t));
+ memset (&state, '\0', sizeof (mbstate_t));
+
+ wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
+ if (wclength == (size_t)-1 || *pp != NULL)
+ wclength = mbsrtowcs (month_wcs, &pp, len + 1, &state);
+ if (wclength == (size_t)-1 || pp != NULL)
+ error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s));
+
+ for (i = 0; i < wclength; i++)
@ -3083,10 +2890,8 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
+ }
+ }
+
+ wpp = (const wchar_t **)&month_wcs;
+
+ mblength = wcsrtombs (month, wpp, len + 1, &state);
+ assert (mblength != (-1) && *wpp == NULL);
+ mblength = wcsrtombs (month, &wpp, (len + 1) * MB_CUR_MAX, &state);
+ assert (mblength != (-1) && wpp == NULL);
+
+ do
+ {
@ -3343,7 +3148,7 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
/* Compare two lines A and B, returning negative, zero, or positive
depending on whether A compares less than, equal to, or greater than B. */
@@ -2721,7 +3364,7 @@ compare (struct line const *a, struct li
@@ -2721,7 +3366,7 @@ compare (struct line const *a, struct line const *b)
diff = - NONZERO (blen);
else if (blen == 0)
diff = 1;
@ -3352,7 +3157,7 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
{
/* Note xmemcoll0 is a performance enhancement as
it will not unconditionally write '\0' after the
@@ -4120,6 +4763,7 @@ set_ordering (char const *s, struct keyf
@@ -4120,6 +4765,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype)
break;
case 'f':
key->translate = fold_toupper;
@ -3360,7 +3165,7 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
break;
case 'g':
key->general_numeric = true;
@@ -4197,7 +4841,7 @@ main (int argc, char **argv)
@@ -4197,7 +4843,7 @@ main (int argc, char **argv)
initialize_exit_failure (SORT_FAILURE);
hard_LC_COLLATE = hard_locale (LC_COLLATE);
@ -3369,7 +3174,7 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
hard_LC_TIME = hard_locale (LC_TIME);
#endif
@@ -4218,6 +4862,29 @@ main (int argc, char **argv)
@@ -4218,6 +4864,29 @@ main (int argc, char **argv)
thousands_sep = -1;
}
@ -3399,7 +3204,7 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
have_read_stdin = false;
inittables ();
@@ -4492,13 +5159,34 @@ main (int argc, char **argv)
@@ -4492,13 +5161,34 @@ main (int argc, char **argv)
case 't':
{
@ -3438,7 +3243,7 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
else
{
/* Provoke with 'sort -txx'. Complain about
@@ -4509,9 +5197,12 @@ main (int argc, char **argv)
@@ -4509,9 +5199,12 @@ main (int argc, char **argv)
quote (optarg));
}
}
@ -3453,264 +3258,33 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
}
break;
diff -urNp coreutils-8.24-orig/src/unexpand.c coreutils-8.24/src/unexpand.c
--- coreutils-8.24-orig/src/unexpand.c 2015-06-26 19:05:22.000000000 +0200
+++ coreutils-8.24/src/unexpand.c 2015-07-05 09:04:33.032546980 +0200
@@ -38,12 +38,29 @@
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
+
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "quote.h"
#include "xstrndup.h"
@@ -4681,10 +5374,10 @@ main (int argc, char **argv)
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+ installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "unexpand"
if (nfiles == 0)
{
- static char *minus = (char *) "-";
nfiles = 1;
free (files);
- files = &minus;
+ files = xmalloc (sizeof *files);
+ *files = (char *) "-";
}
@@ -103,6 +120,210 @@ static struct option const longopts[] =
{NULL, 0, NULL, 0}
};
/* Need to re-check that we meet the minimum requirement for memory
@@ -4742,6 +5435,13 @@ main (int argc, char **argv)
sort (files, nfiles, outfile, nthreads);
}
+static FILE *next_file (FILE *fp);
+
+#if HAVE_MBRTOWC
+/* Multibyte-aware variant of unexpand ().
+
+   Reads every input stream handed out by next_file (), decodes it one
+   character at a time with mbrtowc (), and writes the result to stdout.
+   While CONVERT is nonzero, spaces and tabs are not written immediately
+   but accumulated in PENDING; when any other character (or end of input)
+   arrives, the pending blanks are flushed as as many tabs as fit, followed
+   by spaces for the remainder.  When convert_entire_line is 0, conversion
+   stops at the first non-blank character of a line and resumes after the
+   next newline.
+
+   A byte that mbrtowc () cannot decode (return value -1 or -2) is copied
+   through unchanged and counted as one column.  */
+static void
+unexpand_multibyte (void)
+{
+  FILE *fp;                      /* Input stream. */
+  mbstate_t i_state;             /* Current shift state of the input stream. */
+  mbstate_t i_state_bak;         /* Back up the I_STATE. */
+  char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
+  char *bufpos = buf;            /* Next read position of BUF. */
+  size_t buflen = 0;             /* The length of the byte sequence in buf. */
+  wint_t wc;                     /* A gotten wide character, or WEOF. */
+  size_t mblength;               /* The byte size of a multibyte character
+                                    which shows as same character as WC. */
+  bool prev_tab = false;         /* True if the previous character was a
+                                    tab. */
+
+  /* Index in `tab_list' of next tabstop: */
+  int tab_index = 0;             /* For calculating width of pending tabs. */
+  int print_tab_index = 0;       /* For printing as many tabs as possible. */
+  unsigned int column = 0;       /* Column on screen of next char. */
+  int next_tab_column;           /* Column the next tab stop is on. */
+  int convert = 1;               /* If nonzero, perform translations. */
+  unsigned int pending = 0;      /* Pending columns of blanks. */
+
+  fp = next_file ((FILE *) NULL);
+  if (fp == NULL)
+    return;
+
+  memset (&i_state, '\0', sizeof (mbstate_t));
+
+  for (;;)
+    {
+      /* Refill the buffer BUF.  At most MB_LEN_MAX - 1 bytes are carried
+         over, so reading BUFSIZ more bytes cannot overrun BUF. */
+      if (buflen < MB_LEN_MAX && !feof (fp) && !ferror (fp))
+        {
+          memmove (buf, bufpos, buflen);
+          buflen += fread (buf + buflen, sizeof (char), BUFSIZ, fp);
+          bufpos = buf;
+        }
+
+      /* Get a wide character. */
+      if (buflen < 1)
+        {
+          /* Input exhausted: WEOF forces a flush of pending blanks below. */
+          mblength = 1;
+          wc = WEOF;
+        }
+      else
+        {
+          /* Decode into a genuine wchar_t; WC itself is a wint_t, which
+             need not have the same representation. */
+          wchar_t wch = L'\0';
+
+          i_state_bak = i_state;
+          mblength = mbrtowc (&wch, bufpos, buflen, &i_state);
+          wc = wch;
+        }
+
+      if (mblength == (size_t) -1 || mblength == (size_t) -2)
+        {
+          /* Undecodable byte: restore the shift state and use a value of
+             WC that is neither a blank nor a tab. */
+          i_state = i_state_bak;
+          wc = L'\0';
+        }
+
+      if (wc == L' ' && convert && column < INT_MAX)
+        {
+          ++pending;
+          ++column;
+        }
+      else if (wc == L'\t' && convert)
+        {
+          if (tab_size == 0)
+            {
+              /* Do not let tab_index == first_free_tab;
+                 stop when it is 1 less. */
+              while (tab_index < first_free_tab - 1
+                     && column >= tab_list[tab_index])
+                tab_index++;
+              next_tab_column = tab_list[tab_index];
+              if (tab_index < first_free_tab - 1)
+                tab_index++;
+              if (column >= next_tab_column)
+                {
+                  convert = 0;  /* Ran out of tab stops. */
+                  goto flush_pend_mb;
+                }
+            }
+          else
+            {
+              next_tab_column = column + tab_size - column % tab_size;
+            }
+          pending += next_tab_column - column;
+          column = next_tab_column;
+        }
+      else
+        {
+flush_pend_mb:
+          /* Flush pending spaces.  Print as many tabs as possible,
+             then print the rest as spaces. */
+          if (pending == 1 && column != 1 && !prev_tab)
+            {
+              /* A lone space that does not follow a tab stays a space. */
+              putchar (' ');
+              pending = 0;
+            }
+          /* Rewind to the column where the pending blanks began. */
+          column -= pending;
+          while (pending > 0)
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let print_tab_index == first_free_tab;
+                     stop when it is 1 less. */
+                  while (print_tab_index < first_free_tab - 1
+                         && column >= tab_list[print_tab_index])
+                    print_tab_index++;
+                  next_tab_column = tab_list[print_tab_index];
+                  if (print_tab_index < first_free_tab - 1)
+                    print_tab_index++;
+                }
+              else
+                {
+                  next_tab_column =
+                    column + tab_size - column % tab_size;
+                }
+              if (next_tab_column - column <= pending)
+                {
+                  putchar ('\t');
+                  pending -= next_tab_column - column;
+                  column = next_tab_column;
+                }
+              else
+                {
+                  /* The next tab stop is out of reach: step the index back
+                     and emit what is left as spaces. */
+                  --print_tab_index;
+                  column += pending;
+                  while (pending != 0)
+                    {
+                      putchar (' ');
+                      pending--;
+                    }
+                }
+            }
+
+          if (wc == WEOF)
+            {
+              fp = next_file (fp);
+              if (fp == NULL)
+                break;          /* No more files. */
+              else
+                {
+                  /* Each file starts in the initial shift state. */
+                  memset (&i_state, '\0', sizeof (mbstate_t));
+                  continue;
+                }
+            }
+
+          if (mblength == (size_t) -1 || mblength == (size_t) -2)
+            {
+              if (convert)
+                {
+                  ++column;
+                  if (convert_entire_line == 0)
+                    convert = 0;
+                }
+              mblength = 1;
+              /* Emit the byte at the current read position; BUF[0] is
+                 only the current byte right after a refill. */
+              putchar (*bufpos);
+            }
+          else if (mblength == 0)
+            {
+              /* mbrtowc () reports 0 for a null character; it occupies
+                 one byte of input. */
+              if (convert && convert_entire_line == 0)
+                convert = 0;
+              mblength = 1;
+              putchar ('\0');
+            }
+          else
+            {
+              if (convert)
+                {
+                  if (wc == L'\b')
+                    {
+                      if (column > 0)
+                        --column;
+                    }
+                  else
+                    {
+                      int width;        /* The width of WC. */
+
+                      /* wcwidth () is -1 for non-printable characters;
+                         those do not advance the column. */
+                      width = wcwidth (wc);
+                      column += (width > 0) ? width : 0;
+                      if (convert_entire_line == 0)
+                        convert = 0;
+                    }
+                }
+
+              if (wc == L'\n')
+                {
+                  tab_index = print_tab_index = 0;
+                  column = pending = 0;
+                  convert = 1;
+                }
+              /* Emit the original bytes, not a re-encoding of WC. */
+              fwrite (bufpos, sizeof (char), mblength, stdout);
+            }
+        }
+      prev_tab = wc == L'\t';
+      buflen -= mblength;
+      bufpos += mblength;
+    }
+}
+#endif
+
+
void
usage (int status)
{
@@ -523,7 +744,12 @@ main (int argc, char **argv)
file_list = (optind < argc ? &argv[optind] : stdin_argv);
- unexpand ();
+#if HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1)
+ unexpand_multibyte ();
+#ifdef lint
+ if (files_from)
+ readtokens0_free (&tok);
+ else
+ free (files);
+#endif
+ unexpand ();
+
if (have_read_stdin && fclose (stdin) == EOF)
die (_("close failed"), "-");
if (have_read_stdin && fclose (stdin) != 0)
error (EXIT_FAILURE, errno, "-");
diff -urNp coreutils-8.24-orig/src/uniq.c coreutils-8.24/src/uniq.c
--- coreutils-8.24-orig/src/uniq.c 2015-06-26 19:04:19.000000000 +0200
+++ coreutils-8.24/src/uniq.c 2015-07-05 09:04:33.032546980 +0200