regex: Fix memory overread when pattern contains NUL byte (#1622674)

This commit is contained in:
Florian Weimer 2018-08-29 11:40:05 +02:00
parent 09b1c92ab2
commit bbcbf3796f
3 changed files with 273 additions and 1 deletions

41
glibc-1622674-1.patch Normal file
View File

@ -0,0 +1,41 @@
commit 58559f14437d2aa71753a29fed435efa06aa4576
Author: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue Aug 28 21:54:28 2018 +0200
regex: fix uninitialized memory access
I introduced this bug into gnulib in commit
8335a4d6c7b4448cd0bcb6d0bebf1d456bcfdb17 dated 2006-04-10;
eventually it was merged into glibc. The bug was found by
project-repo <bugs@feusi.co> and reported here:
https://lists.gnu.org/r/sed-devel/2018-08/msg00017.html
Diagnosis and draft fix reported by Assaf Gordon here:
https://lists.gnu.org/r/bug-gnulib/2018-08/msg00071.html
https://lists.gnu.org/r/bug-gnulib/2018-08/msg00142.html
* posix/regex_internal.c (build_wcs_upper_buffer):
Fix bug when mbrtowc returns 0.
(cherry picked from commit bc680b336971305cb39896b30d72dc7101b62242)
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 7f0083b918de6530..b10588f1ccbb1992 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -317,7 +317,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
mbclen = __mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
- if (BE (mbclen < (size_t) -2, 1))
+ if (BE (0 < mbclen && mbclen < (size_t) -2, 1))
{
wchar_t wcu = __towupper (wc);
if (wcu != wc)
@@ -386,7 +386,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
- if (BE (mbclen < (size_t) -2, 1))
+ if (BE (0 < mbclen && mbclen < (size_t) -2, 1))
{
wchar_t wcu = __towupper (wc);
if (wcu != wc)

226
glibc-1622674-2.patch Normal file
View File

@ -0,0 +1,226 @@
commit 0b79004569e5ce1669136b8c41564c3809730f15
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Aug 28 12:57:46 2018 +0200
regex: Add test tst-regcomp-truncated [BZ #23578]
(cherry picked from commit 761404b74d9853ce1608195e24f25b78a910591a)
diff --git a/posix/Makefile b/posix/Makefile
index 00c62841a282f15a..83162123f9c927a0 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -96,7 +96,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \
tst-posix_fadvise tst-posix_fadvise64 \
tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \
tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \
- bug-regex38
+ bug-regex38 tst-regcomp-truncated
tests-internal := bug-regex5 bug-regex20 bug-regex33 \
tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \
tst-glob_lstat_compat tst-spawn4-compat
@@ -194,6 +194,7 @@ $(objpfx)tst-regex2.out: $(gen-locales)
$(objpfx)tst-regexloc.out: $(gen-locales)
$(objpfx)tst-rxspencer.out: $(gen-locales)
$(objpfx)tst-rxspencer-no-utf8.out: $(gen-locales)
+$(objpfx)tst-regcomp-truncated.out: $(gen-locales)
endif
# If we will use the generic uname implementation, we must figure out what
diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c
new file mode 100644
index 0000000000000000..a4a1581bbc2b39eb
--- /dev/null
+++ b/posix/tst-regcomp-truncated.c
@@ -0,0 +1,191 @@
+/* Test compilation of truncated regular expressions.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This test constructs various patterns in an attempt to trigger
+ over-reading the regular expression compiler, such as bug
+ 23578. */
+
+#include <array_length.h>
+#include <errno.h>
+#include <locale.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/next_to_fault.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <wchar.h>
+
+/* Locales to test. */
+static const char locales[][17] =
+ {
+ "C",
+ "en_US.UTF-8",
+ "de_DE.ISO-8859-1",
+ };
+
+/* Syntax options. Will be combined with other flags. */
+static const reg_syntax_t syntaxes[] =
+ {
+ RE_SYNTAX_EMACS,
+ RE_SYNTAX_AWK,
+ RE_SYNTAX_GNU_AWK,
+ RE_SYNTAX_POSIX_AWK,
+ RE_SYNTAX_GREP,
+ RE_SYNTAX_EGREP,
+ RE_SYNTAX_POSIX_EGREP,
+ RE_SYNTAX_POSIX_BASIC,
+ RE_SYNTAX_POSIX_EXTENDED,
+ RE_SYNTAX_POSIX_MINIMAL_EXTENDED,
+ };
+
+/* Trailing characters placed after the initial character. */
+static const char trailing_strings[][4] =
+ {
+ "",
+ "[",
+ "\\",
+ "[\\",
+ "(",
+ "(\\",
+ "\\(",
+ };
+
+static int
+do_test (void)
+{
+ /* Staging buffer for the constructed regular expression. */
+ char buffer[16];
+
+ /* Allocation used to detect over-reading by the regular expression
+ compiler. */
+ struct support_next_to_fault ntf
+ = support_next_to_fault_allocate (sizeof (buffer));
+
+ /* Arbitrary Unicode codepoint at which we stop generating
+ characters. We do not probe the whole range because that would
+ take too long due to combinatorical exploision as the result of
+ combination with other flags. */
+ static const wchar_t last_character = 0xfff;
+
+ for (size_t locale_idx = 0; locale_idx < array_length (locales);
+ ++ locale_idx)
+ {
+ if (setlocale (LC_ALL, locales[locale_idx]) == NULL)
+ {
+ support_record_failure ();
+ printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]);
+ continue;
+ }
+ if (test_verbose > 0)
+ printf ("info: testing locale \"%s\"\n", locales[locale_idx]);
+
+ for (wchar_t wc = 0; wc <= last_character; ++wc)
+ {
+ char *after_wc;
+ if (wc == 0)
+ {
+ /* wcrtomb treats L'\0' in a special way. */
+ *buffer = '\0';
+ after_wc = &buffer[1];
+ }
+ else
+ {
+ mbstate_t ps = { };
+ size_t ret = wcrtomb (buffer, wc, &ps);
+ if (ret == (size_t) -1)
+ {
+ /* EILSEQ means that the target character set
+ cannot encode the character. */
+ if (errno != EILSEQ)
+ {
+ support_record_failure ();
+ printf ("error: wcrtomb (0x%x) failed: %m\n",
+ (unsigned) wc);
+ }
+ continue;
+ }
+ TEST_VERIFY_EXIT (ret != 0);
+ after_wc = &buffer[ret];
+ }
+
+ for (size_t trailing_idx = 0;
+ trailing_idx < array_length (trailing_strings);
+ ++trailing_idx)
+ {
+ char *after_trailing
+ = stpcpy (after_wc, trailing_strings[trailing_idx]);
+
+ for (int do_nul = 0; do_nul < 2; ++do_nul)
+ {
+ char *after_nul;
+ if (do_nul)
+ {
+ *after_trailing = '\0';
+ after_nul = &after_trailing[1];
+ }
+ else
+ after_nul = after_trailing;
+
+ size_t length = after_nul - buffer;
+
+ /* Make sure that the faulting region starts
+ after the used portion of the buffer. */
+ char *ntf_start = ntf.buffer + sizeof (buffer) - length;
+ memcpy (ntf_start, buffer, length);
+
+ for (const reg_syntax_t *psyntax = syntaxes;
+ psyntax < array_end (syntaxes); ++psyntax)
+ for (int do_icase = 0; do_icase < 2; ++do_icase)
+ {
+ re_syntax_options = *psyntax;
+ if (do_icase)
+ re_syntax_options |= RE_ICASE;
+
+ regex_t reg;
+ memset (&reg, 0, sizeof (reg));
+ const char *msg = re_compile_pattern
+ (ntf_start, length, &reg);
+ if (msg != NULL)
+ {
+ if (test_verbose > 0)
+ {
+ char *quoted = support_quote_blob
+ (buffer, length);
+ printf ("info: compilation failed for pattern"
+ " \"%s\", syntax 0x%lx: %s\n",
+ quoted, re_syntax_options, msg);
+ free (quoted);
+ }
+ }
+ else
+ regfree (&reg);
+ }
+ }
+ }
+ }
+ }
+
+ support_next_to_fault_free (&ntf);
+
+ return 0;
+}
+
+#include <support/test-driver.c>

View File

@ -1,6 +1,6 @@
%define glibcsrcdir glibc-2.28
%define glibcversion 2.28
%define glibcrelease 7%{?dist}
%define glibcrelease 8%{?dist}
# Pre-release tarballs are pulled in from git using a command that is
# effectively:
#
@ -166,6 +166,8 @@ Patch29: glibc-error-va_end.patch
Patch30: glibc-nscd-leak.patch
Patch31: glibc-nss_files-leak.patch
Patch32: glibc-rh1622669.patch
Patch33: glibc-1622674-1.patch
Patch34: glibc-1622674-2.patch
##############################################################################
# Continued list of core "glibc" package information:
@ -1881,6 +1883,9 @@ fi
%endif
%changelog
* Wed Aug 29 2018 Florian Weimer <fweimer@redhat.com> - 2.28-8
- regex: Fix memory overread when pattern contains NUL byte (#1622674)
* Wed Aug 29 2018 Florian Weimer <fweimer@redhat.com> - 2.28-7
- nptl: Fix waiters-after-spinning case in pthread_cond_broadcast (#1622669)