syslog-ng/syslog-ng-3.1.4-pcre-dos.patch

272 lines
10 KiB
Diff

commit 35de55e53dd653c50c8da5daf41a99ab22e7e8aa
Author: Balazs Scheidler <bazsi@balabit.hu>
Date: Tue May 3 20:54:53 2011 +0200
pcre: fixed a potential resource hogging infinite loop when an error occurs
Any kind of PCRE error case would cause an infinite loop, when the
"global" flag is present and pcre returns an error code.
The reported problem is that with PCRE 8.12 we indeed get such an error
while doing a global replace.
This patch also reworks the way PCRE based replacements are made, that code
was hairy, and I just hope this one is one bit less so. One performance
related change also made it that improves the speed pcre replacements,
which previously zeroed out a 3k array unconditionally in every invocation.
Also added some additional testcases to be sure I didn't break anything.
Reported-By: Micah Anderson <micah@riseup.net>
Signed-off-by: Balazs Scheidler <bazsi@balabit.hu>
diff --git a/src/logmatcher.c b/src/logmatcher.c
index 67b6c1b..6b70f13 100644
--- a/src/logmatcher.c
+++ b/src/logmatcher.c
@@ -504,7 +504,6 @@ typedef struct _LogMatcherPcreRe
pcre *pattern;
pcre_extra *extra;
gint match_options;
- int start_offset;
} LogMatcherPcreRe;
static gboolean
@@ -623,29 +622,37 @@ static gboolean
log_matcher_pcre_re_match(LogMatcher *s, LogMessage *msg, gint value_handle, const gchar *value, gssize value_len)
{
LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;
- int matches[RE_MAX_MATCHES * 3];
+ gint *matches;
+ gsize matches_size;
+ gint num_matches;
gint rc;
- if(value_len == -1)
+ if (value_len == -1)
value_len = strlen(value);
- self->start_offset = 0;
- rc = pcre_exec(self->pattern, self->extra, value, value_len, self->start_offset, self->match_options, matches, (RE_MAX_MATCHES * 3));
+ if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_CAPTURECOUNT, &num_matches) < 0)
+ g_assert_not_reached();
+ if (num_matches > RE_MAX_MATCHES)
+ num_matches = RE_MAX_MATCHES;
+
+ matches_size = 3 * (num_matches + 1);
+ matches = g_alloca(matches_size * sizeof(gint));
+
+ rc = pcre_exec(self->pattern, self->extra,
+ value, value_len, 0, self->match_options, matches, matches_size);
if (rc < 0)
{
switch (rc)
{
- case PCRE_ERROR_NOMATCH:
- /*
- msg_debug("No match", NULL);
- */
+ case PCRE_ERROR_NOMATCH:
break;
+
+ default:
/* Handle other special cases */
- default:
- msg_error("Error while matching regexp",
- evt_tag_int("error_code",rc),
- NULL);
- break;
+ msg_error("Error while matching regexp",
+ evt_tag_int("error_code", rc),
+ NULL);
+ break;
}
return FALSE;
}
@@ -668,78 +675,120 @@ static gchar *
log_matcher_pcre_re_replace(LogMatcher *s, LogMessage *msg, gint value_handle, const gchar *value, gssize value_len, LogTemplate *replacement, gssize *new_length)
{
LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;
- int matches[RE_MAX_MATCHES * 3];
- gint rc;
- gboolean first_round = TRUE;
GString *new_value = NULL;
- gssize last_offset = 0;
- gint options = 0;
+ gint *matches;
+ gsize matches_size;
+ gint num_matches;
+ gint rc;
+ gint start_offset, last_offset;
+ gint options;
+ gboolean last_match_was_empty;
+
+ if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_CAPTURECOUNT, &num_matches) < 0)
+ g_assert_not_reached();
+ if (num_matches > RE_MAX_MATCHES)
+ num_matches = RE_MAX_MATCHES;
+
+ matches_size = 3 * (num_matches + 1);
+ matches = g_alloca(matches_size * sizeof(gint));
+
+ /* we need zero initialized offsets for the last match as the
+ * algorithm tries uses that as the base position */
- memset(matches, 0, sizeof(matches));
+ matches[0] = matches[1] = matches[2] = 0;
if (value_len == -1)
value_len = strlen(value);
+ last_offset = start_offset = 0;
+ last_match_was_empty = FALSE;
do
{
- options = 0;
- self->start_offset = matches[1]; /* Start at end of previous match 0 on the first iteration*/
-
- /* If the previous match was for an empty string, we are finished if we are
- at the end of the subject. Otherwise, arrange to run another match at the
- same point to see if a non-empty match can be found.
+ /* loop over the string, replacing one occurence at a time. */
+
+ /* NOTE: zero length matches need special care, as we could spin
+ * forever otherwise (since the current position wouldn't be
+ * advanced).
+ *
+ * A zero-length match can be as simple as "a*" which will be
+ * returned unless PCRE_NOTEMPTY is specified.
+ *
+ * By supporting zero-length matches, we basically make it
+ * possible to insert replacement between each incoming
+ * character.
+ *
+ * For example:
+ * pattern: a*
+ * replacement: #
+ * input: message
+ * result: #m#e#s#s#a#g#e#
+ *
+ * This mimics Perl behaviour.
*/
- if (matches[0] == matches[1] && !first_round)
+ if (last_match_was_empty)
{
- if (matches[0] == value_len)
- break;
+ /* Otherwise, arrange to run another match at the same point
+ * to see if a non-empty match can be found.
+ */
+
options = PCRE_NOTEMPTY | PCRE_ANCHORED;
}
+ else
+ {
+ options = 0;
+ }
- rc = pcre_exec(self->pattern, self->extra, value, value_len, self->start_offset/*start offset*/, (self->match_options | options) , matches, (RE_MAX_MATCHES * 3) );
- if (rc < 0)
+ rc = pcre_exec(self->pattern, self->extra,
+ value, value_len,
+ start_offset, (self->match_options | options), matches, matches_size);
+ if (rc < 0 && rc != PCRE_ERROR_NOMATCH)
{
- if(rc == PCRE_ERROR_NOMATCH)
- {
- /* msg_debug("No match", NULL); */
- if(!first_round)
- {
- if (options == 0)
- break;
- else
- matches[1] = self->start_offset + 1;
- continue; /* Go round the loop again */
- }
- }
- else
- {
- /* Handle other special cases */
- msg_error("Error while matching regexp",
- evt_tag_int("error_code",rc),
- NULL);
- }
+ msg_error("Error while matching regexp",
+ evt_tag_int("error_code", rc),
+ NULL);
+ break;
}
- else if (rc == 0)
+ else if (rc < 0)
{
- msg_error("Error while storing matching substrings", NULL);
+ if ((options & PCRE_NOTEMPTY) == 0)
+ {
+ /* we didn't match, even when we permitted to match the
+ * empty string. Nothing to find here, bail out */
+ break;
+ }
+
+ /* we didn't match, quite possibly because the empty match
+ * was not permitted. Skip one character in order to avoid
+ * infinite loop over the same zero-length match. */
+
+ start_offset = start_offset + 1;
+ /* FIXME: handle complex sequences like utf8 and newline characters */
+ last_match_was_empty = FALSE;
+ continue;
}
else
{
+ /* if the output array was too small, truncate the number of
+ captures to RE_MAX_MATCHES */
+
+ if (rc == 0)
+ rc = matches_size / 3;
+
log_matcher_pcre_re_feed_backrefs(s, msg, value_handle, matches, rc, value);
log_matcher_pcre_re_feed_named_substrings(s, msg, matches, value);
if (!new_value)
new_value = g_string_sized_new(value_len);
- /* literal */
+ /* append non-matching portion */
g_string_append_len(new_value, &value[last_offset], matches[0] - last_offset);
/* replacement */
log_template_append_format(replacement, msg, 0, TS_FMT_BSD, NULL, 0, 0, new_value);
- last_offset = matches[1];
+ last_match_was_empty = (matches[0] == matches[1]);
+ start_offset = last_offset = matches[1];
}
- first_round = FALSE;
}
- while (TRUE && (self->super.flags & LMF_GLOBAL));
+ while (self->super.flags & LMF_GLOBAL && start_offset < value_len);
if (new_value)
{
diff --git a/tests/unit/test_matcher.c b/tests/unit/test_matcher.c
index 3df98e5..95866b3 100644
--- a/tests/unit/test_matcher.c
+++ b/tests/unit/test_matcher.c
@@ -144,9 +144,18 @@ main()
/* empty match with global flag*/
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa bb", 0, "c*", "#", "#a#a# #b#b#", LMF_GLOBAL, log_matcher_pcre_re_new());
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa bb", 0, "a*", "?", "?? ?b?b?", LMF_GLOBAL, log_matcher_pcre_re_new());
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "aa|b*", "@", "@@", LMF_GLOBAL, log_matcher_pcre_re_new());
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "aa|b*", "@", "@", 0, log_matcher_pcre_re_new());
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "b*|aa", "@", "@@@", LMF_GLOBAL, log_matcher_pcre_re_new());
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "b*|aa", "@", "@aa", 0, log_matcher_pcre_re_new());
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: wikiwiki", 0, "wi", "", "kiki", LMF_GLOBAL, log_matcher_pcre_re_new());
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: wikiwiki", 0, "wi", "kuku", "kukukikukuki", LMF_GLOBAL, log_matcher_pcre_re_new());
+
+ /* this tests a pcre 8.12 incompatibility */
+
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: wikiwiki", 0, "([[:digit:]]{1,3}\\.){3}[[:digit:]]{1,3}", "foo", "wikiwiki", LMF_GLOBAL, log_matcher_pcre_re_new());
+
#endif
return 0;