272 lines
10 KiB
Diff
272 lines
10 KiB
Diff
|
commit 35de55e53dd653c50c8da5daf41a99ab22e7e8aa
|
||
|
Author: Balazs Scheidler <bazsi@balabit.hu>
|
||
|
Date: Tue May 3 20:54:53 2011 +0200
|
||
|
|
||
|
pcre: fixed a potential resource hogging infinite loop when an error occurs
|
||
|
|
||
|
Any kind of PCRE error case would cause an infinite loop, when the
|
||
|
"global" flag is present and pcre returns an error code.
|
||
|
|
||
|
The reported problem is that with PCRE 8.12 we indeed get such an error
|
||
|
while doing a global replace.
|
||
|
|
||
|
This patch also reworks the way PCRE based replacements are made, that code
|
||
|
was hairy, and I just hope this one is one bit less so. One performance
|
||
|
related change also made it that improves the speed pcre replacements,
|
||
|
which previously zeroed out a 3k array unconditionally in every invocation.
|
||
|
|
||
|
Also added some additional testcases to be sure I didn't break anything.
|
||
|
|
||
|
Reported-By: Micah Anderson <micah@riseup.net>
|
||
|
Signed-off-by: Balazs Scheidler <bazsi@balabit.hu>
|
||
|
|
||
|
diff --git a/src/logmatcher.c b/src/logmatcher.c
|
||
|
index 67b6c1b..6b70f13 100644
|
||
|
--- a/src/logmatcher.c
|
||
|
+++ b/src/logmatcher.c
|
||
|
@@ -504,7 +504,6 @@ typedef struct _LogMatcherPcreRe
|
||
|
pcre *pattern;
|
||
|
pcre_extra *extra;
|
||
|
gint match_options;
|
||
|
- int start_offset;
|
||
|
} LogMatcherPcreRe;
|
||
|
|
||
|
static gboolean
|
||
|
@@ -623,29 +622,37 @@ static gboolean
|
||
|
log_matcher_pcre_re_match(LogMatcher *s, LogMessage *msg, gint value_handle, const gchar *value, gssize value_len)
|
||
|
{
|
||
|
LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;
|
||
|
- int matches[RE_MAX_MATCHES * 3];
|
||
|
+ gint *matches;
|
||
|
+ gsize matches_size;
|
||
|
+ gint num_matches;
|
||
|
gint rc;
|
||
|
|
||
|
- if(value_len == -1)
|
||
|
+ if (value_len == -1)
|
||
|
value_len = strlen(value);
|
||
|
|
||
|
- self->start_offset = 0;
|
||
|
- rc = pcre_exec(self->pattern, self->extra, value, value_len, self->start_offset, self->match_options, matches, (RE_MAX_MATCHES * 3));
|
||
|
+ if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_CAPTURECOUNT, &num_matches) < 0)
|
||
|
+ g_assert_not_reached();
|
||
|
+ if (num_matches > RE_MAX_MATCHES)
|
||
|
+ num_matches = RE_MAX_MATCHES;
|
||
|
+
|
||
|
+ matches_size = 3 * (num_matches + 1);
|
||
|
+ matches = g_alloca(matches_size * sizeof(gint));
|
||
|
+
|
||
|
+ rc = pcre_exec(self->pattern, self->extra,
|
||
|
+ value, value_len, 0, self->match_options, matches, matches_size);
|
||
|
if (rc < 0)
|
||
|
{
|
||
|
switch (rc)
|
||
|
{
|
||
|
- case PCRE_ERROR_NOMATCH:
|
||
|
- /*
|
||
|
- msg_debug("No match", NULL);
|
||
|
- */
|
||
|
+ case PCRE_ERROR_NOMATCH:
|
||
|
break;
|
||
|
+
|
||
|
+ default:
|
||
|
/* Handle other special cases */
|
||
|
- default:
|
||
|
- msg_error("Error while matching regexp",
|
||
|
- evt_tag_int("error_code",rc),
|
||
|
- NULL);
|
||
|
- break;
|
||
|
+ msg_error("Error while matching regexp",
|
||
|
+ evt_tag_int("error_code", rc),
|
||
|
+ NULL);
|
||
|
+ break;
|
||
|
}
|
||
|
return FALSE;
|
||
|
}
|
||
|
@@ -668,78 +675,120 @@ static gchar *
|
||
|
log_matcher_pcre_re_replace(LogMatcher *s, LogMessage *msg, gint value_handle, const gchar *value, gssize value_len, LogTemplate *replacement, gssize *new_length)
|
||
|
{
|
||
|
LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;
|
||
|
- int matches[RE_MAX_MATCHES * 3];
|
||
|
- gint rc;
|
||
|
- gboolean first_round = TRUE;
|
||
|
GString *new_value = NULL;
|
||
|
- gssize last_offset = 0;
|
||
|
- gint options = 0;
|
||
|
+ gint *matches;
|
||
|
+ gsize matches_size;
|
||
|
+ gint num_matches;
|
||
|
+ gint rc;
|
||
|
+ gint start_offset, last_offset;
|
||
|
+ gint options;
|
||
|
+ gboolean last_match_was_empty;
|
||
|
+
|
||
|
+ if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_CAPTURECOUNT, &num_matches) < 0)
|
||
|
+ g_assert_not_reached();
|
||
|
+ if (num_matches > RE_MAX_MATCHES)
|
||
|
+ num_matches = RE_MAX_MATCHES;
|
||
|
+
|
||
|
+ matches_size = 3 * (num_matches + 1);
|
||
|
+ matches = g_alloca(matches_size * sizeof(gint));
|
||
|
+
|
||
|
+ /* we need zero initialized offsets for the last match as the
|
||
|
+ * algorithm tries uses that as the base position */
|
||
|
|
||
|
- memset(matches, 0, sizeof(matches));
|
||
|
+ matches[0] = matches[1] = matches[2] = 0;
|
||
|
|
||
|
if (value_len == -1)
|
||
|
value_len = strlen(value);
|
||
|
|
||
|
+ last_offset = start_offset = 0;
|
||
|
+ last_match_was_empty = FALSE;
|
||
|
do
|
||
|
{
|
||
|
- options = 0;
|
||
|
- self->start_offset = matches[1]; /* Start at end of previous match 0 on the first iteration*/
|
||
|
-
|
||
|
- /* If the previous match was for an empty string, we are finished if we are
|
||
|
- at the end of the subject. Otherwise, arrange to run another match at the
|
||
|
- same point to see if a non-empty match can be found.
|
||
|
+ /* loop over the string, replacing one occurence at a time. */
|
||
|
+
|
||
|
+ /* NOTE: zero length matches need special care, as we could spin
|
||
|
+ * forever otherwise (since the current position wouldn't be
|
||
|
+ * advanced).
|
||
|
+ *
|
||
|
+ * A zero-length match can be as simple as "a*" which will be
|
||
|
+ * returned unless PCRE_NOTEMPTY is specified.
|
||
|
+ *
|
||
|
+ * By supporting zero-length matches, we basically make it
|
||
|
+ * possible to insert replacement between each incoming
|
||
|
+ * character.
|
||
|
+ *
|
||
|
+ * For example:
|
||
|
+ * pattern: a*
|
||
|
+ * replacement: #
|
||
|
+ * input: message
|
||
|
+ * result: #m#e#s#s#a#g#e#
|
||
|
+ *
|
||
|
+ * This mimics Perl behaviour.
|
||
|
*/
|
||
|
|
||
|
- if (matches[0] == matches[1] && !first_round)
|
||
|
+ if (last_match_was_empty)
|
||
|
{
|
||
|
- if (matches[0] == value_len)
|
||
|
- break;
|
||
|
+ /* Otherwise, arrange to run another match at the same point
|
||
|
+ * to see if a non-empty match can be found.
|
||
|
+ */
|
||
|
+
|
||
|
options = PCRE_NOTEMPTY | PCRE_ANCHORED;
|
||
|
}
|
||
|
+ else
|
||
|
+ {
|
||
|
+ options = 0;
|
||
|
+ }
|
||
|
|
||
|
- rc = pcre_exec(self->pattern, self->extra, value, value_len, self->start_offset/*start offset*/, (self->match_options | options) , matches, (RE_MAX_MATCHES * 3) );
|
||
|
- if (rc < 0)
|
||
|
+ rc = pcre_exec(self->pattern, self->extra,
|
||
|
+ value, value_len,
|
||
|
+ start_offset, (self->match_options | options), matches, matches_size);
|
||
|
+ if (rc < 0 && rc != PCRE_ERROR_NOMATCH)
|
||
|
{
|
||
|
- if(rc == PCRE_ERROR_NOMATCH)
|
||
|
- {
|
||
|
- /* msg_debug("No match", NULL); */
|
||
|
- if(!first_round)
|
||
|
- {
|
||
|
- if (options == 0)
|
||
|
- break;
|
||
|
- else
|
||
|
- matches[1] = self->start_offset + 1;
|
||
|
- continue; /* Go round the loop again */
|
||
|
- }
|
||
|
- }
|
||
|
- else
|
||
|
- {
|
||
|
- /* Handle other special cases */
|
||
|
- msg_error("Error while matching regexp",
|
||
|
- evt_tag_int("error_code",rc),
|
||
|
- NULL);
|
||
|
- }
|
||
|
+ msg_error("Error while matching regexp",
|
||
|
+ evt_tag_int("error_code", rc),
|
||
|
+ NULL);
|
||
|
+ break;
|
||
|
}
|
||
|
- else if (rc == 0)
|
||
|
+ else if (rc < 0)
|
||
|
{
|
||
|
- msg_error("Error while storing matching substrings", NULL);
|
||
|
+ if ((options & PCRE_NOTEMPTY) == 0)
|
||
|
+ {
|
||
|
+ /* we didn't match, even when we permitted to match the
|
||
|
+ * empty string. Nothing to find here, bail out */
|
||
|
+ break;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* we didn't match, quite possibly because the empty match
|
||
|
+ * was not permitted. Skip one character in order to avoid
|
||
|
+ * infinite loop over the same zero-length match. */
|
||
|
+
|
||
|
+ start_offset = start_offset + 1;
|
||
|
+ /* FIXME: handle complex sequences like utf8 and newline characters */
|
||
|
+ last_match_was_empty = FALSE;
|
||
|
+ continue;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
+ /* if the output array was too small, truncate the number of
|
||
|
+ captures to RE_MAX_MATCHES */
|
||
|
+
|
||
|
+ if (rc == 0)
|
||
|
+ rc = matches_size / 3;
|
||
|
+
|
||
|
log_matcher_pcre_re_feed_backrefs(s, msg, value_handle, matches, rc, value);
|
||
|
log_matcher_pcre_re_feed_named_substrings(s, msg, matches, value);
|
||
|
|
||
|
if (!new_value)
|
||
|
new_value = g_string_sized_new(value_len);
|
||
|
- /* literal */
|
||
|
+ /* append non-matching portion */
|
||
|
g_string_append_len(new_value, &value[last_offset], matches[0] - last_offset);
|
||
|
/* replacement */
|
||
|
log_template_append_format(replacement, msg, 0, TS_FMT_BSD, NULL, 0, 0, new_value);
|
||
|
- last_offset = matches[1];
|
||
|
+ last_match_was_empty = (matches[0] == matches[1]);
|
||
|
+ start_offset = last_offset = matches[1];
|
||
|
}
|
||
|
- first_round = FALSE;
|
||
|
}
|
||
|
- while (TRUE && (self->super.flags & LMF_GLOBAL));
|
||
|
+ while (self->super.flags & LMF_GLOBAL && start_offset < value_len);
|
||
|
|
||
|
if (new_value)
|
||
|
{
|
||
|
diff --git a/tests/unit/test_matcher.c b/tests/unit/test_matcher.c
|
||
|
index 3df98e5..95866b3 100644
|
||
|
--- a/tests/unit/test_matcher.c
|
||
|
+++ b/tests/unit/test_matcher.c
|
||
|
@@ -144,9 +144,18 @@ main()
|
||
|
/* empty match with global flag*/
|
||
|
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa bb", 0, "c*", "#", "#a#a# #b#b#", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa bb", 0, "a*", "?", "?? ?b?b?", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "aa|b*", "@", "@@", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "aa|b*", "@", "@", 0, log_matcher_pcre_re_new());
|
||
|
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "b*|aa", "@", "@@@", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: aa", 0, "b*|aa", "@", "@aa", 0, log_matcher_pcre_re_new());
|
||
|
|
||
|
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: wikiwiki", 0, "wi", "", "kiki", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: wikiwiki", 0, "wi", "kuku", "kukukikukuki", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
+
|
||
|
+ /* this tests a pcre 8.12 incompatibility */
|
||
|
+
|
||
|
+ testcase_replace("<155>2006-02-11T10:34:56+01:00 bzorp syslog-ng[23323]: wikiwiki", 0, "([[:digit:]]{1,3}\\.){3}[[:digit:]]{1,3}", "foo", "wikiwiki", LMF_GLOBAL, log_matcher_pcre_re_new());
|
||
|
+
|
||
|
#endif
|
||
|
|
||
|
return 0;
|