On 1/19/11 4:37 AM, Dmitry Shulga wrote:
> Hi Davi!
>
> On 18.01.2011, at 15:42, Davi Arnaut wrote:
>
>>>> Hum, since the hook is global, there isn't much reason to pass
>>>> the stack check function each time we need to compile a
>>>> expression.
>>>>
>>>> Just add the hook to regcomp.c with internal linkage:
>>>>
>>>> static my_regex_stack_check_t stack_check;
>>>>
>>>> and we let a hook be registered at my_regex_init time:
>>>>
>>>> my_regex_init(CHARSET_INFO *cs, my_regex_stack_check_t
>>>> stack_check);
>>>>
>>>> In
> I still don't fully understand your point. Do you suggest moved
> definition of function check_enough_stack_size() from
> sql/item_cmpfunc.cc to regex/regcomp.c and make this function as
> static? But this solution add extra dependency between regex and sql.
> I believe that this dependency is superfluous. Moreover,
> check_enough_stack_size() uses current_thd that is not available
> inside regex.
I suggested that the hook pointer be private to regex, I didn't check
whether it was possible -- it seems to not be. In spite of that, here
goes a patch that implements the suggestion.
Regards,
Davi
=== modified file 'mysql-test/r/not_embedded_server.result'
--- mysql-test/r/not_embedded_server.result 2009-04-30 10:29:19 +0000
+++ mysql-test/r/not_embedded_server.result 2011-01-19 10:46:22 +0000
@@ -4,3 +4,7 @@ select 1;
SHOW VARIABLES like 'slave_skip_errors';
Variable_name Value
slave_skip_errors OFF
+#
+# Bug#58026: massive recursion and crash in regular expression handling
+#
+SELECT '1' RLIKE RPAD('1', 10000, '(');
=== modified file 'mysql-test/t/not_embedded_server.test'
--- mysql-test/t/not_embedded_server.test 2009-04-30 10:29:19 +0000
+++ mysql-test/t/not_embedded_server.test 2011-01-19 10:46:04 +0000
@@ -42,4 +42,13 @@ select 1;
SHOW VARIABLES like 'slave_skip_errors';
+--echo #
+--echo # Bug#58026: massive recursion and crash in regular expression handling
+--echo #
+
+--disable_result_log
+--error ER_STACK_OVERRUN_NEED_MORE
+SELECT '1' RLIKE RPAD('1', 10000, '(');
+--enable_result_log
+
# End of 5.1 tests
=== modified file 'regex/my_regex.h'
--- regex/my_regex.h 2005-09-29 00:08:24 +0000
+++ regex/my_regex.h 2011-01-19 10:35:23 +0000
@@ -25,6 +25,7 @@ typedef struct {
regoff_t rm_so; /* start of match */
regoff_t rm_eo; /* end of match */
} my_regmatch_t;
+typedef int (*my_regex_stack_check_t)(uchar *);
/* === regcomp.c === */
@@ -76,7 +77,8 @@ extern void my_regfree(my_regex_t *);
/* === reginit.c === */
-extern void my_regex_init(CHARSET_INFO *cs); /* Should be called for multithread progs */
+/* Should be called for multithread progs */
+extern void my_regex_init(CHARSET_INFO *cs, my_regex_stack_check_t func);
extern void my_regex_end(void); /* If one wants a clean end */
#ifdef __cplusplus
=== modified file 'regex/regcomp.c'
--- regex/regcomp.c 2010-07-09 19:37:52 +0000
+++ regex/regcomp.c 2011-01-19 10:40:30 +0000
@@ -31,6 +31,9 @@ struct parse {
CHARSET_INFO *charset; /* for ctype things */
};
+/* Check if there is enough stack space for recursion. */
+my_regex_stack_check_t my_regex_enough_mem_in_stack= NULL;
+
#include "regcomp.ih"
static char nuls[10]; /* place to point scanner in event of error */
@@ -117,7 +120,7 @@ CHARSET_INFO *charset;
# define GOODFLAGS(f) ((f)&~REG_DUMP)
#endif
- my_regex_init(charset); /* Init cclass if neaded */
+ my_regex_init(charset, NULL); /* Init cclass if neaded */
preg->charset=charset;
cflags = GOODFLAGS(cflags);
if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
@@ -217,12 +220,19 @@ int stop; /* character this ERE should
register sopno UNINIT_VAR(prevfwd);
register sopno conc;
register int first = 1; /* is this the first alternative? */
+ unsigned char stack_top;
for (;;) {
/* do a bunch of concatenated expressions */
conc = HERE();
- while (MORE() && (c = PEEK()) != '|' && c != stop)
- p_ere_exp(p);
+ while (MORE() && (c = PEEK()) != '|' && c != stop) {
+ if (my_regex_enough_mem_in_stack &&
+ my_regex_enough_mem_in_stack(&stack_top)) {
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ p_ere_exp(p);
+ }
if(REQUIRE(HERE() != conc, REG_EMPTY)) {}/* require nonempty */
if (!EAT('|'))
=== modified file 'regex/reginit.c'
--- regex/reginit.c 2008-02-18 22:29:39 +0000
+++ regex/reginit.c 2011-01-19 10:40:55 +0000
@@ -4,10 +4,12 @@
#include <m_ctype.h>
#include <m_string.h>
#include "cclass.h"
+#include "my_regex.h"
static my_bool regex_inited=0;
+extern my_regex_stack_check_t my_regex_enough_mem_in_stack;
-void my_regex_init(CHARSET_INFO *cs)
+void my_regex_init(CHARSET_INFO *cs, my_regex_stack_check_t func)
{
char buff[CCLASS_LAST][256];
int count[CCLASS_LAST];
@@ -16,6 +18,7 @@ void my_regex_init(CHARSET_INFO *cs)
if (!regex_inited)
{
regex_inited=1;
+ my_regex_enough_mem_in_stack= func;
bzero((uchar*) &count,sizeof(count));
for (i=1 ; i<= 255; i++)
@@ -74,6 +77,7 @@ void my_regex_end()
int i;
for (i=0; i < CCLASS_LAST ; i++)
free((char*) cclasses[i].chars);
+ my_regex_enough_mem_in_stack= NULL;
regex_inited=0;
}
}
=== modified file 'sql/mysqld.cc'
--- sql/mysqld.cc 2010-12-28 23:47:05 +0000
+++ sql/mysqld.cc 2011-01-19 10:23:51 +0000
@@ -3033,6 +3033,15 @@ static const int load_default_groups_sz=
sizeof(load_default_groups)/sizeof(load_default_groups[0]);
#endif
+#ifndef EMBEDDED_LIBRARY
+extern "C"
+{
+ static int check_enough_stack_size(uchar *stack_top)
+ {
+ return check_stack_overrun(current_thd, STACK_MIN_SIZE, stack_top);
+ }
+}
+#endif
/**
Initialize one of the global date/time format variables.
@@ -3414,7 +3423,11 @@ static int init_common_variables(const c
#endif
mysys_uses_curses=0;
#ifdef USE_REGEX
- my_regex_init(&my_charset_latin1);
+#ifndef EMBEDDED_LIBRARY
+ my_regex_init(&my_charset_latin1, check_enough_stack_size);
+#else
+ my_regex_init(&my_charset_latin1, NULL);
+#endif
#endif
/*
Process a comma-separated character set list and choose