Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-10-05 12:15:11+05:00, bar@stripped +11 -0
Bug#31081 server crash in regexp function
Problem: The "regex" library written by Henry Spencer
does not support tricky character sets like UCS2.
Fix: convert tricky character sets to UTF8 before calling
regex functions.
mysql-test/include/ctype_regex.inc@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +42 -0
New BitKeeper file ``mysql-test/include/ctype_regex.inc''
Moving common regular expression tests into a separate
file and including it in func_regexp and in many ctype_xxx tests.
mysql-test/include/ctype_regex.inc@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +0 -0
mysql-test/r/ctype_uca.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +45 -0
Adding tests
mysql-test/r/ctype_ucs.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +45 -0
Adding tests
mysql-test/r/ctype_utf8.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +45 -0
Adding tests
mysql-test/r/func_regexp.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +13 -1
Adding tests
mysql-test/t/ctype_uca.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +4 -0
Adding tests
mysql-test/t/ctype_ucs.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +4 -0
Adding tests
mysql-test/t/ctype_utf8.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +7 -0
Adding tests
mysql-test/t/func_regexp.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +2 -21
Adding tests
sql/item_cmpfunc.cc@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +72 -50
- Adding new method Item_func_regex::regcomp()
to share more code between fix_fields() and val_int()
- Adding conversion from ASCII-incompatible charsets like UCS2
to UTF8, because the "regex" library does not support these charsets
- Additional optimization: calculate flags for regcomp only
once in fix_fields, instead of every regcomp()
sql/item_cmpfunc.h@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +4 -0
Adding prototypes for new members and methods
diff -Nrup a/mysql-test/include/ctype_regex.inc b/mysql-test/include/ctype_regex.inc
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/mysql-test/include/ctype_regex.inc 2007-10-05 12:15:09 +05:00
@@ -0,0 +1,42 @@
+#
+# To test a desired collation, set session.collation_connection to
+# this collation before including this file
+#
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# Create a table with two varchar(64) null-able column,
+# using current values of
+# @@character_set_connection and @@collation_connection.
+#
+
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+delete from t1;
+
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+
+select HIGH_PRIORITY s1 regexp s2 from t1;
+
+drop table t1;
diff -Nrup a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result
--- a/mysql-test/r/ctype_uca.result 2007-06-28 13:34:42 +05:00
+++ b/mysql-test/r/ctype_uca.result 2007-10-05 12:15:09 +05:00
@@ -2754,4 +2754,49 @@ a
c
ch
drop table t1;
+set collation_connection=ucs2_unicode_ci;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `s1` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL,
+ `s2` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+select HIGH_PRIORITY s1 regexp s2 from t1;
+s1 regexp s2
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+NULL
+NULL
+NULL
+NULL
+drop table t1;
+set names utf8;
End for 5.0 tests
diff -Nrup a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
--- a/mysql-test/r/ctype_ucs.result 2007-08-03 15:57:13 +05:00
+++ b/mysql-test/r/ctype_ucs.result 2007-10-05 12:15:09 +05:00
@@ -922,4 +922,49 @@ ERROR HY000: Illegal mix of collations (
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and
(ucs2_general_ci,COERCIBLE) for operation '='
drop table t1;
+set collation_connection=ucs2_general_ci;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `s1` varchar(64) character set ucs2 default NULL,
+ `s2` varchar(64) character set ucs2 default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+select HIGH_PRIORITY s1 regexp s2 from t1;
+s1 regexp s2
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+NULL
+NULL
+NULL
+NULL
+drop table t1;
+set names latin1;
End of 5.0 tests
diff -Nrup a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
--- a/mysql-test/r/ctype_utf8.result 2007-08-03 15:28:37 +05:00
+++ b/mysql-test/r/ctype_utf8.result 2007-10-05 12:15:09 +05:00
@@ -267,6 +267,51 @@ b
select * from t1 where a = 'b' and a != 'b';
a
drop table t1;
+set collation_connection=utf8_general_ci;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `s1` varchar(64) character set utf8 default NULL,
+ `s2` varchar(64) character set utf8 default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+select HIGH_PRIORITY s1 regexp s2 from t1;
+s1 regexp s2
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+NULL
+NULL
+NULL
+NULL
+drop table t1;
+set names utf8;
set names utf8;
select 'вася' rlike '[[:<:]]вася[[:>:]]';
'вася' rlike '[[:<:]]вася[[:>:]]'
diff -Nrup a/mysql-test/r/func_regexp.result b/mysql-test/r/func_regexp.result
--- a/mysql-test/r/func_regexp.result 2007-03-10 01:18:43 +04:00
+++ b/mysql-test/r/func_regexp.result 2007-10-05 12:15:09 +05:00
@@ -1,5 +1,17 @@
drop table if exists t1;
-create table t1 (s1 char(64),s2 char(64));
+set names latin1;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `s1` varchar(64) default NULL,
+ `s2` varchar(64) default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
diff -Nrup a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test
--- a/mysql-test/t/ctype_uca.test 2007-06-28 13:34:42 +05:00
+++ b/mysql-test/t/ctype_uca.test 2007-10-05 12:15:09 +05:00
@@ -538,4 +538,8 @@ alter table t1 convert to character set
select * from t1 where a like 'c%';
drop table t1;
+set collation_connection=ucs2_unicode_ci;
+--source include/ctype_regex.inc
+set names utf8;
+
-- echo End for 5.0 tests
diff -Nrup a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
--- a/mysql-test/t/ctype_ucs.test 2007-08-03 15:30:29 +05:00
+++ b/mysql-test/t/ctype_ucs.test 2007-10-05 12:15:09 +05:00
@@ -651,4 +651,8 @@ select * from t1 where a=if(b<10,_ucs2 0
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
drop table t1;
+set collation_connection=ucs2_general_ci;
+--source include/ctype_regex.inc
+set names latin1;
+
--echo End of 5.0 tests
diff -Nrup a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
--- a/mysql-test/t/ctype_utf8.test 2007-08-03 15:28:37 +05:00
+++ b/mysql-test/t/ctype_utf8.test 2007-10-05 12:15:09 +05:00
@@ -186,6 +186,13 @@ select * from t1 where a = 'b' and a !=
drop table t1;
#
+# Testing regexp
+#
+set collation_connection=utf8_general_ci;
+--source include/ctype_regex.inc
+set names utf8;
+
+#
# Bug #3928 regexp [[:>:]] and UTF-8
#
set names utf8;
diff -Nrup a/mysql-test/t/func_regexp.test b/mysql-test/t/func_regexp.test
--- a/mysql-test/t/func_regexp.test 2005-07-28 05:21:42 +05:00
+++ b/mysql-test/t/func_regexp.test 2007-10-05 12:15:09 +05:00
@@ -6,28 +6,9 @@
drop table if exists t1;
--enable_warnings
-create table t1 (s1 char(64),s2 char(64));
+set names latin1;
+--source include/ctype_regex.inc
-insert into t1 values('aaa','aaa');
-insert into t1 values('aaa|qqq','qqq');
-insert into t1 values('gheis','^[^a-dXYZ]+$');
-insert into t1 values('aab','^aa?b');
-insert into t1 values('Baaan','^Ba*n');
-insert into t1 values('aaa','qqq|aaa');
-insert into t1 values('qqq','qqq|aaa');
-
-insert into t1 values('bbb','qqq|aaa');
-insert into t1 values('bbb','qqq');
-insert into t1 values('aaa','aba');
-
-insert into t1 values(null,'abc');
-insert into t1 values('def',null);
-insert into t1 values(null,null);
-insert into t1 values('ghi','ghi[');
-
-select HIGH_PRIORITY s1 regexp s2 from t1;
-
-drop table t1;
#
# This test a bug in regexp on Alpha
diff -Nrup a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
--- a/sql/item_cmpfunc.cc 2007-07-16 02:03:32 +05:00
+++ b/sql/item_cmpfunc.cc 2007-10-05 12:15:09 +05:00
@@ -4226,6 +4226,51 @@ void Item_func_like::cleanup()
#ifdef USE_REGEX
bool
+Item_func_regex::regcomp(bool send_error)
+{
+ char buff[MAX_FIELD_WIDTH];
+ String tmp(buff,sizeof(buff),&my_charset_bin);
+ String *res= args[1]->val_str(&tmp);
+ int error;
+
+ if (args[1]->null_value)
+ return TRUE;
+
+ if (regex_compiled)
+ {
+ if (!stringcmp(res, &prev_regexp))
+ return FALSE;
+ prev_regexp.copy(*res);
+ my_regfree(&preg);
+ regex_compiled= 0;
+ }
+
+ if (cmp_collation.collation != regex_lib_charset)
+ {
+ /* Convert UCS2 strings to UTF8 */
+ uint dummy_errors;
+ if (conv.copy(res->ptr(), res->length(), res->charset(),
+ regex_lib_charset, &dummy_errors))
+ return TRUE;
+ res= &conv;
+ }
+
+ if ((error= my_regcomp(&preg, res->c_ptr(),
+ regex_lib_flags, regex_lib_charset)))
+ {
+ if (send_error)
+ {
+ (void) my_regerror(error, &preg, buff, sizeof(buff));
+ my_error(ER_REGEXP_ERROR, MYF(0), buff);
+ }
+ return TRUE;
+ }
+ regex_compiled= 1;
+ return FALSE;
+}
+
+
+bool
Item_func_regex::fix_fields(THD *thd, Item **ref)
{
DBUG_ASSERT(fixed == 0);
@@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, It
if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1))
return TRUE;
+ regex_lib_flags= (cmp_collation.collation->state &
+ (MY_CS_BINSORT | MY_CS_CSSORT)) ?
+ REG_EXTENDED | REG_NOSUB :
+ REG_EXTENDED | REG_NOSUB | REG_ICASE;
+ /*
+ If the case of UCS2 and other non-ASCII character sets,
+ we will convert patterns and strings to UTF8.
+ */
+ regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
+ &my_charset_utf8_general_ci :
+ cmp_collation.collation;
+
used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
not_null_tables_cache= (args[0]->not_null_tables() |
args[1]->not_null_tables());
const_item_cache=args[0]->const_item() && args[1]->const_item();
if (!regex_compiled && args[1]->const_item())
{
- char buff[MAX_FIELD_WIDTH];
- String tmp(buff,sizeof(buff),&my_charset_bin);
- String *res=args[1]->val_str(&tmp);
if (args[1]->null_value)
{ // Will always return NULL
maybe_null=1;
return FALSE;
}
- int error;
- if ((error= my_regcomp(&preg,res->c_ptr(),
- ((cmp_collation.collation->state &
- (MY_CS_BINSORT | MY_CS_CSSORT)) ?
- REG_EXTENDED | REG_NOSUB :
- REG_EXTENDED | REG_NOSUB | REG_ICASE),
- cmp_collation.collation)))
- {
- (void) my_regerror(error,&preg,buff,sizeof(buff));
- my_error(ER_REGEXP_ERROR, MYF(0), buff);
+ if (regcomp(TRUE))
return TRUE;
- }
- regex_compiled=regex_is_const=1;
- maybe_null=args[0]->maybe_null;
+ regex_is_const= 1;
+ maybe_null= args[0]->maybe_null;
}
else
maybe_null=1;
@@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int()
{
DBUG_ASSERT(fixed == 1);
char buff[MAX_FIELD_WIDTH];
- String *res, tmp(buff,sizeof(buff),&my_charset_bin);
+ String tmp(buff,sizeof(buff),&my_charset_bin);
+ String *res= args[0]->val_str(&tmp);
- res=args[0]->val_str(&tmp);
- if (args[0]->null_value)
- {
- null_value=1;
+ if ((null_value= (args[0]->null_value ||
+ (!regex_is_const && regcomp(FALSE)))))
return 0;
- }
- if (!regex_is_const)
- {
- char buff2[MAX_FIELD_WIDTH];
- String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin);
- res2= args[1]->val_str(&tmp2);
- if (args[1]->null_value)
+ if (cmp_collation.collation != regex_lib_charset)
+ {
+ /* Convert UCS2 strings to UTF8 */
+ uint dummy_errors;
+ if (conv.copy(res->ptr(), res->length(), res->charset(),
+ regex_lib_charset, &dummy_errors))
{
- null_value=1;
+ null_value= 1;
return 0;
}
- if (!regex_compiled || stringcmp(res2,&prev_regexp))
- {
- prev_regexp.copy(*res2);
- if (regex_compiled)
- {
- my_regfree(&preg);
- regex_compiled=0;
- }
- if (my_regcomp(&preg,res2->c_ptr_safe(),
- ((cmp_collation.collation->state &
- (MY_CS_BINSORT | MY_CS_CSSORT)) ?
- REG_EXTENDED | REG_NOSUB :
- REG_EXTENDED | REG_NOSUB | REG_ICASE),
- cmp_collation.collation))
- {
- null_value=1;
- return 0;
- }
- regex_compiled=1;
- }
+ res= &conv;
}
- null_value=0;
return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
}
diff -Nrup a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
--- a/sql/item_cmpfunc.h 2007-08-31 04:23:36 +05:00
+++ b/sql/item_cmpfunc.h 2007-10-05 12:15:09 +05:00
@@ -1313,6 +1313,10 @@ class Item_func_regex :public Item_bool_
bool regex_is_const;
String prev_regexp;
DTCollation cmp_collation;
+ CHARSET_INFO *regex_lib_charset;
+ int regex_lib_flags;
+ String conv;
+ bool regcomp(bool send_error);
public:
Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b),
regex_compiled(0),regex_is_const(0) {}