List:Commits« Previous MessageNext Message »
From:bar Date:October 5 2007 9:15am
Subject:bk commit into 5.0 tree (bar:1.2534) BUG#31081
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-10-05 12:15:11+05:00, bar@stripped +11 -0
  Bug#31081 server crash in regexp function
  Problem: The "regex" library written by Henry Spencer
  does not support tricky character sets like UCS2.
  Fix: convert tricky character sets to UTF8 before calling
  regex functions.

  mysql-test/include/ctype_regex.inc@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +42 -0
    New BitKeeper file ``mysql-test/include/ctype_regex.inc''
    
    Moving common regular expression tests into a separate
    file and including it in func_regexp and in many ctype_xxx tests.
    

  mysql-test/include/ctype_regex.inc@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +0 -0

  mysql-test/r/ctype_uca.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +45 -0
    Adding tests

  mysql-test/r/ctype_ucs.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +45 -0
    Adding tests

  mysql-test/r/ctype_utf8.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +45 -0
    Adding tests

  mysql-test/r/func_regexp.result@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +13 -1
    Adding tests

  mysql-test/t/ctype_uca.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +4 -0
    Adding tests

  mysql-test/t/ctype_ucs.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +4 -0
    Adding tests

  mysql-test/t/ctype_utf8.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +7 -0
    Adding tests

  mysql-test/t/func_regexp.test@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +2 -21
    Adding tests

  sql/item_cmpfunc.cc@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +72 -50
    - Adding new method Item_func_regex::regcomp()
    to share more code between fix_fields() and val_int()
    - Adding conversion from ASCII-incompatible charsets like UCS2
    to UTF8, because the "regex" library does not support these charsets
    - Additional optimization: calculate flags for regcomp only
      once in fix_fields, instead of every regcomp()

  sql/item_cmpfunc.h@stripped, 2007-10-05 12:15:09+05:00, bar@stripped +4 -0
    Adding prototypes for new members and methods

diff -Nrup a/mysql-test/include/ctype_regex.inc b/mysql-test/include/ctype_regex.inc
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/include/ctype_regex.inc	2007-10-05 12:15:09 +05:00
@@ -0,0 +1,42 @@
+#
+# To test a desired collation, set session.collation_connection to
+# this collation before including this file
+#
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+#
+# Create a table with two varchar(64) null-able columns,
+# using current values of
+# @@character_set_connection and  @@collation_connection.
+#
+
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+delete from t1;
+
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+
+select HIGH_PRIORITY s1 regexp s2 from t1;
+
+drop table t1;
diff -Nrup a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result
--- a/mysql-test/r/ctype_uca.result	2007-06-28 13:34:42 +05:00
+++ b/mysql-test/r/ctype_uca.result	2007-10-05 12:15:09 +05:00
@@ -2754,4 +2754,49 @@ a
 c
 ch
 drop table t1;
+set collation_connection=ucs2_unicode_ci;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `s1` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL,
+  `s2` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+select HIGH_PRIORITY s1 regexp s2 from t1;
+s1 regexp s2
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+NULL
+NULL
+NULL
+NULL
+drop table t1;
+set names utf8;
 End for 5.0 tests
diff -Nrup a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
--- a/mysql-test/r/ctype_ucs.result	2007-08-03 15:57:13 +05:00
+++ b/mysql-test/r/ctype_ucs.result	2007-10-05 12:15:09 +05:00
@@ -922,4 +922,49 @@ ERROR HY000: Illegal mix of collations (
 select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
 ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
 drop table t1;
+set collation_connection=ucs2_general_ci;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `s1` varchar(64) character set ucs2 default NULL,
+  `s2` varchar(64) character set ucs2 default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+select HIGH_PRIORITY s1 regexp s2 from t1;
+s1 regexp s2
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+NULL
+NULL
+NULL
+NULL
+drop table t1;
+set names latin1;
 End of 5.0 tests
diff -Nrup a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
--- a/mysql-test/r/ctype_utf8.result	2007-08-03 15:28:37 +05:00
+++ b/mysql-test/r/ctype_utf8.result	2007-10-05 12:15:09 +05:00
@@ -267,6 +267,51 @@ b
 select * from t1 where a = 'b' and a != 'b';
 a
 drop table t1;
+set collation_connection=utf8_general_ci;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `s1` varchar(64) character set utf8 default NULL,
+  `s2` varchar(64) character set utf8 default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
+insert into t1 values('aaa','aaa');
+insert into t1 values('aaa|qqq','qqq');
+insert into t1 values('gheis','^[^a-dXYZ]+$');
+insert into t1 values('aab','^aa?b');
+insert into t1 values('Baaan','^Ba*n');
+insert into t1 values('aaa','qqq|aaa');
+insert into t1 values('qqq','qqq|aaa');
+insert into t1 values('bbb','qqq|aaa');
+insert into t1 values('bbb','qqq');
+insert into t1 values('aaa','aba');
+insert into t1 values(null,'abc');
+insert into t1 values('def',null);
+insert into t1 values(null,null);
+insert into t1 values('ghi','ghi[');
+select HIGH_PRIORITY s1 regexp s2 from t1;
+s1 regexp s2
+1
+1
+1
+1
+1
+1
+1
+0
+0
+0
+NULL
+NULL
+NULL
+NULL
+drop table t1;
+set names utf8;
 set names utf8;
 select  'вася'  rlike '[[:<:]]вася[[:>:]]';
 'вася'  rlike '[[:<:]]вася[[:>:]]'
diff -Nrup a/mysql-test/r/func_regexp.result b/mysql-test/r/func_regexp.result
--- a/mysql-test/r/func_regexp.result	2007-03-10 01:18:43 +04:00
+++ b/mysql-test/r/func_regexp.result	2007-10-05 12:15:09 +05:00
@@ -1,5 +1,17 @@
 drop table if exists t1;
-create table t1 (s1 char(64),s2 char(64));
+set names latin1;
+drop table if exists t1;
+create table t1 as
+select repeat(' ', 64) as s1, repeat(' ',64) as s2
+union
+select null, null;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `s1` varchar(64) default NULL,
+  `s2` varchar(64) default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+delete from t1;
 insert into t1 values('aaa','aaa');
 insert into t1 values('aaa|qqq','qqq');
 insert into t1 values('gheis','^[^a-dXYZ]+$');
diff -Nrup a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test
--- a/mysql-test/t/ctype_uca.test	2007-06-28 13:34:42 +05:00
+++ b/mysql-test/t/ctype_uca.test	2007-10-05 12:15:09 +05:00
@@ -538,4 +538,8 @@ alter table t1 convert to character set 
 select * from t1 where a like 'c%';
 drop table t1;
 
+set collation_connection=ucs2_unicode_ci;
+--source include/ctype_regex.inc
+set names utf8;
+
 -- echo End for 5.0 tests
diff -Nrup a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
--- a/mysql-test/t/ctype_ucs.test	2007-08-03 15:30:29 +05:00
+++ b/mysql-test/t/ctype_ucs.test	2007-10-05 12:15:09 +05:00
@@ -651,4 +651,8 @@ select * from t1 where a=if(b<10,_ucs2 0
 select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
 drop table t1;
 
+set collation_connection=ucs2_general_ci;
+--source include/ctype_regex.inc
+set names latin1;
+
 --echo End of 5.0 tests
diff -Nrup a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
--- a/mysql-test/t/ctype_utf8.test	2007-08-03 15:28:37 +05:00
+++ b/mysql-test/t/ctype_utf8.test	2007-10-05 12:15:09 +05:00
@@ -186,6 +186,13 @@ select * from t1 where a = 'b' and a != 
 drop table t1;
 
 #
+# Testing regexp
+#
+set collation_connection=utf8_general_ci;
+--source include/ctype_regex.inc
+set names utf8;
+
+#
 # Bug #3928 regexp [[:>:]] and UTF-8
 #
 set names utf8;
diff -Nrup a/mysql-test/t/func_regexp.test b/mysql-test/t/func_regexp.test
--- a/mysql-test/t/func_regexp.test	2005-07-28 05:21:42 +05:00
+++ b/mysql-test/t/func_regexp.test	2007-10-05 12:15:09 +05:00
@@ -6,28 +6,9 @@
 drop table if exists t1;
 --enable_warnings
 
-create table t1 (s1 char(64),s2 char(64));
+set names latin1;
+--source include/ctype_regex.inc
 
-insert into t1 values('aaa','aaa');
-insert into t1 values('aaa|qqq','qqq');
-insert into t1 values('gheis','^[^a-dXYZ]+$');
-insert into t1 values('aab','^aa?b');
-insert into t1 values('Baaan','^Ba*n');
-insert into t1 values('aaa','qqq|aaa');
-insert into t1 values('qqq','qqq|aaa');
-
-insert into t1 values('bbb','qqq|aaa');
-insert into t1 values('bbb','qqq');
-insert into t1 values('aaa','aba');
-
-insert into t1 values(null,'abc');
-insert into t1 values('def',null);
-insert into t1 values(null,null);
-insert into t1 values('ghi','ghi[');
-
-select HIGH_PRIORITY s1 regexp s2 from t1;
-
-drop table t1;
 
 #
 # This test a bug in regexp on Alpha
diff -Nrup a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
--- a/sql/item_cmpfunc.cc	2007-07-16 02:03:32 +05:00
+++ b/sql/item_cmpfunc.cc	2007-10-05 12:15:09 +05:00
@@ -4226,6 +4226,51 @@ void Item_func_like::cleanup()
 #ifdef USE_REGEX
 
 bool
+Item_func_regex::regcomp(bool send_error)
+{
+  char buff[MAX_FIELD_WIDTH];
+  String tmp(buff,sizeof(buff),&my_charset_bin);
+  String *res= args[1]->val_str(&tmp);
+  int error;
+
+  if (args[1]->null_value)
+    return TRUE;
+
+  if (regex_compiled)
+  {
+    if (!stringcmp(res, &prev_regexp))
+      return FALSE;
+    prev_regexp.copy(*res);
+    my_regfree(&preg);
+    regex_compiled= 0;
+  }
+
+  if (cmp_collation.collation != regex_lib_charset)
+  {
+    /* Convert UCS2 strings to UTF8 */
+    uint dummy_errors;
+    if (conv.copy(res->ptr(), res->length(), res->charset(),
+                  regex_lib_charset, &dummy_errors))
+      return TRUE;
+    res= &conv;
+  }
+
+  if ((error= my_regcomp(&preg, res->c_ptr(),
+                         regex_lib_flags, regex_lib_charset)))
+  {
+    if (send_error)
+    {
+      (void) my_regerror(error, &preg, buff, sizeof(buff));
+      my_error(ER_REGEXP_ERROR, MYF(0), buff);
+    }
+    return TRUE;
+  }
+  regex_compiled= 1;
+  return FALSE;
+}
+
+
+bool
 Item_func_regex::fix_fields(THD *thd, Item **ref)
 {
   DBUG_ASSERT(fixed == 0);
@@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, It
   if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1))
     return TRUE;
 
+  regex_lib_flags= (cmp_collation.collation->state &
+                    (MY_CS_BINSORT | MY_CS_CSSORT)) ?
+                   REG_EXTENDED | REG_NOSUB :
+                   REG_EXTENDED | REG_NOSUB | REG_ICASE;
+  /*
+    In the case of UCS2 and other non-ASCII character sets,
+    we will convert patterns and strings to UTF8.
+  */
+  regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
+                     &my_charset_utf8_general_ci :
+                     cmp_collation.collation;
+
   used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
   not_null_tables_cache= (args[0]->not_null_tables() |
 			  args[1]->not_null_tables());
   const_item_cache=args[0]->const_item() && args[1]->const_item();
   if (!regex_compiled && args[1]->const_item())
   {
-    char buff[MAX_FIELD_WIDTH];
-    String tmp(buff,sizeof(buff),&my_charset_bin);
-    String *res=args[1]->val_str(&tmp);
     if (args[1]->null_value)
     {						// Will always return NULL
       maybe_null=1;
       return FALSE;
     }
-    int error;
-    if ((error= my_regcomp(&preg,res->c_ptr(),
-                           ((cmp_collation.collation->state &
-                             (MY_CS_BINSORT | MY_CS_CSSORT)) ?
-                            REG_EXTENDED | REG_NOSUB :
-                            REG_EXTENDED | REG_NOSUB | REG_ICASE),
-                           cmp_collation.collation)))
-    {
-      (void) my_regerror(error,&preg,buff,sizeof(buff));
-      my_error(ER_REGEXP_ERROR, MYF(0), buff);
+    if (regcomp(TRUE))
       return TRUE;
-    }
-    regex_compiled=regex_is_const=1;
-    maybe_null=args[0]->maybe_null;
+    regex_is_const= 1;
+    maybe_null= args[0]->maybe_null;
   }
   else
     maybe_null=1;
@@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   char buff[MAX_FIELD_WIDTH];
-  String *res, tmp(buff,sizeof(buff),&my_charset_bin);
+  String tmp(buff,sizeof(buff),&my_charset_bin);
+  String *res= args[0]->val_str(&tmp);
 
-  res=args[0]->val_str(&tmp);
-  if (args[0]->null_value)
-  {
-    null_value=1;
+  if ((null_value= (args[0]->null_value ||
+                    (!regex_is_const && regcomp(FALSE)))))
     return 0;
-  }
-  if (!regex_is_const)
-  {
-    char buff2[MAX_FIELD_WIDTH];
-    String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin);
 
-    res2= args[1]->val_str(&tmp2);
-    if (args[1]->null_value)
+  if (cmp_collation.collation != regex_lib_charset)
+  {
+    /* Convert UCS2 strings to UTF8 */
+    uint dummy_errors;
+    if (conv.copy(res->ptr(), res->length(), res->charset(),
+                  regex_lib_charset, &dummy_errors))
     {
-      null_value=1;
+      null_value= 1;
       return 0;
     }
-    if (!regex_compiled || stringcmp(res2,&prev_regexp))
-    {
-      prev_regexp.copy(*res2);
-      if (regex_compiled)
-      {
-	my_regfree(&preg);
-	regex_compiled=0;
-      }
-      if (my_regcomp(&preg,res2->c_ptr_safe(),
-                     ((cmp_collation.collation->state &
-                       (MY_CS_BINSORT | MY_CS_CSSORT)) ?
-                      REG_EXTENDED | REG_NOSUB :
-                      REG_EXTENDED | REG_NOSUB | REG_ICASE),
-                     cmp_collation.collation))
-      {
-	null_value=1;
-	return 0;
-      }
-      regex_compiled=1;
-    }
+    res= &conv;
   }
-  null_value=0;
   return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
 }
 
diff -Nrup a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
--- a/sql/item_cmpfunc.h	2007-08-31 04:23:36 +05:00
+++ b/sql/item_cmpfunc.h	2007-10-05 12:15:09 +05:00
@@ -1313,6 +1313,10 @@ class Item_func_regex :public Item_bool_
   bool regex_is_const;
   String prev_regexp;
   DTCollation cmp_collation;
+  CHARSET_INFO *regex_lib_charset;
+  int regex_lib_flags;
+  String conv;
+  bool regcomp(bool send_error);
 public:
   Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b),
     regex_compiled(0),regex_is_const(0) {}
Thread
bk commit into 5.0 tree (bar:1.2534) BUG#31081bar5 Oct
  • Re: bk commit into 5.0 tree (bar:1.2534) BUG#31081Sergei Golubchik15 Oct