List:Commits« Previous MessageNext Message »
From:bar Date:August 1 2007 11:25am
Subject:bk commit into 5.0 tree (bar:1.2518) BUG#28875
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-08-01 16:25:51+05:00, bar@stripped +19 -0
  Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
  (Regression, caused by a patch for the bug 22646).
  Problem: when result type of date_format() was changed from
  binary string to character string, mixing date_format()
  with an ascii column in CONCAT() stopped working.
  Fix:
  - adding "repertoire" flag into DTCollation class,
  to mark items which can return only pure ASCII strings.
  - allow character set conversion from pure ASCII to other character sets.

  include/m_ctype.h@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +11 -0
    Defining new flags.
    Adding new function prototypes.

  mysql-test/r/ctype_ucs.result@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +26 -0
    Adding tests.

  mysql-test/r/ctype_utf8.result@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +36 -0
    Adding tests.

  mysql-test/r/func_time.result@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +16 -0
    Adding tests.

  mysql-test/t/ctype_ucs.test@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +30 -0
    Adding tests.

  mysql-test/t/ctype_utf8.test@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +40 -0
    Adding tests.

  mysql-test/t/func_time.test@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +26 -0
    Adding test.

  mysys/charset.c@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +3 -0
    Adding pure ASCII detection when loading a dynamic character set.

  sql/item.cc@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +30 -12
    - Moving detection of a Unicode superset into function.
    - Adding detection of an ASCII subset.
    - Adding creation of to-ASCII character set convertor when
      safe_charset_converter() failed and when the argument
      repertoire is known to be pure ASCII.

  sql/item.h@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +36 -7
    - Adding "repertoire" member into DTCollation class.
    - Adding "repertoire" argument to constructors.
    - Adding new methods:
      set_repertoire_from_charset()
      set_repertoire_from_value()

  sql/item_func.cc@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +1 -1
    Adding "repertoire" argument.

  sql/item_strfunc.cc@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +2 -1
    Adding "repertoire" argument.

  sql/item_timefunc.cc@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +5 -1
    Initializing the result repertoire taking into account the "is_ascii"
    flag of the current locale.

  sql/sql_lex.cc@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +10 -5
    Detect 7bit strings, return in Lex->text_string_is_7bit.

  sql/sql_lex.h@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +1 -0
    Adding new member into LEX structure.

  sql/sql_yacc.yy@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +63 -25
    Depending on Lex->text_string_is_7bit and character set features,
    create Item_string with MY_REPERTOIRE_ASCII when it is possible.

  strings/conf_to_src.c@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +29 -4
    - Adding printing of the "MY_CS_PUREASCII" flag
    - Adding printing of copyright

  strings/ctype-extra.c@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +4 -3
    Recreating ctype-extra.c: ascii_general_ci and ascii_bin
    are now marked with MY_CS_PUREASCII flag.

  strings/ctype.c@stripped, 2007-08-01 16:25:45+05:00, bar@stripped +86 -0
    Adding new functions.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.myoffice.izhnet.ru
# Root:	/home/bar/mysql-work/mysql-5.0.b28875

--- 1.128/include/m_ctype.h	2007-06-07 17:55:53 +05:00
+++ 1.129/include/m_ctype.h	2007-08-01 16:25:45 +05:00
@@ -78,8 +78,14 @@ extern MY_UNICASE_INFO *my_unicase_turki
 #define MY_CS_READY	256    /* if a charset is initialized    */
 #define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
 #define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
+#define MY_CS_PUREASCII 2048   /* if a charset is pure ascii     */
 #define MY_CHARSET_UNDEFINED 0
 
+/* Character repertoire flags */
+#define MY_REPERTOIRE_ASCII      1 /* Pure ASCII            U+0000..U+007F */
+#define MY_REPERTOIRE_EXTENDED   2 /* Extended characters:  U+0080..U+FFFF */
+#define MY_REPERTOIRE_UNICODE30  3 /* ASCII | EXTENDED:     U+0000..U+FFFF */
+
 
 typedef struct my_uni_idx_st
 {
@@ -434,6 +440,11 @@ extern my_bool my_parse_charset_xml(cons
 
 my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, uint len);
 my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, uint len);
+
+
+uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
+my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
+my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
 
 
 #define	_MY_U	01	/* Upper case */

--- 1.151/mysys/charset.c	2007-06-07 17:55:53 +05:00
+++ 1.152/mysys/charset.c	2007-08-01 16:25:45 +05:00
@@ -277,6 +277,9 @@ static int add_collation(CHARSET_INFO *c
         if (sort_order && sort_order['A'] < sort_order['a'] &&
                           sort_order['a'] < sort_order['B'])
           all_charsets[cs->number]->state|= MY_CS_CSSORT; 
+
+        if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
+          all_charsets[cs->number]->state|= MY_CS_PUREASCII;
       }
     }
     else

--- 1.269/sql/item.cc	2007-05-21 23:50:02 +05:00
+++ 1.270/sql/item.cc	2007-08-01 16:25:45 +05:00
@@ -1296,6 +1296,25 @@ void Item::split_sum_func2(THD *thd, Ite
 }
 
 
+static bool
+left_is_superset(DTCollation *left, DTCollation *right)
+{
+  /* Allow convert to Unicode */
+  if (left->collation->state & MY_CS_UNICODE &&
+      (left->derivation < right->derivation ||
+       (left->derivation == right->derivation &&
+        !(right->collation->state & MY_CS_UNICODE))))
+    return TRUE;
+  /* Allow convert from ASCII */
+  if (right->repertoire == MY_REPERTOIRE_ASCII &&
+      (left->derivation < right->derivation ||
+       (left->derivation == right->derivation &&
+        !(left->repertoire == MY_REPERTOIRE_ASCII))))
+    return TRUE;
+  /* Disallow conversion otherwise */
+  return FALSE;
+}
+
 /*
    Aggregate two collations together taking
    into account their coercibility (aka derivation):
@@ -1360,18 +1379,12 @@ bool DTCollation::aggregate(DTCollation 
        ; // Do nothing
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
-             collation->state & MY_CS_UNICODE &&
-             (derivation < dt.derivation ||
-             (derivation == dt.derivation &&
-             !(dt.collation->state & MY_CS_UNICODE))))
+             left_is_superset(this, &dt))
     {
       // Do nothing
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
-             dt.collation->state & MY_CS_UNICODE &&
-             (dt.derivation < derivation ||
-              (dt.derivation == derivation &&
-             !(collation->state & MY_CS_UNICODE))))
+             left_is_superset(&dt, this))
     {
       set(dt);
     }
@@ -1390,7 +1403,7 @@ bool DTCollation::aggregate(DTCollation 
     else
     {
       // Cannot apply conversion
-      set(0, DERIVATION_NONE);
+      set(0, DERIVATION_NONE, 0);
       return 1;
     }
   }
@@ -1412,8 +1425,8 @@ bool DTCollation::aggregate(DTCollation 
     {
       if (derivation == DERIVATION_EXPLICIT)
       {
-	set(0, DERIVATION_NONE);
-	return 1;
+        set(0, DERIVATION_NONE, 0);
+        return 1;
       }
       if (collation->state & MY_CS_BINSORT)
         return 0;
@@ -1427,6 +1440,7 @@ bool DTCollation::aggregate(DTCollation 
       set(bin, DERIVATION_NONE);
     }
   }
+  repertoire|= dt.repertoire;
   return 0;
 }
 
@@ -1571,7 +1585,11 @@ bool agg_item_charsets(DTCollation &coll
                                   &dummy_offset))
       continue;
 
-    if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
+    if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
+        ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
+      conv= new Item_func_conv_charset(*arg, coll.collation, 1);
+
+    if (!conv)
     {
       if (nargs >=2 && nargs <= 3)
       {

--- 1.230/sql/item.h	2007-05-18 01:17:44 +05:00
+++ 1.231/sql/item.h	2007-08-01 16:25:45 +05:00
@@ -49,29 +49,50 @@ class DTCollation {
 public:
   CHARSET_INFO     *collation;
   enum Derivation derivation;
+  uint repertoire;
   
+  void set_repertoire_from_charset(CHARSET_INFO *cs)
+  {
+    repertoire= cs->state & MY_CS_PUREASCII ?
+                MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+  }
   DTCollation()
   {
     collation= &my_charset_bin;
     derivation= DERIVATION_NONE;
+    repertoire= MY_REPERTOIRE_UNICODE30;
   }
   DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg)
   {
     collation= collation_arg;
     derivation= derivation_arg;
+    set_repertoire_from_charset(collation_arg);
   }
   void set(DTCollation &dt)
   { 
     collation= dt.collation;
     derivation= dt.derivation;
+    repertoire= dt.repertoire;
   }
   void set(CHARSET_INFO *collation_arg, Derivation derivation_arg)
   {
     collation= collation_arg;
     derivation= derivation_arg;
+    set_repertoire_from_charset(collation_arg);
+  }
+  void set(CHARSET_INFO *collation_arg,
+           Derivation derivation_arg,
+           uint repertoire_arg)
+  {
+    collation= collation_arg;
+    derivation= derivation_arg;
+    repertoire= repertoire_arg;
   }
   void set(CHARSET_INFO *collation_arg)
-  { collation= collation_arg; }
+  {
+    collation= collation_arg;
+    set_repertoire_from_charset(collation_arg);
+  }
   void set(Derivation derivation_arg)
   { derivation= derivation_arg; }
   bool aggregate(DTCollation &dt, uint flags= 0);
@@ -1650,10 +1671,11 @@ class Item_string :public Item
 {
 public:
   Item_string(const char *str,uint length,
-  	      CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+              CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+              uint repertoire= MY_REPERTOIRE_UNICODE30)
   {
-    collation.set(cs, dv);
-    str_value.set_or_copy_aligned(str,length,cs);
+    str_value.set_or_copy_aligned(str, length, cs);
+    collation.set(cs, dv, repertoire);
     /*
       We have to have a different max_length than 'length' here to
       ensure that we get the right length if we do use the item
@@ -1677,10 +1699,11 @@ public:
     fixed= 1;
   }
   Item_string(const char *name_par, const char *str, uint length,
-	      CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+              CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+              uint repertoire= MY_REPERTOIRE_UNICODE30)
   {
-    collation.set(cs, dv);
-    str_value.set_or_copy_aligned(str,length,cs);
+    str_value.set_or_copy_aligned(str, length, cs);
+    collation.set(cs, dv, repertoire);
     max_length= str_value.numchars()*cs->mbmaxlen;
     set_name(name_par, 0, cs);
     decimals=NOT_FIXED_DEC;
@@ -1695,6 +1718,12 @@ public:
   {
     str_value.copy(str_arg, length_arg, collation.collation);
     max_length= str_value.numchars() * collation.collation->mbmaxlen;
+  }
+  void set_repertoire_from_value()
+  {
+    collation.repertoire= my_string_repertoire(str_value.charset(),
+                                               str_value.ptr(),
+                                               str_value.length());
   }
   enum Type type() const { return STRING_ITEM; }
   double val_real();

--- 1.347/sql/item_func.cc	2007-06-03 16:21:53 +05:00
+++ 1.348/sql/item_func.cc	2007-08-01 16:25:45 +05:00
@@ -3751,7 +3751,7 @@ static user_var_entry *get_variable(HASH
     entry->value=0;
     entry->length=0;
     entry->update_query_id=0;
-    entry->collation.set(NULL, DERIVATION_IMPLICIT);
+    entry->collation.set(NULL, DERIVATION_IMPLICIT, 0);
     entry->unsigned_flag= 0;
     /*
       If we are here, we were called from a SET or a query which sets a

--- 1.301/sql/item_strfunc.cc	2007-04-28 21:26:10 +05:00
+++ 1.302/sql/item_strfunc.cc	2007-08-01 16:25:45 +05:00
@@ -2672,7 +2672,8 @@ void Item_func_set_collation::fix_length
              colname, args[0]->collation.collation->csname);
     return;
   }
-  collation.set(set_collation, DERIVATION_EXPLICIT);
+  collation.set(set_collation, DERIVATION_EXPLICIT,
+                args[0]->collation.repertoire);
   max_length= args[0]->max_length;
 }
 

--- 1.144/sql/item_timefunc.cc	2007-05-30 01:32:57 +05:00
+++ 1.145/sql/item_timefunc.cc	2007-08-01 16:25:45 +05:00
@@ -1717,7 +1717,11 @@ void Item_func_date_format::fix_length_a
   Item *arg1= args[1]->this_item();
 
   decimals=0;
-  collation.set(thd->variables.collation_connection);
+  CHARSET_INFO *cs= thd->variables.collation_connection;
+  uint32 repertoire= arg1->collation.repertoire;
+  if (!thd->variables.lc_time_names->is_ascii)
+    repertoire|= MY_REPERTOIRE_EXTENDED;
+  collation.set(cs, arg1->collation.derivation, repertoire);
   if (arg1->type() == STRING_ITEM)
   {						// Optimize the normal case
     fixed_length=1;

--- 1.223/sql/sql_lex.cc	2007-05-26 03:17:18 +05:00
+++ 1.224/sql/sql_lex.cc	2007-08-01 16:25:45 +05:00
@@ -305,16 +305,18 @@ static LEX_STRING get_quoted_token(Lex_i
   Fix sometimes to do only one scan of the string
 */
 
-static char *get_text(Lex_input_stream *lip)
+static char *get_text(Lex_input_stream *lip, uchar *bitmask)
 {
   reg1 uchar c,sep;
-  uint found_escape=0;
+  uint found_escape= 0;
   CHARSET_INFO *cs= lip->m_thd->charset();
 
+  *bitmask= 0;
   sep= yyGetLast();			// String should end with this
   while (lip->ptr != lip->end_of_query)
   {
-    c = yyGet();
+    c= yyGet();
+    *bitmask|= c;
 #ifdef USE_MB
     {
       int l;
@@ -534,6 +536,7 @@ int MYSQLlex(void *arg, void *yythd)
   CHARSET_INFO *cs= thd->charset();
   uchar *state_map= cs->state_map;
   uchar *ident_map= cs->ident_map;
+  uchar string_bitmask;
 
   lip->yylval=yylval;			// The global state
 
@@ -599,12 +602,13 @@ int MYSQLlex(void *arg, void *yythd)
       /* Found N'string' */
       lip->tok_start++;                 // Skip N
       yySkip();                         // Skip '
-      if (!(yylval->lex_str.str = get_text(lip)))
+      if (!(yylval->lex_str.str= get_text(lip, &string_bitmask)))
       {
 	state= MY_LEX_CHAR;             // Read char by char
 	break;
       }
       yylval->lex_str.length= lip->yytoklen;
+      lex->text_string_is_7bit= (string_bitmask & 0x80) ? 0 : 1;
       return(NCHAR_STRING);
 
     case MY_LEX_IDENT_OR_HEX:
@@ -920,12 +924,13 @@ int MYSQLlex(void *arg, void *yythd)
       }
       /* " used for strings */
     case MY_LEX_STRING:			// Incomplete text string
-      if (!(yylval->lex_str.str = get_text(lip)))
+      if (!(yylval->lex_str.str= get_text(lip, &string_bitmask)))
       {
 	state= MY_LEX_CHAR;		// Read char by char
 	break;
       }
       yylval->lex_str.length=lip->yytoklen;
+      lex->text_string_is_7bit= (string_bitmask & 0x80) ? 0 : 1;
       return(TEXT_STRING);
 
     case MY_LEX_COMMENT:			//  Comment

--- 1.247/sql/sql_lex.h	2007-05-26 01:35:58 +05:00
+++ 1.248/sql/sql_lex.h	2007-08-01 16:25:45 +05:00
@@ -994,6 +994,7 @@ typedef struct st_lex : public Query_tab
   gptr yacc_yyss,yacc_yyvs;
   THD *thd;
   CHARSET_INFO *charset, *underscore_charset;
+  bool text_string_is_7bit;
   /* store original leaf_tables for INSERT SELECT and PS/SP */
   TABLE_LIST *leaf_tables_insert;
   /* Position (first character index) of SELECT of CREATE VIEW statement */

--- 1.521/sql/sql_yacc.yy	2007-06-03 12:03:14 +05:00
+++ 1.522/sql/sql_yacc.yy	2007-08-01 16:25:45 +05:00
@@ -7509,18 +7509,54 @@ opt_load_data_set_spec:
 /* Common definitions */
 
 text_literal:
-	TEXT_STRING_literal
-	{
-	  THD *thd= YYTHD;
-	  $$ = new Item_string($1.str,$1.length,thd->variables.collation_connection);
-	}
-	| NCHAR_STRING
-	{ $$=  new Item_string($1.str,$1.length,national_charset_info); }
-	| UNDERSCORE_CHARSET TEXT_STRING
-	  { $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
-	| text_literal TEXT_STRING_literal
-	  { ((Item_string*) $1)->append($2.str,$2.length); }
-	;
+        TEXT_STRING
+        {
+          LEX_STRING tmp;
+          THD *thd= YYTHD;
+          CHARSET_INFO *cs_con= thd->variables.collation_connection;
+          CHARSET_INFO *cs_cli= thd->variables.character_set_client;
+          uint repertoire= thd->lex->text_string_is_7bit &&
+                             my_charset_is_ascii_based(cs_cli) ?
+                           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+          if (thd->charset_is_collation_connection ||
+              (repertoire == MY_REPERTOIRE_ASCII &&
+               my_charset_is_ascii_based(cs_con)))
+            tmp= $1;
+          else
+            thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli);
+          $$= new Item_string(tmp.str, tmp.length, cs_con,
+                              DERIVATION_COERCIBLE, repertoire);
+        }
+        | NCHAR_STRING
+        {
+          uint repertoire= Lex->text_string_is_7bit ?
+                           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+          DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
+          $$= new Item_string($1.str, $1.length, national_charset_info,
+                              DERIVATION_COERCIBLE, repertoire);
+        }
+        | UNDERSCORE_CHARSET TEXT_STRING
+          {
+            $$= new Item_string($2.str, $2.length, Lex->underscore_charset);
+            ((Item_string*) $$)->set_repertoire_from_value();
+          }
+        | text_literal TEXT_STRING_literal
+          {
+            Item_string* item= (Item_string*) $1;
+            item->append($2.str, $2.length);
+            if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED))
+            {
+              /*
+                 If the string has been pure ASCII so far,
+                 check the new part.
+              */
+              CHARSET_INFO *cs= YYTHD->variables.collation_connection;
+              item->collation.repertoire|= my_string_repertoire(cs,
+                                                                $2.str,
+                                                                $2.length);
+            }
+          }
+        ;
 
 text_string:
 	TEXT_STRING_literal
@@ -7592,20 +7628,22 @@ literal:
 	| TRUE_SYM	{ $$= new Item_int((char*) "TRUE",1,1); }
 	| HEX_NUM	{ $$ =	new Item_hex_string($1.str, $1.length);}
 	| BIN_NUM	{ $$= new Item_bin_string($1.str, $1.length); }
-	| UNDERSCORE_CHARSET HEX_NUM
-	  {
-	    Item *tmp= new Item_hex_string($2.str, $2.length);
-	    /*
-	      it is OK only emulate fix_fieds, because we need only
+        | UNDERSCORE_CHARSET HEX_NUM
+          {
+            Item *tmp= new Item_hex_string($2.str, $2.length);
+            /*
+              it is OK only emulate fix_fieds, because we need only
               value of constant
-	    */
-	    String *str= tmp ?
-	      tmp->quick_fix_field(), tmp->val_str((String*) 0) :
-	      (String*) 0;
-	    $$= new Item_string(str ? str->ptr() : "",
-				str ? str->length() : 0,
-				Lex->underscore_charset);
-	  }
+            */
+            String *str= tmp ?
+              tmp->quick_fix_field(), tmp->val_str((String*) 0) :
+              (String*) 0;
+            $$= new Item_string(str ? str->ptr() : "",
+                                str ? str->length() : 0,
+                                Lex->underscore_charset);
+            if ($$)
+              ((Item_string *) $$)->set_repertoire_from_value();
+          }
 	| UNDERSCORE_CHARSET BIN_NUM
           {
 	    Item *tmp= new Item_bin_string($2.str, $2.length);

--- 1.20/strings/conf_to_src.c	2006-12-27 09:28:22 +04:00
+++ 1.21/strings/conf_to_src.c	2007-08-01 16:25:45 +05:00
@@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
          cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
 }
 
+
 void dispcset(FILE *f,CHARSET_INFO *cs)
 {
   fprintf(f,"{\n");
   fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s,\n",
-          cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
-          cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
-          is_case_sensitive(cs)     ? "|MY_CS_CSSORT"  : "");
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+          cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
+          cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
+          is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
   
   if (cs->name)
   {
@@ -243,6 +245,28 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
 }
 
 
+static void
+fprint_copyright(FILE *file)
+{
+  fprintf(file,
+"/* Copyright (C) 2000-2007 MySQL AB\n"
+"\n"
+"   This program is free software; you can redistribute it and/or modify\n"
+"   it under the terms of the GNU General Public License as published by\n"
+"   the Free Software Foundation; version 2 of the License.\n"
+"\n"
+"   This program is distributed in the hope that it will be useful,\n"
+"   but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+"   GNU General Public License for more details.\n"
+"\n"
+"   You should have received a copy of the GNU General Public License\n"
+"   along with this program; if not, write to the Free Software\n"
+"   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */\n"
+"\n");
+}
+
+
 int
 main(int argc, char **argv  __attribute__((unused)))
 {
@@ -283,6 +307,7 @@ main(int argc, char **argv  __attribute_
           "directory:\n");
   fprintf(f, "    ./conf_to_src ../sql/share/charsets/ > FILE\n");
   fprintf(f, "*/\n\n");
+  fprint_copyright(f);
   fprintf(f,"#include <my_global.h>\n");
   fprintf(f,"#include <m_ctype.h>\n\n");
   

--- 1.65/strings/ctype.c	2007-06-07 17:55:53 +05:00
+++ 1.66/strings/ctype.c	2007-08-01 16:25:45 +05:00
@@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char 
   my_xml_parser_free(&p);
   return rc;
 }
+
+
+/*
+  Check repertoire: detect pure ascii strings
+*/
+uint
+my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
+{
+  const char *strend= str + length;
+  if (cs->mbminlen == 1)
+  {
+    for ( ; str < strend; str++)
+    {
+      if (((uchar) *str) > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  else
+  {
+    my_wc_t wc;
+    int chlen;
+    for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
+    {
+      if (wc > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  return MY_REPERTOIRE_ASCII;
+}
+
+
+/*
+  Detect whether a character set is ASCII compatible.
+
+  Returns TRUE for:
+  
+  - all 8bit character sets whose Unicode mapping of 0x7B is '{'
+    (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
+  
+  - all multi-byte character sets having mbminlen == 1
+    (ignores ucs2 whose mbminlen is 2)
+  
+  TODO:
+  
+  When merging to 5.2, this function should be changed
+  to check a new flag MY_CS_NONASCII, 
+  
+     return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
+  
+  This flag was previously added into 5.2 under terms
+  of WL#3759 "Optimize identifier conversion in client-server protocol"
+  especially to mark character sets not compatible with ASCII.
+  
+  We won't backport this flag to 5.0 or 5.1.
+  This function is Ok for 5.0 and 5.1, because we're not going
+  to introduce new tricky character sets between 5.0 and 5.2.
+*/
+my_bool
+my_charset_is_ascii_based(CHARSET_INFO *cs)
+{
+  return 
+    (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
+    (cs->mbminlen == 1 && cs->mbmaxlen > 1);
+}
+
+
+/*
+  Detect if a character set is 8bit,
+  and it is pure ascii, i.e. doesn't have
+  characters outside U+0000..U+007F
+  This functions is shared between "conf_to_src"
+  and dynamic charsets loader in "mysqld".
+*/
+my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+  size_t code;
+  if (!cs->tab_to_uni)
+    return 0;
+  for (code= 0; code < 256; code++)
+  {
+    if (cs->tab_to_uni[code] > 0x7F)
+      return 0;
+  }
+  return 1;
+}

--- 1.51/mysql-test/r/ctype_ucs.result	2007-03-28 18:57:27 +05:00
+++ 1.52/mysql-test/r/ctype_ucs.result	2007-08-01 16:25:45 +05:00
@@ -865,4 +865,30 @@ blob	65535	65535
 text	65535	65535
 text	65535	32767
 drop table t1;
+create table t1 (a varchar(15) character set ascii not null, b int);
+insert into t1 values ('a',1);
+select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062))
+aa
+select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062))
+ab
+select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
+a	b
+a	1
+select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
+a	b
+select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
+select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
+drop table t1;
 End of 5.0 tests

--- 1.105/mysql-test/r/ctype_utf8.result	2007-04-13 10:05:52 +05:00
+++ 1.106/mysql-test/r/ctype_utf8.result	2007-08-01 16:25:45 +05:00
@@ -1639,6 +1639,42 @@ coercibility(col1)	collation(col1)
 0	utf8_swedish_ci
 drop view v1, v2;
 drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, N'x', N'y')) from t1;
+concat(a, if(b>10, N'x', N'y'))
+ay
+select concat(a, if(b>10, N'æ', N'ß')) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
+concat(a, if(b>10, _utf8'x', _utf8'y'))
+ay
+select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
+concat(a, if(b>10, _utf8 0x78, _utf8 0x79))
+ay
+select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
+concat(a, if(b>10, 'x' 'x', 'y' 'y'))
+ayy
+select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
 CREATE TABLE t1 (
 colA int(11) NOT NULL,
 colB varchar(255) character set utf8 NOT NULL,

--- 1.50/mysql-test/t/ctype_ucs.test	2007-03-28 18:57:27 +05:00
+++ 1.51/mysql-test/t/ctype_ucs.test	2007-08-01 16:25:45 +05:00
@@ -594,4 +594,34 @@ select data_type, character_octet_length
   from information_schema.columns where table_name='t1';
 drop table t1;
 
+#
+# Conversion from UCS2 to ASCII is possible
+# if the UCS2 string consists of only ASCII characters
+#
+create table t1 (a varchar(15) character set ascii not null, b int);
+insert into t1 values ('a',1);
+select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
+select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
+
+#
+# Conversion from UCS2 to ASCII is not possible if 
+# the UCS2 string has non-ASCII characters
+#
+--error 1267
+select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+--error 1267
+select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+--error 1267
+select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+--error 1267
+select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+--error 1267
+select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
+--error 1267
+select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
+drop table t1;
+
+
 --echo End of 5.0 tests

--- 1.96/mysql-test/t/ctype_utf8.test	2007-04-13 10:05:52 +05:00
+++ 1.97/mysql-test/t/ctype_utf8.test	2007-08-01 16:25:45 +05:00
@@ -1314,6 +1314,46 @@ select coercibility(col1), collation(col
 drop view v1, v2;
 drop table t1;
 
+#
+# Check conversion of NCHAR strings to subset (e.g. latin1).
+# Conversion is possible if string repertoire is ASCII.
+# Conversion is not possible if the string have extended characters
+#
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, N'x', N'y')) from t1;
+--error 1267
+select concat(a, if(b>10, N'æ', N'ß')) from t1;
+drop table t1;
+
+# Conversion tests for character set introducers
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
+--error 1267
+select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
+drop table t1;
+
+# Conversion tests for introducer + HEX string
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
+--error 1267
+select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
+drop table t1;
+
+# Conversion tests for "text_literal TEXT_STRING_literal" syntax structure
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
+--error 1267
+select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
+drop table t1;
+
 
 #
 # Bug#19960: Inconsistent results when joining

--- 1.26/strings/ctype-extra.c	2007-01-11 18:43:42 +04:00
+++ 1.27/strings/ctype-extra.c	2007-08-01 16:25:45 +05:00
@@ -5,7 +5,8 @@
   To re-generate, run the following in the strings/ directory:
     ./conf_to_src ../sql/share/charsets/ > FILE
 */
-/* Copyright (C) 2000-2003 MySQL AB
+
+/* Copyright (C) 2000-2007 MySQL AB
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -6721,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
   11,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
   "ascii",                     /* cset name     */
   "ascii_general_ci",                     /* coll name     */
   "",                       /* comment       */
@@ -7810,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
   65,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
   "ascii",                     /* cset name     */
   "ascii_bin",                     /* coll name     */
   "",                       /* comment       */

--- 1.81/mysql-test/r/func_time.result	2007-03-06 21:50:42 +04:00
+++ 1.82/mysql-test/r/func_time.result	2007-08-01 16:25:45 +05:00
@@ -1246,3 +1246,19 @@ SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i
 TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s")
 838:59:58
 838:59:59
+set names latin1;
+create table t1 (a varchar(15) character set ascii not null);
+insert into t1 values ('070514-000000');
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull'))
+#
+set names swe7;
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (swe7_swedish_ci,COERCIBLE) for operation 'concat'
+set names latin1;
+set lc_time_names=fr_FR;
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation 'concat'
+set lc_time_names=en_US;
+drop table t1;
+End of 5.0 tests

--- 1.68/mysql-test/t/func_time.test	2007-03-06 21:50:43 +04:00
+++ 1.69/mysql-test/t/func_time.test	2007-08-01 16:25:45 +05:00
@@ -752,3 +752,29 @@ DROP TABLE t1;
 # Check if using GROUP BY with TIME_FORMAT() produces correct results
 
 SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SELECT 3020398 ) x GROUP BY 1;
+
+#
+# Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
+#
+set names latin1;
+create table t1 (a varchar(15) character set ascii not null);
+insert into t1 values ('070514-000000');
+# Conversion of date_format() result to ASCII
+# is safe with the default locale en_US
+--replace_column 1 #
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+# Error for swe7: it is not ASCII compatible
+set names swe7;
+--error 1267
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+set names latin1;
+# Conversion of date_format() result to ASCII
+# is not safe with the non-default locale fr_FR
+# because month and day names can have accented characters
+set lc_time_names=fr_FR;
+--error 1267
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+set lc_time_names=en_US;
+drop table t1;
+
+--echo End of 5.0 tests
Thread
bk commit into 5.0 tree (bar:1.2518) BUG#28875bar1 Aug
  • Re: bk commit into 5.0 tree (bar:1.2518) BUG#28875Sergei Golubchik2 Aug