List:Commits« Previous MessageNext Message »
From:Alexander Barkov Date:September 30 2009 5:17am
Subject:bzr commit into mysql-5.4.5-next-mr branch (bar:2884) WL#3759
View as plain text  
#At file:///home/bar/mysql-bzr/mysql-next-mr-wl3759/ based on revid:alik@stripped

 2884 Alexander Barkov	2009-09-30
      Backporting WL#3759 Optimize identifier conversion in client-server protocol
      
      This patch provides performance improvements:
      - send_fields() when character_set_results = latin1
        is now about twice faster for column/table/database
        names, consisting on ASCII characters.
      
      Changes:
      
      - Protocol doesn't use "convert" temporary buffer anymore,
        and converts strings directly to "packet".
      
      - General conversion optimization: quick conversion
        of ASCII strings was added.
      
      modified files:
      
      include/m_ctype.h
      - Adding a new flag.
      - Adding a new function prototype
      
      libmysqld/lib_sql.cc
      - Adding quick conversion method for embedded library:
        conversion is now done directly to result buffer,
        without using a temporary buffer.
      
      mysys/charset.c
      - Mark all dynamic ucs2 character sets as non-ASCII
      - Mark some dymamic 7bit and 8bit charsets as non-ASCII
        (for example swe7 is not fully ASCII compatible).
      
      sql/protocol.cc
      - Adding quick method to convert a string directly
        into protocol buffer, without using a temporary buffer.
      
      sql/protocol.h
      - Adding a new method prototype
      
      sql/sql_string.cc
        Optimization for conversion between two ASCII-compatible charsets:
      - quickly convert ASCII strings,
        switch to mc_wc->wc_mb method only when a non-ASCII character is met.
      - copy four ASCII characters at once on i386
      
      strings/conf_to_src.c
      - Marking non-ASCII character sets with a flag.
      
      strings/ctype-extra.c
      - Regenerating ctype-extra.c by running "conf_to_src".
      
      strings/ctype-uca.c
      - Marking UCS2 character set as non-ASCII.
      
      strings/ctype-ucs2.c
      - Marking UCS2 character set as non-ASCII.
      
      strings/ctype.c
      - A new function to detect if a 7bit or 8bit character set
        is ascii compatible.

    modified:
      include/m_ctype.h
      libmysqld/lib_sql.cc
      mysys/charset.c
      sql/protocol.cc
      sql/protocol.h
      sql/sql_string.cc
      strings/conf_to_src.c
      strings/ctype-extra.c
      strings/ctype-uca.c
      strings/ctype-ucs2.c
      strings/ctype.c
=== modified file 'include/m_ctype.h'
--- a/include/m_ctype.h	2009-06-10 08:59:49 +0000
+++ b/include/m_ctype.h	2009-09-30 05:09:28 +0000
@@ -87,6 +87,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
 #define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
 #define MY_CS_HIDDEN	2048   /* don't display in SHOW          */	
 #define MY_CS_PUREASCII 4096   /* if a charset is pure ascii     */
+#define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
 #define MY_CHARSET_UNDEFINED 0
 
 /* Character repertoire flags */
@@ -474,6 +475,7 @@ my_bool my_charset_is_ascii_based(CHARSE
 my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
 uint my_charset_repertoire(CHARSET_INFO *cs);
 
+my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
 
 #define	_MY_U	01	/* Upper case */
 #define	_MY_L	02	/* Lower case */

=== modified file 'libmysqld/lib_sql.cc'
--- a/libmysqld/lib_sql.cc	2009-09-10 09:18:29 +0000
+++ b/libmysqld/lib_sql.cc	2009-09-30 05:09:28 +0000
@@ -1175,3 +1175,27 @@ int vprint_msg_to_log(enum loglevel leve
   mysql_server_last_errno= CR_UNKNOWN_ERROR;
   return 0;
 }
+
+
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  uint dummy_error;
+  char *field_buf;
+  if (!thd->mysql)            // bootstrap file handling
+    return false;
+
+  if (!(field_buf= (char*) alloc_root(alloc, conv_length + sizeof(uint) + 1)))
+    return true;
+  *next_field= field_buf + sizeof(uint);
+  length= copy_and_convert(*next_field, conv_length, to_cs,
+                           (const char*) from, length, from_cs, &dummy_error);
+  *(uint *) field_buf= length;
+  (*next_field)[length]= 0;
+  if (next_mysql_field->max_length < length)
+    next_mysql_field->max_length= length;
+  ++next_field;
+  ++next_mysql_field;
+  return false;
+}

=== modified file 'mysys/charset.c'
--- a/mysys/charset.c	2009-06-10 08:59:49 +0000
+++ b/mysys/charset.c	2009-09-30 05:09:28 +0000
@@ -248,6 +248,7 @@ static int add_collation(CHARSET_INFO *c
       {
 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
         copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
+        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
 #endif        
       }
       else if (!strcmp(cs->csname, "utf8"))
@@ -280,6 +281,8 @@ static int add_collation(CHARSET_INFO *c
 
         if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
           all_charsets[cs->number]->state|= MY_CS_PUREASCII;
+        if (!my_charset_is_ascii_compatible(cs))
+          all_charsets[cs->number]->state|= MY_CS_NONASCII;
       }
     }
     else

=== modified file 'sql/protocol.cc'
--- a/sql/protocol.cc	2009-09-10 09:18:29 +0000
+++ b/sql/protocol.cc	2009-09-30 05:09:28 +0000
@@ -58,6 +58,64 @@ bool Protocol_binary::net_store_data(con
 }
 
 
+
+
+/*
+  net_store_data() - extended version with character set conversion.
+  
+  It is optimized for short strings whose length after
+  conversion is garanteed to be less than 251, which accupies
+  exactly one byte to store length. It allows not to use
+  the "convert" member as a temporary buffer, conversion
+  is done directly to the "packet" member.
+  The limit 251 is good enough to optimize send_result_set_metadata()
+  because column, table, database names fit into this limit.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint dummy_errors;
+  /* Calculate maxumum possible result length */
+  uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  if (conv_length > 250)
+  {
+    /*
+      For strings with conv_length greater than 250 bytes
+      we don't know how many bytes we will need to store length: one or two,
+      because we don't know result length until conversion is done.
+      For example, when converting from utf8 (mbmaxlen=3) to latin1,
+      conv_length=300 means that the result length can vary between 100 to 300.
+      length=100 needs one byte, length=300 needs to bytes.
+      
+      Thus conversion directly to "packet" is not worthy.
+      Let's use "convert" as a temporary buffer.
+    */
+    return (convert->copy((const char*) from, length, from_cs,
+                          to_cs, &dummy_errors) ||
+            net_store_data((const uchar*) convert->ptr(), convert->length()));
+  }
+
+  ulong packet_length= packet->length();
+  ulong new_length= packet_length + conv_length + 1;
+
+  if (new_length > packet->alloced_length() && packet->realloc(new_length))
+    return 1;
+
+  char *length_pos= (char*) packet->ptr() + packet_length;
+  char *to= length_pos + 1;
+
+  to+= copy_and_convert(to, conv_length, to_cs,
+                        (const char*) from, length, from_cs, &dummy_errors);
+
+  net_store_length((uchar*) length_pos, to - length_pos - 1);
+  packet->length((uint) (to - packet->ptr()));
+  return 0;
+}
+#endif
+
+
 /**
   Send a error string to client.
 
@@ -827,10 +885,10 @@ bool Protocol::store_string_aux(const ch
       fromcs != &my_charset_bin &&
       tocs != &my_charset_bin)
   {
-    uint dummy_errors;
-    return (convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
-            net_store_data((uchar*) convert->ptr(), convert->length()));
+    /* Store with conversion */
+    return net_store_data((uchar*) from, length, fromcs, tocs);
   }
+  /* Store without conversion */
   return net_store_data((uchar*) from, length);
 }
 

=== modified file 'sql/protocol.h'
--- a/sql/protocol.h	2009-09-10 09:18:29 +0000
+++ b/sql/protocol.h	2009-09-30 05:09:28 +0000
@@ -43,6 +43,8 @@ protected:
   MYSQL_FIELD *next_mysql_field;
   MEM_ROOT *alloc;
 #endif
+  bool net_store_data(const uchar *from, size_t length,
+                      CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
   bool store_string_aux(const char *from, size_t length,
                         CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
 public:

=== modified file 'sql/sql_string.cc'
--- a/sql/sql_string.cc	2009-07-31 17:14:52 +0000
+++ b/sql/sql_string.cc	2009-09-30 05:09:28 +0000
@@ -794,10 +794,11 @@ String *copy_if_not_alloced(String *to,S
 */
 
 
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
-                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
-                 uint *errors)
+static uint32
+copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                          const char *from, uint32 from_length,
+                          CHARSET_INFO *from_cs,
+                          uint *errors)
 {
   int         cnvres;
   my_wc_t     wc;
@@ -849,6 +850,65 @@ outp:
 }
 
 
+/*
+  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+*/
+uint32
+copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+                 uint *errors)
+{
+  /*
+    If any of the character sets is not ASCII compatible,
+    immediately switch to slow mb_wc->wc_mb method.
+  */
+  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+    return copy_and_convert_extended(to, to_length, to_cs,
+                                     from, from_length, from_cs, errors);
+
+  uint32 length= min(to_length, from_length), length2= length;
+
+#if defined(__i386__)
+  /*
+    Special loop for i386, it allows to refer to a
+    non-aligned memory block as UINT32, which makes
+    it possible to copy four bytes at once. This
+    gives about 10% performance improvement comparing
+    to byte-by-byte loop.
+  */
+  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+  {
+    if ((*(uint32*)from) & 0x80808080)
+      break;
+    *((uint32*) to)= *((const uint32*) from);
+  }
+#endif
+
+  for (; ; *to++= *from++, length--)
+  {
+    if (!length)
+    {
+      *errors= 0;
+      return length2;
+    }
+    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+    {
+      uint32 copied_length= length2 - length;
+      to_length-= copied_length;
+      from_length-= copied_length;
+      return copied_length + copy_and_convert_extended(to, to_length,
+                                                       to_cs,
+                                                       from, from_length,
+                                                       from_cs,
+                                                       errors);
+    }
+  }
+
+  DBUG_ASSERT(FALSE); // Should never get to here
+  return 0;           // Make compiler happy
+}
+
+
 /**
   Copy string with HEX-encoding of "bad" characters.
 

=== modified file 'strings/conf_to_src.c'
--- a/strings/conf_to_src.c	2008-11-14 16:29:38 +0000
+++ b/strings/conf_to_src.c	2009-09-30 05:09:28 +0000
@@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
 {
   fprintf(f,"{\n");
   fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s%s,\n",
           cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
           cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
           is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
-          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
+          !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
   
   if (cs->name)
   {

=== modified file 'strings/ctype-extra.c'
--- a/strings/ctype-extra.c	2007-08-20 11:47:31 +0000
+++ b/strings/ctype-extra.c	2009-09-30 05:09:28 +0000
@@ -6,7 +6,7 @@
     ./conf_to_src ../sql/share/charsets/ > FILE
 */
 
-/* Copyright (C) 2000-2007 MySQL AB
+/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_swe7
 {
   10,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_swedish_ci",                     /* coll name     */
   "",                       /* comment       */
@@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_swe7
 {
   82,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_bin",                     /* coll name     */
   "",                       /* comment       */
@@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = {
 }
 ,
 #endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  92,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
-  "geostd8",                     /* cset name     */
-  "geostd8_general_ci",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_general_ci,                   /* ctype         */
-  to_lower_geostd8_general_ci,                /* lower         */
-  to_upper_geostd8_general_ci,                /* upper         */
-  sort_order_geostd8_general_ci,            /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_general_ci,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_simple_ci_handler,
-}
-,
-#endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  93,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
-  "geostd8",                     /* cset name     */
-  "geostd8_bin",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_bin,                   /* ctype         */
-  to_lower_geostd8_bin,                /* lower         */
-  to_upper_geostd8_bin,                /* upper         */
-  NULL,                     /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_bin,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_bin_handler,
-}
-,
-#endif
 #ifdef HAVE_CHARSET_latin1
 {
   94,0,0,

=== modified file 'strings/ctype-uca.c'
--- a/strings/ctype-uca.c	2009-06-04 09:35:29 +0000
+++ b/strings/ctype-uca.c	2009-09-30 05:09:28 +0000
@@ -8087,7 +8087,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_u
 CHARSET_INFO my_charset_ucs2_unicode_ci=
 {
     128,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_unicode_ci",	/* name         */
     "",			/* comment      */
@@ -8119,7 +8119,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci=
 CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
 {
     129,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_icelandic_ci",/* name         */
     "",			/* comment      */
@@ -8151,7 +8151,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_u
 CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
 {
     130,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_latvian_ci",	/* name         */
     "",			/* comment      */
@@ -8183,7 +8183,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca
 CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
 {
     131,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_romanian_ci",	/* name         */
     "",			/* comment      */
@@ -8215,7 +8215,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uc
 CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
 {
     132,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovenian_ci",/* name         */
     "",			/* comment      */
@@ -8247,7 +8247,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_u
 CHARSET_INFO my_charset_ucs2_polish_uca_ci=
 {
     133,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_polish_ci",	/* name         */
     "",			/* comment      */
@@ -8279,7 +8279,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_
 CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
 {
     134,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_estonian_ci",	/* name         */
     "",			/* comment      */
@@ -8311,7 +8311,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uc
 CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
 {
     135,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish_ci",	/* name         */
     "",			/* comment      */
@@ -8343,7 +8343,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca
 CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
 {
     136,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_swedish_ci",	/* name         */
     "",			/* comment      */
@@ -8375,7 +8375,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca
 CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
 {
     137,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_turkish_ci",	/* name         */
     "",			/* comment      */
@@ -8407,7 +8407,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca
 CHARSET_INFO my_charset_ucs2_czech_uca_ci=
 {
     138,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_czech_ci",	/* name         */
     "",			/* comment      */
@@ -8440,7 +8440,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_c
 CHARSET_INFO my_charset_ucs2_danish_uca_ci=
 {
     139,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_danish_ci",	/* name         */
     "",			/* comment      */
@@ -8472,7 +8472,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_
 CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
 {
     140,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_lithuanian_ci",/* name         */
     "",			/* comment      */
@@ -8504,7 +8504,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_
 CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
 {
     141,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovak_ci",	/* name         */
     "",			/* comment      */
@@ -8536,7 +8536,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_
 CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
 {
     142,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish2_ci",	/* name         */
     "",			/* comment      */
@@ -8569,7 +8569,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uc
 CHARSET_INFO my_charset_ucs2_roman_uca_ci=
 {
     143,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_roman_ci",	/* name         */
     "",			/* comment      */
@@ -8602,7 +8602,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_c
 CHARSET_INFO my_charset_ucs2_persian_uca_ci=
 {
     144,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_persian_ci",	/* name         */
     "",			/* comment      */
@@ -8635,7 +8635,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca
 CHARSET_INFO my_charset_ucs2_esperanto_uca_ci=
 {
     145,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_esperanto_ci",/* name         */
     "",			/* comment      */
@@ -8668,7 +8668,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_u
 CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
 {
     146,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_hungarian_ci",/* name         */
     "",			/* comment      */
@@ -8748,7 +8748,7 @@ extern MY_CHARSET_HANDLER my_charset_utf
 CHARSET_INFO my_charset_utf8_unicode_ci=
 {
     192,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_unicode_ci",	/* name         */
     "",			/* comment      */
@@ -8781,7 +8781,7 @@ CHARSET_INFO my_charset_utf8_unicode_ci=
 CHARSET_INFO my_charset_utf8_icelandic_uca_ci=
 {
     193,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_icelandic_ci",/* name         */
     "",			/* comment      */
@@ -8813,7 +8813,7 @@ CHARSET_INFO my_charset_utf8_icelandic_u
 CHARSET_INFO my_charset_utf8_latvian_uca_ci=
 {
     194,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_latvian_ci",	/* name         */
     "",			/* comment      */
@@ -8845,7 +8845,7 @@ CHARSET_INFO my_charset_utf8_latvian_uca
 CHARSET_INFO my_charset_utf8_romanian_uca_ci=
 {
     195,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_romanian_ci",	/* name         */
     "",			/* comment      */
@@ -8877,7 +8877,7 @@ CHARSET_INFO my_charset_utf8_romanian_uc
 CHARSET_INFO my_charset_utf8_slovenian_uca_ci=
 {
     196,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_slovenian_ci",/* name         */
     "",			/* comment      */
@@ -8909,7 +8909,7 @@ CHARSET_INFO my_charset_utf8_slovenian_u
 CHARSET_INFO my_charset_utf8_polish_uca_ci=
 {
     197,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_polish_ci",	/* name         */
     "",			/* comment      */
@@ -8941,7 +8941,7 @@ CHARSET_INFO my_charset_utf8_polish_uca_
 CHARSET_INFO my_charset_utf8_estonian_uca_ci=
 {
     198,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_estonian_ci",	/* name         */
     "",			/* comment      */
@@ -8973,7 +8973,7 @@ CHARSET_INFO my_charset_utf8_estonian_uc
 CHARSET_INFO my_charset_utf8_spanish_uca_ci=
 {
     199,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_spanish_ci",	/* name         */
     "",			/* comment      */
@@ -9005,7 +9005,7 @@ CHARSET_INFO my_charset_utf8_spanish_uca
 CHARSET_INFO my_charset_utf8_swedish_uca_ci=
 {
     200,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_swedish_ci",	/* name         */
     "",			/* comment      */
@@ -9037,7 +9037,7 @@ CHARSET_INFO my_charset_utf8_swedish_uca
 CHARSET_INFO my_charset_utf8_turkish_uca_ci=
 {
     201,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_turkish_ci",	/* name         */
     "",			/* comment      */
@@ -9069,7 +9069,7 @@ CHARSET_INFO my_charset_utf8_turkish_uca
 CHARSET_INFO my_charset_utf8_czech_uca_ci=
 {
     202,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_czech_ci",	/* name         */
     "",			/* comment      */
@@ -9102,7 +9102,7 @@ CHARSET_INFO my_charset_utf8_czech_uca_c
 CHARSET_INFO my_charset_utf8_danish_uca_ci=
 {
     203,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_danish_ci",	/* name         */
     "",			/* comment      */
@@ -9134,7 +9134,7 @@ CHARSET_INFO my_charset_utf8_danish_uca_
 CHARSET_INFO my_charset_utf8_lithuanian_uca_ci=
 {
     204,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_lithuanian_ci",/* name         */
     "",			/* comment      */
@@ -9166,7 +9166,7 @@ CHARSET_INFO my_charset_utf8_lithuanian_
 CHARSET_INFO my_charset_utf8_slovak_uca_ci=
 {
     205,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_slovak_ci",	/* name         */
     "",			/* comment      */
@@ -9198,7 +9198,7 @@ CHARSET_INFO my_charset_utf8_slovak_uca_
 CHARSET_INFO my_charset_utf8_spanish2_uca_ci=
 {
     206,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_spanish2_ci",	/* name         */
     "",			/* comment      */
@@ -9230,7 +9230,7 @@ CHARSET_INFO my_charset_utf8_spanish2_uc
 CHARSET_INFO my_charset_utf8_roman_uca_ci=
 {
     207,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_roman_ci",	/* name         */
     "",			/* comment      */
@@ -9262,7 +9262,7 @@ CHARSET_INFO my_charset_utf8_roman_uca_c
 CHARSET_INFO my_charset_utf8_persian_uca_ci=
 {
     208,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_persian_ci",	/* name         */
     "",			/* comment      */
@@ -9294,7 +9294,7 @@ CHARSET_INFO my_charset_utf8_persian_uca
 CHARSET_INFO my_charset_utf8_esperanto_uca_ci=
 {
     209,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_esperanto_ci",/* name         */
     "",			/* comment      */
@@ -9326,7 +9326,7 @@ CHARSET_INFO my_charset_utf8_esperanto_u
 CHARSET_INFO my_charset_utf8_hungarian_uca_ci=
 {
     210,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "utf8",		/* cs name    */
     "utf8_hungarian_ci",/* name         */
     "",			/* comment      */

=== modified file 'strings/ctype-ucs2.c'
--- a/strings/ctype-ucs2.c	2009-08-28 16:21:54 +0000
+++ b/strings/ctype-ucs2.c	2009-09-30 05:09:28 +0000
@@ -1712,7 +1712,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handl
 CHARSET_INFO my_charset_ucs2_general_ci=
 {
     35,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_general_ci",	/* name         */
     "",			/* comment      */
@@ -1744,7 +1744,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
 CHARSET_INFO my_charset_ucs2_bin=
 {
     90,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_bin",		/* name         */
     "",			/* comment      */

=== modified file 'strings/ctype.c'
--- a/strings/ctype.c	2009-06-06 13:05:44 +0000
+++ b/strings/ctype.c	2009-09-30 05:09:28 +0000
@@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_IN
   }
   return 1;
 }
+
+
+/*
+  Shared function between conf_to_src and mysys.
+  Check if a 8bit character set is compatible with
+  ascii on the range 0x00..0x7F.
+*/
+my_bool
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
+{
+  uint i;
+  if (!cs->tab_to_uni)
+    return 1;
+  for (i= 0; i < 128; i++)
+  {
+    if (cs->tab_to_uni[i] != i)
+      return 0;
+  }
+  return 1;
+}


Attachment: [text/bzr-bundle] bzr/bar@mysql.com-20090930050928-b9s0fsonv1pvsp5s.bundle
Thread
bzr commit into mysql-5.4.5-next-mr branch (bar:2884) WL#3759Alexander Barkov30 Sep