Alexander, salut!
The patch is good to push.
I inlined some comments we discussed on #rep.
Thanks for the work and explanations!
cheers,
Andrei
> ChangeSet@stripped, 2007-06-26 12:59:05+05:00, bar@stripped +12 -0
> WL#3759 Optimize identifier conversion in client-server protocol
Please paste a few words from the very concise problem description in the
WL. Otherwise we see only the solution section.
> - Protocol doesn't use "convert" temporary buffer anymore,
> and converts strings directly to "packet".
Just to be more specific, say "in cases where the size is less than
251, which is satisfied by db, table, and field names".
> - General conversion optimization: quick conversion
> of ASCII strings was added.
>
>
> send_fields() when character_set_results = latin1
> is now about twice faster for column/table/database
> names, consisting on ASCII characters.
>
That's a nice fact!
> include/m_ctype.h@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +2 -0
> Adding a new flag.
>
> libmysqld/lib_sql.cc@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +23 -0
> Adding quick conversion method for embedded library:
> conversion is now done directly to result buffer,
> without using a temporary buffer.
>
> mysys/charset.c@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +3 -1
> - Mark all dynamic ucs2 character sets as non-ASCII
> - Mark some dymamic 7bit and 8bit charsets as non-ASCII
> (for example swe7 is not fully ASCII compatible).
>
> sql/protocol.cc@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +58 -3
> Adding quick method to convert a string directly
> into protocol buffer, without using a temporary
> buffer.
>
> sql/protocol.h@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +2 -0
> Adding new method prototype.
>
> sql/share/charsets/ascii.xml@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +1 -1
> Fixing a mistake in ASCII->Unicode mapping:
> 0x7f used to be "unassigned", it is now mapped
> to "U+007F DELETE".
>
> sql/sql_string.cc@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +62 -4
> Optimization for conversion between two ASCII-compatible charsets:
> - quickly convert ASCII strings,
> switch to mc_wc->wc_mb method only when a non-ASCII character is met.
> - copy four ASCII characters at once on i386
>
> strings/conf_to_src.c@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +4 -2
> Mark non-ASCII character sets with a flag.
>
> strings/ctype-extra.c@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +4 -4
> Regenerating ctype-extra.c by running "conf_to_src".
>
> strings/ctype-uca.c@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +19 -19
> Mark UCS2 character set as non-ASCII.
>
> strings/ctype-ucs2.c@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +2 -2
> Mark UCS2 character set as non-ASCII.
>
> strings/ctype.c@stripped, 2007-06-26 12:59:03+05:00, bar@stripped +20 -0
> A new function to detect if a 7bit or 8bit character set
> is ascii compatible.
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
is called once per charset, as you explained, so there is no need for the i386-word
comparison optimization that would be helpful in per-query use.
>
> # This is a BitKeeper patch. What follows are the unified diffs for the
> # set of deltas contained in the patch. The rest of the patch, the part
> # that BitKeeper cares about, is below these diffs.
> # User: bar
> # Host: bar.myoffice.izhnet.ru
> # Root: /home/bar/mysql-work/mysql-5.2.wl3759
>
> --- 1.135/include/m_ctype.h 2007-04-03 16:16:07 +05:00
> +++ 1.136/include/m_ctype.h 2007-06-26 12:59:03 +05:00
> @@ -84,6 +84,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
> #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
> #define MY_CS_CSSORT 1024 /* if case sensitive sort order */
> #define MY_CS_HIDDEN 2048 /* don't display in SHOW */
> +#define MY_CS_NONASCII 4096 /* if not ASCII-compatible */
> #define MY_CHARSET_UNDEFINED 0
>
> /* Flags for strxfrm */
> @@ -520,6 +521,7 @@ uint my_strxfrm_pad_desc_and_reverse(CHA
> uchar *str, uchar *frmend, uchar *strend,
> uint nweights, uint flags, uint level);
>
> +my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
>
> #define _MY_U 01 /* Upper case */
> #define _MY_L 02 /* Lower case */
>
> --- 1.150/mysys/charset.c 2007-04-03 16:16:08 +05:00
> +++ 1.151/mysys/charset.c 2007-06-26 12:59:03 +05:00
> @@ -242,7 +242,7 @@ static int add_collation(CHARSET_INFO *c
> new->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
> new->mbminlen= 2;
> new->mbmaxlen= 2;
> - new->state |= MY_CS_AVAILABLE | MY_CS_LOADED;
> + new->state |= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
> #endif
> }
> else
> @@ -266,6 +266,8 @@ static int add_collation(CHARSET_INFO *c
> if (sort_order && sort_order['A'] < sort_order['a'] &&
> sort_order['a'] < sort_order['B'])
> all_charsets[cs->number]->state|= MY_CS_CSSORT;
> + if (!my_charset_is_ascii_compatible(cs))
> + all_charsets[cs->number]->state|= MY_CS_NONASCII;
> }
> }
> else
>
> --- 1.124/sql/protocol.cc 2007-01-29 03:47:30 +04:00
> +++ 1.125/sql/protocol.cc 2007-06-26 12:59:03 +05:00
> @@ -55,6 +55,61 @@ bool Protocol_prep::net_store_data(const
>
>
> /*
> + net_store_data() - extended version with character set conversion.
> +
> + It is optimized for short strings whose length after
> + conversion is garanteed to be less than 251, which accupies
> + exactly one byte to store length. It allows not to use
> + the "convert" member as a temporary buffer, conversion
> + is done directly to the "packet" member.
> + The limit 251 is good enough to optimize send_fields()
> + because column, table, database names fit into this limit.
> +*/
> +
> +#ifndef EMBEDDED_LIBRARY
> +bool Protocol::net_store_data(const char *from, uint length,
> + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
> +{
> + uint dummy_errors;
> + /* Calculate maxumum possible result length */
> + uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
> + if (conv_length > 250)
> + {
> + /*
> + For strings with conv_length greater than 250 bytes
> + we don't know how many bytes we will need to store length: one or two,
> + because we don't know result length until conversion is done.
> + For example, when converting from utf8 (mbmaxlen=3) to latin1,
> + conv_length=300 means that the result length can vary between 100 to 300.
> + length=100 needs one byte, length=300 needs to bytes.
> +
> + Thus conversion directly to "packet" is not worthy.
> + Let's use "convert" as a temporary buffer.
> + */
> + return convert->copy(from, length, from_cs, to_cs, &dummy_errors) ||
> + net_store_data(convert->ptr(), convert->length());
> + }
> +
> + ulong packet_length= packet->length();
> + ulong new_length= packet_length + conv_length + 1;
> +
> + if (new_length > packet->alloced_length() &&
> packet->realloc(new_length))
> + return 1;
> +
> + char *length_pos= (char*) packet->ptr() + packet_length;
> + char *to= length_pos + 1;
> +
> + to+= copy_and_convert(to, conv_length, to_cs,
> + from, length, from_cs, &dummy_errors);
> +
> + net_store_length(length_pos, to - length_pos - 1);
> + packet->length((uint) (to - packet->ptr()));
> + return 0;
> +}
> +#endif
> +
> +
> +/*
> Send a error string to client
>
> Design note:
> @@ -793,10 +848,10 @@ bool Protocol::store_string_aux(const ch
> fromcs != &my_charset_bin &&
> tocs != &my_charset_bin)
> {
> - uint dummy_errors;
> - return convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
> - net_store_data(convert->ptr(), convert->length());
> + /* Store with conversion */
> + return net_store_data(from, length, fromcs, tocs);
> }
> + /* Store without conversion */
> return net_store_data(from, length);
> }
>
>
> --- 1.102/sql/sql_string.cc 2007-01-24 21:57:01 +04:00
> +++ 1.103/sql/sql_string.cc 2007-06-26 12:59:03 +05:00
> @@ -785,10 +785,11 @@ String *copy_if_not_alloced(String *to,S
> */
>
>
> -uint32
> -copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
> - const char *from, uint32 from_length, CHARSET_INFO *from_cs,
> - uint *errors)
> +static uint32
> +copy_and_convert_mb(char *to, uint32 to_length, CHARSET_INFO *to_cs,
> + const char *from, uint32 from_length,
> + CHARSET_INFO *from_cs,
> + uint *errors)
<andrei> bar, okay. next Q: copy_and_convert_mb - perhaps is better to be
copy_and_convert_non_ascii. `mb' makes me thinking on multi-bytes ?
<bar> andrei, or copy_and_convert_extended.
<andrei> bar, or so.
<bar> because it can copy both extended characters and ascii :)
<andrei> bar, please rename to that to your choice.
<bar> agree, "extended" is better describes what this function does than "mb".
> {
> int cnvres;
> my_wc_t wc;
> @@ -839,6 +840,63 @@ outp:
> }
> *errors= error_count;
> return (uint32) (to - to_start);
> +}
> +
> +
> +/*
> + Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
> +*/
> +uint32
> +copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
> + const char *from, uint32 from_length, CHARSET_INFO *from_cs,
> + uint *errors)
> +{
> + /*
> + If any of the character sets is not ASCII compatible,
> + immediately switch to slow mb_wc->wc_mb method.
> + */
> + if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
> + return copy_and_convert_mb(to, to_length, to_cs,
> + from, from_length, from_cs, errors);
> +
> + uint32 length= min(to_length, from_length), length2= length;
> +
> +#if defined(__i386__)
> + /*
> + Special loop for i386, it allows to refer to a
> + non-aligned memory block as UINT32, which makes
> + it possible to copy four bytes at once. This
> + gives about 10% performance improvement comparing
> + to byte-by-byte loop.
> + */
> + for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
> + {
> + if ((*(uint32*)from) & 0x80808080)
> + break;
> + *((uint32*) to)= *((const uint32*) from);
> + }
> +#endif
> +
> + for (; ; *to++= *from++, length--)
> + {
> + if (!length)
> + {
> + *errors= 0;
> + return length2;
> + }
> + if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
> + {
> + uint32 copied_length= length2 - length;
> + to_length-= copied_length;
> + from_length-= copied_length;
> + return copied_length + copy_and_convert_mb(to, to_length, to_cs,
> + from, from_length, from_cs,
> + errors);
> + }
> + }
> +
> + DBUG_ASSERT(FALSE); // Should never get to here
> + return 0; // Make compiler happy
> }
>
We could save two lines here [from #rep]:
once execution enters the loop, a return is always reached - eventually either length
becomes zero or the mb return is taken.
<bar> right.
<andrei> bar, my point is that i don't see too much reason to have the
assert though it's up to you to leave it or ...
>
>
> --- 1.22/strings/conf_to_src.c 2007-04-03 16:16:08 +05:00
> +++ 1.23/strings/conf_to_src.c 2007-06-26 12:59:03 +05:00
> @@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
> cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
> }
>
> +
> void dispcset(FILE *f,CHARSET_INFO *cs)
> {
> fprintf(f,"{\n");
> fprintf(f," %d,%d,%d,\n",cs->number,0,0);
> - fprintf(f," MY_CS_COMPILED%s%s%s,\n",
> + fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
> cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
> cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
> - is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
> + is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
> + !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
>
> if (cs->name)
> {
>
> --- 1.65/strings/ctype.c 2006-12-23 23:20:33 +04:00
> +++ 1.66/strings/ctype.c 2007-06-26 12:59:03 +05:00
> @@ -306,3 +306,23 @@ my_bool my_parse_charset_xml(const char
> my_xml_parser_free(&p);
> return rc;
> }
> +
> +
> +/*
> + Shared function between conf_to_src and mysys.
> + Check if a 8bit character set is compatible with
> + ascii on the range 0x00..0x7F.
> +*/
> +my_bool
> +my_charset_is_ascii_compatible(CHARSET_INFO *cs)
> +{
> + uint i;
> + if (!cs->tab_to_uni)
> + return 1;
> + for (i= 0; i < 128; i++)
> + {
> + if (cs->tab_to_uni[i] != i)
> + return 0;
> + }
> + return 1;
> +}
>
> --- 1.44/strings/ctype-uca.c 2007-04-03 16:16:09 +05:00
> +++ 1.45/strings/ctype-uca.c 2007-06-26 12:59:03 +05:00
> @@ -8060,7 +8060,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_u
> CHARSET_INFO my_charset_ucs2_general_uca=
> {
> 128,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_unicode_ci", /* name */
> "", /* comment */
> @@ -8094,7 +8094,7 @@ CHARSET_INFO my_charset_ucs2_general_uca
> CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
> {
> 129,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_icelandic_ci",/* name */
> "", /* comment */
> @@ -8128,7 +8128,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_u
> CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
> {
> 130,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_latvian_ci", /* name */
> "", /* comment */
> @@ -8162,7 +8162,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca
> CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
> {
> 131,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_romanian_ci", /* name */
> "", /* comment */
> @@ -8196,7 +8196,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uc
> CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
> {
> 132,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_slovenian_ci",/* name */
> "", /* comment */
> @@ -8230,7 +8230,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_u
> CHARSET_INFO my_charset_ucs2_polish_uca_ci=
> {
> 133,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_polish_ci", /* name */
> "", /* comment */
> @@ -8264,7 +8264,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_
> CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
> {
> 134,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_estonian_ci", /* name */
> "", /* comment */
> @@ -8298,7 +8298,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uc
> CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
> {
> 135,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_spanish_ci", /* name */
> "", /* comment */
> @@ -8332,7 +8332,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca
> CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
> {
> 136,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_swedish_ci", /* name */
> "", /* comment */
> @@ -8366,7 +8366,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca
> CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
> {
> 137,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_turkish_ci", /* name */
> "", /* comment */
> @@ -8400,7 +8400,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca
> CHARSET_INFO my_charset_ucs2_czech_uca_ci=
> {
> 138,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_czech_ci", /* name */
> "", /* comment */
> @@ -8435,7 +8435,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_c
> CHARSET_INFO my_charset_ucs2_danish_uca_ci=
> {
> 139,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_danish_ci", /* name */
> "", /* comment */
> @@ -8469,7 +8469,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_
> CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
> {
> 140,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_lithuanian_ci",/* name */
> "", /* comment */
> @@ -8503,7 +8503,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_
> CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
> {
> 141,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_slovak_ci", /* name */
> "", /* comment */
> @@ -8537,7 +8537,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_
> CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
> {
> 142,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_spanish2_ci", /* name */
> "", /* comment */
> @@ -8572,7 +8572,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uc
> CHARSET_INFO my_charset_ucs2_roman_uca_ci=
> {
> 143,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_roman_ci", /* name */
> "", /* comment */
> @@ -8607,7 +8607,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_c
> CHARSET_INFO my_charset_ucs2_persian_uca_ci=
> {
> 144,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_persian_ci", /* name */
> "", /* comment */
> @@ -8642,7 +8642,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca
> CHARSET_INFO my_charset_ucs2_esperanto_uca_ci=
> {
> 145,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_esperanto_ci",/* name */
> "", /* comment */
> @@ -8677,7 +8677,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_u
> CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
> {
> 146,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_hungarian_ci",/* name */
> "", /* comment */
>
> --- 1.5/sql/share/charsets/ascii.xml 2006-12-23 23:04:28 +04:00
> +++ 1.6/sql/share/charsets/ascii.xml 2007-06-26 12:59:03 +05:00
> @@ -97,7 +97,7 @@
> 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
> 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
> 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
> -0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 0000
> +0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
> 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
> 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
> 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
>
> --- 1.29/strings/ctype-extra.c 2007-04-03 16:16:08 +05:00
> +++ 1.30/strings/ctype-extra.c 2007-06-26 12:59:03 +05:00
> @@ -908,7 +908,7 @@ uint16 to_uni_ascii_general_ci[] = {
> 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
> 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
> 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
> -0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000,
> +0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
> 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
> 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
> 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
> @@ -4589,7 +4589,7 @@ uint16 to_uni_ascii_bin[] = {
> 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
> 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
> 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
> -0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x0000,
> +0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
> 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
> 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
> 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
> @@ -6801,7 +6801,7 @@ CHARSET_INFO compiled_charsets[] = {
> #ifdef HAVE_CHARSET_swe7
> {
> 10,0,0,
> - MY_CS_COMPILED|MY_CS_PRIMARY,
> + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII,
> "swe7", /* cset name */
> "swe7_swedish_ci", /* coll name */
> "", /* comment */
> @@ -8551,7 +8551,7 @@ CHARSET_INFO compiled_charsets[] = {
> #ifdef HAVE_CHARSET_swe7
> {
> 82,0,0,
> - MY_CS_COMPILED|MY_CS_BINSORT,
> + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII,
> "swe7", /* cset name */
> "swe7_bin", /* coll name */
> "", /* comment */
>
> --- 1.69/strings/ctype-ucs2.c 2007-04-03 16:16:09 +05:00
> +++ 1.70/strings/ctype-ucs2.c 2007-06-26 12:59:03 +05:00
> @@ -1677,7 +1677,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handl
> CHARSET_INFO my_charset_ucs2_general_ci=
> {
> 35,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_general_ci", /* name */
> "", /* comment */
> @@ -1711,7 +1711,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
> CHARSET_INFO my_charset_ucs2_bin=
> {
> 90,0,0, /* number */
> - MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
> + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
> "ucs2", /* cs name */
> "ucs2_bin", /* name */
> "", /* comment */
>
> --- 1.37/sql/protocol.h 2007-02-01 21:34:06 +04:00
> +++ 1.38/sql/protocol.h 2007-06-26 12:59:03 +05:00
> @@ -42,6 +42,8 @@ protected:
> MYSQL_FIELD *next_mysql_field;
> MEM_ROOT *alloc;
> #endif
> + bool net_store_data(const char *from, uint length,
> + CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
> bool store_string_aux(const char *from, uint length,
> CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
> public:
>
> --- 1.132/libmysqld/lib_sql.cc 2007-02-23 15:23:37 +04:00
> +++ 1.133/libmysqld/lib_sql.cc 2007-06-26 12:59:03 +05:00
> @@ -1082,3 +1082,26 @@ bool Protocol::net_store_data(const char
> return false;
> }
>
> +
> +bool Protocol::net_store_data(const char *from, uint length,
> + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
> +{
> + uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
> + uint dummy_error;
> + char *field_buf;
> + if (!thd->mysql) // bootstrap file handling
> + return false;
> +
> + if (!(field_buf= alloc_root(alloc, conv_length + sizeof(uint) + 1)))
> + return true;
> + *next_field= field_buf + sizeof(uint);
> + length= copy_and_convert(*next_field, conv_length, to_cs,
> + from, length, from_cs, &dummy_error);
> + *(uint *) field_buf= length;
> + (*next_field)[length]= 0;
> + if (next_mysql_field->max_length < length)
> + next_mysql_field->max_length= length;
> + ++next_field;
> + ++next_mysql_field;
> + return false;
> +}
| Thread |
|---|
| • Re: bk commit - 5.1 tree (bar:1.2478) WL#3759 | Andrei Elkin | 28 Jun |