3648 Alexander Barkov 2011-02-15
A pre-requisite patch for
WL#5331 Support Unicode for Windows command line client
Moving character set conversion routine implementation from
sql/sql_string.cc to strings/ctype.c, as conversion functionality
is occasionally needed in client tools.
modified:
client/sql_string.cc
client/sql_string.h
include/m_ctype.h
sql/sql_string.cc
sql/sql_string.h
strings/ctype.c
3647 Luis Soares 2011-02-14
BUG#11765599: 58584: MOVE TIMESTAMPS IN SLAVE_[SQL|IO]_ERROR TO
SEPARATE COLUMNS
Adding test comments as suggested by reviewer for expected
error codes.
modified:
mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
mysql-test/suite/rpl/t/rpl_checksum.test
mysql-test/suite/rpl/t/rpl_show_errors.test
=== modified file 'client/sql_string.cc'
--- a/client/sql_string.cc 2010-09-13 09:58:11 +0000
+++ b/client/sql_string.cc 2011-02-15 11:30:56 +0000
@@ -707,80 +707,6 @@ String *copy_if_not_alloced(String *to,S
Help functions
****************************************************************************/
-/*
- copy a string from one character set to another
-
- SYNOPSIS
- copy_and_convert()
- to Store result here
- to_cs Character set of result string
- from Copy from here
- from_length Length of from string
- from_cs From character set
-
- NOTES
- 'to' must be big enough as form_length * to_cs->mbmaxlen
-
- RETURN
- length of bytes copied to 'to'
-*/
-
-
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length, CHARSET_INFO *from_cs,
- uint *errors)
-{
- int cnvres;
- my_wc_t wc;
- const uchar *from_end= (const uchar*) from+from_length;
- char *to_start= to;
- uchar *to_end= (uchar*) to+to_length;
- my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
- my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
- uint error_count= 0;
-
- while (1)
- {
- if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
- from_end)) > 0)
- from+= cnvres;
- else if (cnvres == MY_CS_ILSEQ)
- {
- error_count++;
- from++;
- wc= '?';
- }
- else if (cnvres > MY_CS_TOOSMALL)
- {
- /*
- A correct multibyte sequence detected
- But it doesn't have Unicode mapping.
- */
- error_count++;
- from+= (-cnvres);
- wc= '?';
- }
- else
- break; // Not enough characters
-
-outp:
- if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
- to+= cnvres;
- else if (cnvres == MY_CS_ILUNI && wc != '?')
- {
- error_count++;
- wc= '?';
- goto outp;
- }
- else
- break;
- }
- *errors= error_count;
- return (uint32) (to - to_start);
-}
-
-
void String::print(String *str)
{
char *st= (char*)Ptr, *end= st+str_length;
=== modified file 'client/sql_string.h'
--- a/client/sql_string.h 2010-10-19 22:51:34 +0000
+++ b/client/sql_string.h 2011-02-15 11:30:56 +0000
@@ -25,9 +25,12 @@
class String;
int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
-uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs, uint *errors);
+inline uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ CHARSET_INFO *from_cs, uint *errors)
+{ /* Thin wrapper kept for existing callers: forwards every argument unchanged to my_convert(). */
+ return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
+}
class String
{
=== modified file 'include/m_ctype.h'
--- a/include/m_ctype.h 2010-12-20 10:28:06 +0000
+++ b/include/m_ctype.h 2011-02-15 11:30:56 +0000
@@ -686,6 +686,10 @@ my_bool my_charset_is_ascii_compatible(C
extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap);
+uint32 my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+ uint *errors);
+
#define _MY_U 01 /* Upper case */
#define _MY_L 02 /* Lower case */
#define _MY_NMR 04 /* Numeral (digit) */
=== modified file 'sql/sql_string.cc'
--- a/sql/sql_string.cc 2011-01-13 08:19:52 +0000
+++ b/sql/sql_string.cc 2011-02-15 11:30:56 +0000
@@ -750,140 +750,6 @@ String *copy_if_not_alloced(String *to,S
Help functions
****************************************************************************/
-/*
- copy a string from one character set to another
-
- SYNOPSIS
- copy_and_convert()
- to Store result here
- to_cs Character set of result string
- from Copy from here
- from_length Length of from string
- from_cs From character set
-
- NOTES
- 'to' must be big enough as form_length * to_cs->mbmaxlen
-
- RETURN
- length of bytes copied to 'to'
-*/
-
-
-static uint32
-copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs,
- uint *errors)
-{
- int cnvres;
- my_wc_t wc;
- const uchar *from_end= (const uchar*) from+from_length;
- char *to_start= to;
- uchar *to_end= (uchar*) to+to_length;
- my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
- my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
- uint error_count= 0;
-
- while (1)
- {
- if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
- from_end)) > 0)
- from+= cnvres;
- else if (cnvres == MY_CS_ILSEQ)
- {
- error_count++;
- from++;
- wc= '?';
- }
- else if (cnvres > MY_CS_TOOSMALL)
- {
- /*
- A correct multibyte sequence detected
- But it doesn't have Unicode mapping.
- */
- error_count++;
- from+= (-cnvres);
- wc= '?';
- }
- else
- break; // Not enough characters
-
-outp:
- if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
- to+= cnvres;
- else if (cnvres == MY_CS_ILUNI && wc != '?')
- {
- error_count++;
- wc= '?';
- goto outp;
- }
- else
- break;
- }
- *errors= error_count;
- return (uint32) (to - to_start);
-}
-
-
-/*
- Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
-*/
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length, CHARSET_INFO *from_cs,
- uint *errors)
-{
- /*
- If any of the character sets is not ASCII compatible,
- immediately switch to slow mb_wc->wc_mb method.
- */
- if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
- return copy_and_convert_extended(to, to_length, to_cs,
- from, from_length, from_cs, errors);
-
- uint32 length= min(to_length, from_length), length2= length;
-
-#if defined(__i386__)
- /*
- Special loop for i386, it allows to refer to a
- non-aligned memory block as UINT32, which makes
- it possible to copy four bytes at once. This
- gives about 10% performance improvement comparing
- to byte-by-byte loop.
- */
- for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
- {
- if ((*(uint32*)from) & 0x80808080)
- break;
- *((uint32*) to)= *((const uint32*) from);
- }
-#endif
-
- for (; ; *to++= *from++, length--)
- {
- if (!length)
- {
- *errors= 0;
- return length2;
- }
- if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
- {
- uint32 copied_length= length2 - length;
- to_length-= copied_length;
- from_length-= copied_length;
- return copied_length + copy_and_convert_extended(to, to_length,
- to_cs,
- from, from_length,
- from_cs,
- errors);
- }
- }
-
- DBUG_ASSERT(FALSE); // Should never get to here
- return 0; // Make compiler happy
-}
-
-
/**
Copy string with HEX-encoding of "bad" characters.
=== modified file 'sql/sql_string.h'
--- a/sql/sql_string.h 2011-01-13 08:19:52 +0000
+++ b/sql/sql_string.h 2011-02-15 11:30:56 +0000
@@ -33,9 +33,12 @@ typedef struct st_mem_root MEM_ROOT;
int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
-uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs, uint *errors);
+inline uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ CHARSET_INFO *from_cs, uint *errors)
+{ /* Thin wrapper kept for existing callers: forwards every argument unchanged to my_convert(). */
+ return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
+}
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
=== modified file 'strings/ctype.c'
--- a/strings/ctype.c 2011-01-19 13:35:54 +0000
+++ b/strings/ctype.c 2011-02-15 11:30:56 +0000
@@ -927,3 +927,143 @@ my_charset_is_ascii_compatible(CHARSET_I
}
return 1;
}
+
+
+/*
+ Convert a string between two character sets, one character at a time:
+ each character is read with the source charset's mb_wc handler and
+ written with the target charset's wc_mb handler.
+ 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+ A character that cannot be read from 'from', or that cannot be stored
+ in 'to_cs', is replaced with '?' and counted in *errors.
+ Conversion stops when mb_wc reports that no further character can be
+ read, or when wc_mb fails for any reason other than an unmappable
+ character.
+
+ @param to[OUT] Store result here
+ @param to_length Size of "to" buffer
+ @param to_cs Character set of result string
+ @param from Copy from here
+ @param from_length Length of the "from" string
+ @param from_cs Character set of the "from" string
+ @param errors[OUT] Number of conversion errors
+
+ @return Number of bytes copied to 'to' string
+*/
+
+static uint32
+my_convert_internal(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,CHARSET_INFO *from_cs,
+ uint *errors)
+{
+ int cnvres; /* Result of the last mb_wc / wc_mb call */
+ my_wc_t wc; /* Character currently being transferred */
+ const uchar *from_end= (const uchar*) from + from_length;
+ char *to_start= to; /* Remembered to compute the number of bytes written */
+ uchar *to_end= (uchar*) to + to_length;
+ my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+ uint error_count= 0;
+
+ while (1)
+ {
+ if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
+ from+= cnvres; /* Positive result: advance by that many source bytes */
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ error_count++; /* Bad sequence: count it, skip one byte, emit '?' */
+ from++;
+ wc= '?';
+ }
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ error_count++;
+ from+= (-cnvres); /* cnvres is non-positive here: skip -cnvres source bytes */
+ wc= '?';
+ }
+ else
+ break; /* Not enough characters */
+
+outp: /* Re-entered with wc == '?' when the original wc cannot be stored in to_cs */
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+ to+= cnvres; /* Positive result: advance by that many destination bytes */
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ error_count++; /* Unmappable character: count it and retry with '?' */
+ wc= '?';
+ goto outp;
+ }
+ else
+ break; /* wc_mb failed for another reason, or '?' itself could not be stored */
+ }
+ *errors= error_count;
+ return (uint32) (to - to_start);
+}
+
+
+/*
+ Convert a string between two character sets.
+ Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+ 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+ If either character set has the MY_CS_NONASCII flag, the whole string is
+ handed to my_convert_internal(). Otherwise the leading run of bytes
+ <= 0x7F is copied verbatim and only the remainder, starting at the first
+ byte > 0x7F, is handed to my_convert_internal().
+
+ If no byte > 0x7F is met within the first min(to_length, from_length)
+ bytes, exactly that many bytes are copied and *errors is set to 0.
+
+ @param to[OUT] Store result here
+ @param to_length Size of "to" buffer
+ @param to_cs Character set of result string
+ @param from Copy from here
+ @param from_length Length of the "from" string
+ @param from_cs Character set of the "from" string
+ @param errors[OUT] Number of conversion errors
+
+ @return Number of bytes copied to 'to' string
+*/
+
+uint32
+my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+ uint *errors)
+{
+ uint32 length, length2; /* length: bytes still to scan; length2: initial value of length */
+ /*
+ If any of the character sets is not ASCII compatible,
+ immediately switch to slow mb_wc->wc_mb method.
+ */
+ if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+ return my_convert_internal(to, to_length, to_cs,
+ from, from_length, from_cs, errors);
+
+ length= length2= min(to_length, from_length); /* Upper bound on bytes the fast path may copy */
+
+#if defined(__i386__)
+ /*
+ Special loop for i386, it allows to refer to a
+ non-aligned memory block as UINT32, which makes
+ it possible to copy four bytes at once. This
+ gives about 10% performance improvement comparing
+ to byte-by-byte loop.
+ */
+ for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+ {
+ if ((*(uint32*)from) & 0x80808080) /* At least one of the four bytes is > 0x7F */
+ break; /* Let the byte-by-byte loop below locate it */
+ *((uint32*) to)= *((const uint32*) from);
+ }
+#endif /* __i386__ */
+
+ for (; ; *to++= *from++, length--) /* Copies one byte per iteration after the checks pass */
+ {
+ if (!length)
+ {
+ *errors= 0; /* Every scanned byte was <= 0x7F and copied verbatim */
+ return length2;
+ }
+ if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+ {
+ uint32 copied_length= length2 - length; /* Bytes already copied by the fast path */
+ to_length-= copied_length; /* Shrink both buffers to the unprocessed tail */
+ from_length-= copied_length;
+ return copied_length + my_convert_internal(to, to_length, to_cs,
+ from, from_length, from_cs,
+ errors);
+ }
+ }
+
+ DBUG_ASSERT(FALSE); /* Should never get to here */
+ return 0; /* Make compiler happy */
+}
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-trunk branch (alexander.barkov:3647 to 3648) WL#5331 | Alexander Barkov | 15 Feb |