Based on Vlad's patch: http://lists.mysql.com/commits/105379
Version N2, with wide popen().
=== modified file 'client/mysql.cc'
--- client/mysql.cc 2011-02-15 12:38:39 +0000
+++ client/mysql.cc 2011-02-17 11:16:33 +0000
@@ -195,6 +195,20 @@
const char *default_dbug_option="d:t:o,/tmp/mysql.trace";
+
+#ifdef __WIN__
+static my_bool use_unicode_api= 1;
+static my_bool conversion_done= 0; /* Flag to avoid conversion on reconnect */
+#endif /* __WIN__ */
+
+/* Printing flags for tee_write(); they are bit values and may be OR-ed */
+#define MY_PRINT_ESC_0 1 /* Replace 0x00 bytes with "\0" */
+#define MY_PRINT_SPS_0 2 /* Replace 0x00 bytes with a space */
+#define MY_PRINT_XML 4 /* Encode XML entities */
+#define MY_PRINT_MB 8 /* Recognize multi-byte characters */
+#define MY_PRINT_CTRL 16 /* Replace TAB, NL, backslash with "\t", "\n", "\\" */
+
+void tee_write(FILE *file, const char *s, size_t slen, int flags);
void tee_fprintf(FILE *file, const char *fmt, ...);
void tee_fputs(const char *s, FILE *file);
void tee_puts(const char *s, FILE *file);
@@ -1113,6 +1127,17 @@
close(stdout_fileno_copy); /* Clean up dup(). */
}
+#ifdef __WIN__
+ /*
+ Convert command line parameters from UTF16LE to UTF8MB4
+ when "mysql.exe --unicode" is specified.
+ Otherwise, the usual argc/argv are used,
+ which carry the arguments in the ANSI code page.
+ */
+ if (use_unicode_api)
+ my_win_translate_command_line_args(&my_charset_utf8mb4_bin, &argc, &argv);
+#endif
+
if (load_defaults("my",load_default_groups,&argc,&argv))
{
my_end(0);
@@ -1885,6 +1910,15 @@
tmpbuf.alloc(65535);
tmpbuf.length(0);
buffer.length(0);
+
+ if (use_unicode_api)
+ {
+ line= my_win_console_readline(charset_info,
+ (char*) tmpbuf.ptr(),
+ tmpbuf.alloced_length());
+ goto got_a_line;
+ }
+
size_t clen;
do
{
@@ -1901,6 +1935,8 @@
*/
if (line)
line= buffer.c_ptr();
+
+got_a_line:
#else
if (opt_outfile)
fputs(prompt, OUTFILE);
@@ -3455,19 +3491,12 @@
grid. (The \0 is also the reason we can't use fprintf() .)
*/
unsigned int i;
- const char *p;
if (right_justified)
for (i= data_length; i < total_bytes_to_send; i++)
tee_putc((int)' ', PAGER);
- for (i= 0, p= data; i < data_length; i+= 1, p+= 1)
- {
- if (*p == '\0')
- tee_putc((int)' ', PAGER);
- else
- tee_putc((int)*p, PAGER);
- }
+ tee_write(PAGER, data, data_length, MY_PRINT_SPS_0|MY_PRINT_MB);
if (! right_justified)
for (i= data_length; i < total_bytes_to_send; i++)
@@ -3587,16 +3616,7 @@
tee_fprintf(PAGER, "%*s: ",(int) max_length,field->name);
if (cur[off])
{
- unsigned int i;
- const char *p;
-
- for (i= 0, p= cur[off]; i < lengths[off]; i+= 1, p+= 1)
- {
- if (*p == '\0')
- tee_putc((int)' ', PAGER);
- else
- tee_putc((int)*p, PAGER);
- }
+ tee_write(PAGER, cur[off], lengths[off], MY_PRINT_SPS_0|MY_PRINT_MB);
tee_putc('\n', PAGER);
}
else
@@ -3666,16 +3686,7 @@
if (!src)
tee_fputs("NULL", PAGER);
else
- {
- for (const char *p = src; length; p++, length--)
- {
- const char *t;
- if ((t = array_value(xmlmeta, *p)))
- tee_fputs(t, PAGER);
- else
- tee_putc(*p, PAGER);
- }
- }
+ tee_write(PAGER, src, length, MY_PRINT_XML|MY_PRINT_MB);
}
@@ -3686,37 +3697,9 @@
tee_fputs("NULL", PAGER);
else
{
- if (opt_raw_data)
- {
- unsigned long i;
- /* Can't use tee_fputs(), it stops with NUL characters. */
- for (i= 0; i < length; i++, pos++)
- tee_putc(*pos, PAGER);
- }
- else for (const char *end=pos+length ; pos != end ; pos++)
- {
-#ifdef USE_MB
- int l;
- if (use_mb(charset_info) &&
- (l = my_ismbchar(charset_info, pos, end)))
- {
- while (l--)
- tee_putc(*pos++, PAGER);
- pos--;
- continue;
- }
-#endif
- if (!*pos)
- tee_fputs("\\0", PAGER); // This makes everything hard
- else if (*pos == '\t')
- tee_fputs("\\t", PAGER); // This would destroy tab format
- else if (*pos == '\n')
- tee_fputs("\\n", PAGER); // This too
- else if (*pos == '\\')
- tee_fputs("\\\\", PAGER);
- else
- tee_putc(*pos, PAGER);
- }
+ int flags= MY_PRINT_MB | (opt_raw_data ? 0 : (MY_PRINT_ESC_0|MY_PRINT_CTRL));
+ /* Can't use tee_fputs(), it stops with NUL characters. */
+ tee_write(PAGER, pos, length, flags);
}
}
@@ -4317,7 +4300,30 @@
mysql_options(&mysql, MYSQL_INIT_COMMAND, init_command);
}
- mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
+ mysql_set_character_set(&mysql, default_charset);
+#ifdef __WIN__
+ uint cnv_errors;
+ String converted_database, converted_user;
+ if (use_unicode_api &&
+ !my_charset_same(&my_charset_utf8mb4_bin, mysql.charset))
+ {
+ /* Convert user and database from UTF8MB4 to connection character set */
+ if (user)
+ {
+ converted_user.copy(user, strlen(user) + 1,
+ &my_charset_utf8mb4_bin, mysql.charset,
+ &cnv_errors);
+ user= (char*) converted_user.ptr();
+ }
+ if (database)
+ {
+ converted_database.copy(database, strlen(database) + 1,
+ &my_charset_utf8mb4_bin, mysql.charset,
+ &cnv_errors);
+ database= (char*) converted_database.ptr();
+ }
+ }
+#endif
if (opt_plugin_dir && *opt_plugin_dir)
mysql_options(&mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir);
@@ -4339,7 +4345,39 @@
}
return -1; // Retryable
}
-
+
+#ifdef __WIN__
+ /* Convert --execute buffer from UTF8MB4 to connection character set */
+ if (!conversion_done++ &&
+ status.line_buff &&
+ !status.line_buff->file && /* Convert only -e buffer, not real file */
+ status.line_buff->buffer < status.line_buff->end && /* Non-empty */
+ use_unicode_api &&
+ !my_charset_same(&my_charset_utf8mb4_bin, mysql.charset))
+ {
+ String tmp;
+ size_t len= status.line_buff->end - status.line_buff->buffer;
+ uint dummy_errors;
+ /*
+ Don't convert trailing '\n' character - it was appended during
+ last batch_readline_command() call.
+ Otherwise we'll get an extra line, which makes some tests fail.
+ */
+ if (status.line_buff->buffer[len - 1] == '\n')
+ len--;
+ if (tmp.copy(status.line_buff->buffer, len,
+ &my_charset_utf8mb4_bin, mysql.charset, &dummy_errors))
+ return 1;
+
+ /* Free the old line buffer */
+ batch_readline_end(status.line_buff);
+
+ /* Re-initialize line buffer from the converted string */
+ if (!(status.line_buff= batch_readline_command(NULL, (char*) tmp.c_ptr_safe())))
+ return 1;
+ }
+#endif /* __WIN__ */
+
charset_info= mysql.charset;
connected=1;
@@ -4645,11 +4683,75 @@
}
+/**
+ Write a counted byte string to a stream, and also to OUTFILE
+ when opt_outfile is set (directly or through the tee_putc()/tee_fputs()
+ helpers). Embedded 0x00 bytes are allowed because the length is explicit.
+
+ Per byte, the first matching rule below wins:
+ - MY_PRINT_MB: a multi-byte character of charset_info is copied as is
+ - MY_PRINT_XML: bytes that have an entry in xmlmeta are replaced by it
+ - MY_PRINT_SPS_0: 0x00 is printed as a space
+ - MY_PRINT_ESC_0: 0x00 is printed as "\0"
+ - MY_PRINT_CTRL: TAB, NL and backslash are printed as "\t", "\n", "\\"
+ - otherwise the byte is printed unchanged.
+
+ @param file Stream to write to
+ @param s String to write
+ @param slen String length
+ @param flags Bit mask of MY_PRINT_xxx flags
+*/
+void tee_write(FILE *file, const char *s, size_t slen, int flags)
+{
+ const char *se;
+
+ for (se= s + slen; s < se; s++)
+ {
+ const char *t;
+
+ if (flags & MY_PRINT_MB)
+ {
+ int mblen, i;
+ if (use_mb(charset_info) &&
+ (mblen= my_ismbchar(charset_info, s, se)))
+ {
+#ifdef __WIN__
+ /*
+ On Windows the whole character is written in one call, so that
+ the console path never receives a part of a multi-byte sequence.
+ NOTE(review): this branch always ends with "continue", so the
+ byte-by-byte loop after #endif (and "i") is unused on Windows.
+ */
+ if (use_unicode_api && my_win_is_console(file))
+ my_win_console_write(charset_info, s, mblen);
+ else
+ fwrite(s, 1, mblen, file);
+ if (opt_outfile)
+ fwrite(s, 1, mblen, OUTFILE);
+ s+= mblen - 1;
+ continue;
+#endif
+ for (i= 0; i < mblen; i++)
+ tee_putc(s[i], file);
+ /* The for-loop header adds the remaining 1 */
+ s+= mblen - 1;
+ continue;
+ }
+ }
+
+ if ((flags & MY_PRINT_XML) && (t= array_value(xmlmeta, *s)))
+ tee_fputs(t, file);
+ else if ((flags & MY_PRINT_SPS_0) && *s == '\0')
+ tee_putc((int) ' ', file); // Checked before MY_PRINT_ESC_0, so it wins
+ else if ((flags & MY_PRINT_ESC_0) && *s == '\0')
+ tee_fputs("\\0", file); // This makes everything hard
+ else if ((flags & MY_PRINT_CTRL) && *s == '\t')
+ tee_fputs("\\t", file); // This would destroy tab format
+ else if ((flags & MY_PRINT_CTRL) && *s == '\n')
+ tee_fputs("\\n", file); // This too
+ else if ((flags & MY_PRINT_CTRL) && *s == '\\')
+ tee_fputs("\\\\", file);
+ else
+ tee_putc((int) *s, file);
+ }
+}
+
+
void tee_fprintf(FILE *file, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
+#ifdef __WIN__
+ if (use_unicode_api && my_win_is_console(file))
+ my_win_console_vfprintf(charset_info, fmt, args);
+ else
+#endif
(void) vfprintf(file, fmt, args);
va_end(args);
@@ -4664,6 +4766,11 @@
void tee_fputs(const char *s, FILE *file)
{
+#ifdef __WIN__
+ if (use_unicode_api && my_win_is_console(file))
+ my_win_console_fputs(charset_info, s);
+ else
+#endif
fputs(s, file);
if (opt_outfile)
fputs(s, OUTFILE);
@@ -4672,17 +4779,17 @@
void tee_puts(const char *s, FILE *file)
{
- fputs(s, file);
- fputc('\n', file);
- if (opt_outfile)
- {
- fputs(s, OUTFILE);
- fputc('\n', OUTFILE);
- }
+ tee_fputs(s, file);
+ tee_putc('\n', file);
}
void tee_putc(int c, FILE *file)
{
+#ifdef __WIN__
+ if (use_unicode_api && my_win_is_console(file))
+ my_win_console_putc(charset_info, c);
+ else
+#endif
putc(c, file);
if (opt_outfile)
putc(c, OUTFILE);
=== modified file 'client/mysqltest.cc'
--- client/mysqltest.cc 2011-01-26 20:13:31 +0000
+++ client/mysqltest.cc 2011-02-17 11:05:22 +0000
@@ -302,7 +302,7 @@
Q_ENABLE_WARNINGS, Q_DISABLE_WARNINGS,
Q_ENABLE_INFO, Q_DISABLE_INFO,
Q_ENABLE_METADATA, Q_DISABLE_METADATA,
- Q_EXEC, Q_DELIMITER,
+ Q_EXEC, Q_EXECW, Q_DELIMITER,
Q_DISABLE_ABORT_ON_ERROR, Q_ENABLE_ABORT_ON_ERROR,
Q_DISPLAY_VERTICAL_RESULTS, Q_DISPLAY_HORIZONTAL_RESULTS,
Q_QUERY_VERTICAL, Q_QUERY_HORIZONTAL, Q_SORTED_RESULT,
@@ -373,6 +373,7 @@
"enable_metadata",
"disable_metadata",
"exec",
+ "execw",
"delimiter",
"disable_abort_on_error",
"enable_abort_on_error",
@@ -2750,8 +2751,52 @@
#endif
-FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode)
+FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode,
+ struct st_command *command)
{
+#ifdef __WIN__
+  /*
+    --execw is for tests executing commands containing non-ASCII characters.
+
+    To correctly start such a program on Windows, we need to use the "wide"
+    version of popen, with prior translation of the command line from
+    the file character set to a wide string. We use the current value
+    of --character_set as the file character set, so before using --execw
+    make sure to set --character_set properly.
+
+    If we use the non-wide version of popen, Windows internally
+    converts the command line from the current ANSI code page to a wide
+    string. When the character set of the command line does not match the
+    current ANSI code page, non-ASCII characters get garbled in most cases.
+
+    On Linux, the command line passed to popen() is treated
+    as a binary string; no internal to-wide or from-wide
+    character set conversion happens, so we don't need to do anything.
+    On Linux --execw is just a synonym for --exec.
+
+    For simplicity, assume that the command line is limited to 4KB
+    (like in cmd.exe) and that mode is at most 10 characters.
+    Longer input is cut at the buffer size by my_convert().
+  */
+  if (command->type == Q_EXECW)
+  {
+    wchar_t wcmd[4096];
+    wchar_t wmode[10];
+    uint dummy_errors;
+    size_t len;
+    /* One wchar_t is held back in each buffer for the terminator below */
+    len= my_convert((char*) wcmd, sizeof(wcmd) - sizeof(wcmd[0]),
+                    &my_charset_utf16le_bin,
+                    ds_cmd->str, strlen(ds_cmd->str), charset_info,
+                    &dummy_errors);
+    /* my_convert() returns bytes; the terminator index is in wchar_t's */
+    wcmd[len / sizeof(wchar_t)]= 0;
+    len= my_convert((char*) wmode, sizeof(wmode) - sizeof(wmode[0]),
+                    &my_charset_utf16le_bin,
+                    mode, strlen(mode), charset_info, &dummy_errors);
+    wmode[len / sizeof(wchar_t)]= 0;
+    return _wpopen(wcmd, wmode);
+  }
+#endif /* __WIN__ */
+
#if defined __WIN__ && defined USE_CYGWIN
/* Dump the command into a sh script file and execute with popen */
str_to_file(tmp_sh_name, ds_cmd->str, ds_cmd->length);
@@ -2888,7 +2933,7 @@
DBUG_PRINT("info", ("Executing '%s' as '%s'",
command->first_argument, ds_cmd.str));
- if (!(res_file= my_popen(&ds_cmd, "r")) && command->abort_on_error)
+ if (!(res_file= my_popen(&ds_cmd, "r", command)) && command->abort_on_error)
{
dynstr_free(&ds_cmd);
die("popen(\"%s\", \"r\") failed", command->first_argument);
@@ -8763,6 +8808,7 @@
do_shutdown_server(command);
break;
case Q_EXEC:
+ case Q_EXECW:
do_exec(command);
command_executed++;
break;
=== modified file 'include/my_sys.h'
--- include/my_sys.h 2011-02-08 15:54:12 +0000
+++ include/my_sys.h 2011-02-16 16:47:14 +0000
@@ -947,8 +947,14 @@
/* implemented in my_conio.c */
char* my_cgets(char *string, size_t clen, size_t* plen);
-
-#endif
+my_bool my_win_is_console(FILE *file);
+char *my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize);
+void my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen);
+void my_win_console_fputs(CHARSET_INFO *cs, const char *data);
+void my_win_console_putc(CHARSET_INFO *cs, int c);
+void my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args);
+int my_win_translate_command_line_args(CHARSET_INFO *cs, int *ac, char ***av);
+#endif /* __WIN__ */
#include <mysql/psi/psi.h>
=== modified file 'mysql-test/t/grant.test'
--- mysql-test/t/grant.test 2010-12-15 16:15:40 +0000
+++ mysql-test/t/grant.test 2011-02-17 09:41:06 +0000
@@ -1401,9 +1401,10 @@
#
# Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte
#
+--character_set utf8
set names utf8;
grant select on test.* to
ÑзеÑ_ÑзеÑ@localhost;
---exec $MYSQL --default-character-set=utf8 --user=ÑзеÑ_ÑÐ·ÐµÑ -e "select user()"
+--execw $MYSQL --default-character-set=utf8
--user=ÑзеÑ_ÑзеÑ
-e "select user()"
revoke all on test.* from
ÑзеÑ_ÑзеÑ@localhost;
drop user
ÑзеÑ_ÑзеÑ@localhost;
--error ER_WRONG_STRING_LENGTH
=== modified file 'mysql-test/t/mysql.test'
--- mysql-test/t/mysql.test 2011-02-05 05:06:29 +0000
+++ mysql-test/t/mysql.test 2011-02-17 11:09:16 +0000
@@ -51,13 +51,14 @@
#
# Bug#17939 Wrong table format when using UTF8 strings
#
---exec $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as '__tañgè
Ããmé'" 2>&1
---exec $MYSQL --default-character-set=utf8 --table -e "SELECT '__tañgè Ããmé' as 'John
Doe'" 2>&1
+--character_set utf8
+--execw $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as '__tañgè
Ããmé'" 2>&1
+--execw $MYSQL --default-character-set=utf8 --table -e "SELECT '__tañgè
Ããmé' as 'John Doe'" 2>&1
#
# Bug#18265 -- mysql client: No longer right-justifies numeric columns
#
---exec $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values ('<----------------------->'); insert into t1 (k) values ('<-----'); insert into t1 (k) values ('Τη
γλÏÏÏα'); insert into t1 (k) values ('áá´
á·áá'); select * from t1; DROP TABLE t1;"
+--execw $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values ('<----------------------->'); insert into t1 (k) values ('<-----'); insert into t1 (k) values ('Τη
γλÏÏÏα'); insert into t1 (k) values ('áá´ á·áá');
select * from t1; DROP TABLE t1;"
#
# "DESCRIBE" commands may return strange NULLness flags.
=== modified file 'mysql-test/t/mysql_cp932.test'
--- mysql-test/t/mysql_cp932.test 2007-02-21 16:50:48 +0000
+++ mysql-test/t/mysql_cp932.test 2011-02-17 11:13:16 +0000
@@ -15,8 +15,9 @@
--exec $MYSQL --default-character-set=cp932 test -e "charset utf8;"
# its usage to switch internally in mysql to requested charset
---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select '\'; create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('\'); select * from t1; drop table t1;"
---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select '\'"
---exec $MYSQL --default-character-set=utf8 test -e "/*charset cp932 */; set character_set_client= cp932; select '\'"
---exec $MYSQL --default-character-set=utf8 test -e "/*!\C cp932 */; set character_set_client= cp932; select '\'"
+--character_set latin1
+--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select '\'; create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('\'); select * from t1; drop table t1;"
+--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select '\'"
+--execw $MYSQL --default-character-set=latin1 test -e "/*charset cp932 */; set names cp932, character_set_results=utf8; select '\'"
+--execw $MYSQL --default-character-set=latin1 test -e "/*!\C cp932 */; set character_set_client= cp932; select '\'"
=== modified file 'mysql-test/t/mysqlbinlog-cp932.test'
--- mysql-test/t/mysqlbinlog-cp932.test 2009-09-07 05:42:54 +0000
+++ mysql-test/t/mysqlbinlog-cp932.test 2011-02-17 09:16:34 +0000
@@ -10,8 +10,10 @@
# Bug#16217 (mysql client did not know how not switch its internal charset)
create table t3 (f text character set utf8);
create table t4 (f text character set cp932);
---exec $MYSQL --default-character-set=utf8 test -e "insert into t3 values(_utf8'ã½')"
---exec $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'\');"
+--character_set utf8
+--execw $MYSQL --default-character-set=utf8 test -e "insert into t3 values(_utf8'ã½')"
+--character_set cp932
+--execw $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'\');"
flush logs;
rename table t3 to t03, t4 to t04;
let $MYSQLD_DATADIR= `select @@datadir`;
=== modified file 'mysys/my_conio.c'
--- mysys/my_conio.c 2009-02-13 16:41:47 +0000
+++ mysys/my_conio.c 2011-02-16 16:47:14 +0000
@@ -219,4 +219,261 @@
return result;
}
+
+/* Windows console handling */
+
+/* Maximum line length on Windows console */
+#define MAX_CONSOLE_LINE_SIZE 65535
+
+/**
+ Determine whether a stdio stream is attached to a Windows console.
+ The check is whether GetConsoleMode() succeeds on the OS handle
+ behind the stream; the returned mode value itself is not used.
+
+ @param file Stream to test
+
+ @return
+ @retval 0 if file is not a Windows console
+ @retval 1 if file is a Windows console
+*/
+my_bool
+my_win_is_console(FILE *file)
+{
+ DWORD mode;
+ if (GetConsoleMode((HANDLE) _get_osfhandle(_fileno(file)), &mode))
+ return 1;
+ return 0;
+}
+
+
+/**
+ Read a line from the Windows console using the Unicode API
+ and translate the input to the session character set.
+ Note, as the Windows API breaks supplementary characters
+ into two wchar_t values, we cannot read and convert individual
+ wchar_t values separately. So let's use a buffer for
+ Unicode console input, and then convert it to "cs" in a single shot.
+ The result is terminated with a '\0' character; the line terminator
+ ('\n', and any '\r') is not stored in it.
+ At most MAX_CONSOLE_LINE_SIZE wchar_t values are collected per line.
+
+ @param cs Character set to convert to.
+ @param mbbuf Write input data here.
+ @param mbbufsize Number of bytes available in mbbuf.
+
+ @return Pointer to mbbuf, or NULL if ReadConsoleW() fails or
+ returns no characters.
+*/
+char *
+my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize)
+{
+ uint dummy_errors;
+ static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+ size_t pos, mblen;
+ DWORD console_mode;
+ HANDLE console= GetStdHandle(STD_INPUT_HANDLE);
+
+ DBUG_ASSERT(mbbufsize > 0); /* Need space for at least trailing '\0' */
+ /*
+ Remember the current console mode; it is restored on every exit path.
+ NOTE(review): the result of GetConsoleMode() is not checked, so
+ console_mode is uninitialized if the call fails - confirm whether
+ this can happen for the callers.
+ */
+ GetConsoleMode(console, &console_mode);
+ SetConsoleMode(console, ENABLE_LINE_INPUT |
+ ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
+ for(pos= 0; ; )
+ {
+ DWORD nchars;
+ /* Fetch one wchar_t at a time into the next free slot */
+ BOOL ok= ReadConsoleW(console, &u16buf[pos], 1, &nchars, NULL);
+ if (!ok || nchars == 0)
+ {
+ SetConsoleMode(console, console_mode);
+ return NULL;
+ }
+ /* Drop '\r': pos is not advanced, so the slot is overwritten */
+ if (u16buf[pos] == L'\r')
+ continue;
+ /* Stop at end of line or when the buffer is full; slot "pos" is not kept */
+ if (pos == MAX_CONSOLE_LINE_SIZE || u16buf[pos] == L'\n')
+ break;
+ pos++;
+ }
+ SetConsoleMode(console, console_mode);
+ /* Convert Unicode to session character set, leaving room for '\0' */
+ mblen= my_convert(mbbuf, mbbufsize - 1, cs,
+ (const char *) u16buf, pos * sizeof(wchar_t),
+ &my_charset_utf16le_bin, &dummy_errors);
+ DBUG_ASSERT(mblen < mbbufsize); /* Safety */
+ mbbuf[mblen]= 0;
+ return mbbuf;
+}
+
+
+/**
+ Translate client charset to Windows wchars for console I/O.
+ Unlike copy_and_convert(), in case of a wrong multi-byte sequence
+ we don't print '?' character, we fallback to ISO-8859-1 instead.
+ This gives a better idea how binary data (e.g. BLOB) look like.
+
+ Conversion stops at the end of the input, at the first character that
+ decodes to 0, or when "to" has no room for the next character.
+ No terminating 0 is written to "to".
+
+ @param cs Character set of the input string
+ @param from Input string
+ @param from_length Length of the input string in bytes
+ @param to[OUT] Write Unicode data here
+ @param to_chars Number of wchar_t elements available in "to"
+
+ @return Number of wchar_t elements written to "to"
+*/
+static size_t
+my_mbstou16s(CHARSET_INFO *cs, const uchar * from, size_t from_length,
+ wchar_t *to, size_t to_chars)
+{
+ CHARSET_INFO *to_cs= &my_charset_utf16le_bin;
+ const uchar *from_end= from + from_length;
+ wchar_t *to_orig= to, *to_end= to + to_chars;
+ my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+ while (from < from_end)
+ {
+ int cnvres;
+ my_wc_t wc;
+ if ((cnvres= (*mb_wc)(cs, &wc, from, from_end)) > 0)
+ {
+ /* A character that decodes to 0 ends the conversion */
+ if (!wc)
+ break;
+ from+= cnvres;
+ }
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */
+ from+= 1;
+ }
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ from+= (-cnvres);
+ wc= '?';
+ }
+ else /* Incomplete character */
+ {
+ wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */
+ from+= 1;
+ }
+outp:
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, (uchar*) to_end)) > 0)
+ {
+ /* We can never convert only a part of wchar_t */
+ DBUG_ASSERT((cnvres % sizeof(wchar_t)) == 0);
+ /* cnvres returns number of bytes, convert to number of wchar_t's */
+ to+= cnvres / sizeof(wchar_t);
+ }
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ /* Retry once with '?'; the "wc != '?'" test prevents an endless loop */
+ wc= '?';
+ goto outp;
+ }
+ else
+ break; /* Not enough space */
+ }
+ return to - to_orig;
+}
+
+
+/**
+  Write a string in the given character set to the Windows console.
+  As Windows breaks supplementary characters into two parts,
+  we cannot use a simple loop sending the result of
+  cs->cset->mb_wc() to the console.
+  So we convert the string from the client charset to an array of wchar_t,
+  then write the array to the console in a single shot.
+
+  Output always goes to the STD_OUTPUT_HANDLE console.
+  Input that does not fit into MAX_CONSOLE_LINE_SIZE + 1 wchar_t values
+  is cut at that size. The result of WriteConsoleW() is not checked.
+
+  @param cs       Character set of the string
+  @param data     String to print
+  @param datalen  Length of input string in bytes
+*/
+void
+my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen)
+{
+  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+  /*
+    my_mbstou16s() expects the capacity in wchar_t elements.
+    Passing sizeof(u16buf), which is in bytes, would let it write
+    past the end of the buffer.
+  */
+  size_t nchars= my_mbstou16s(cs, (const uchar*) data, datalen,
+                              u16buf, sizeof(u16buf) / sizeof(u16buf[0]));
+  DWORD nwritten;
+  WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE),
+                u16buf, (DWORD) nchars, &nwritten, NULL);
+}
+
+
+/**
+ Write a single-byte character to console.
+ Note: one should not send the parts of a single multi-byte character
+ in separate consecutive my_win_console_putc() calls, because each
+ call converts its byte on its own.
+ For multi-byte characters use my_win_console_write() instead.
+
+ @param cs Character set of the input character
+ @param c Character (single byte)
+*/
+void
+my_win_console_putc(CHARSET_INFO *cs, int c)
+{
+ char ch= (char) c;
+ my_win_console_write(cs, &ch, 1);
+}
+
+
+/**
+ Write a 0-terminated string to Windows console.
+ The string is passed to my_win_console_write() with its strlen();
+ no newline is appended.
+
+ @param cs Character set of the string to print
+ @param data String to print
+*/
+void
+my_win_console_fputs(CHARSET_INFO *cs, const char *data)
+{
+ my_win_console_write(cs, data, strlen(data));
+}
+
+
+/**
+  Handle formatted output on the Windows console.
+  The text is formatted into a static buffer of MAX_CONSOLE_LINE_SIZE + 1
+  bytes and then sent through my_win_console_write(); longer output is
+  cut at the buffer size.
+
+  @param cs    Character set of the formatted text
+  @param fmt   printf-style format string
+  @param args  Arguments for fmt
+*/
+void
+my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args)
+{
+  static char buff[MAX_CONSOLE_LINE_SIZE + 1];
+  int len= vsnprintf(buff, sizeof(buff) - 1, fmt, args);
+  /*
+    The return value cannot be used as a length when the output did not
+    fit: a C99 vsnprintf() returns the length that would have been
+    written, and the Microsoft _vsnprintf() returns -1 without
+    terminating the buffer. In both cases measure what is really there.
+  */
+  if (len < 0 || (size_t) len >= sizeof(buff) - 1)
+  {
+    buff[sizeof(buff) - 1]= '\0';
+    len= (int) strlen(buff);
+  }
+  my_win_console_write(cs, buff, (size_t) len);
+}
+
+
+#include <shellapi.h>
+
+/**
+  Translate Unicode command line parameters to the given character set
+  (typically utf8mb4).
+  The parameters are taken from GetCommandLineW(), not from the incoming
+  *argv. Translated parameters are allocated using my_once_alloc().
+
+  @param      cs    Character set to convert parameters to.
+  @param[OUT] argc  Write number of parameters here
+  @param[OUT] argv  Write pointer to allocated parameters here.
+
+  @retval 0  Success, *argc and *argv were replaced
+  @retval 1  The command line could not be split or memory could not be
+             allocated; *argc and *argv are left unchanged
+*/
+int
+my_win_translate_command_line_args(CHARSET_INFO *cs, int *argc, char ***argv)
+{
+  int i, ac, rc= 1;
+  char **av;
+  wchar_t **wargs= CommandLineToArgvW(GetCommandLineW(), &ac);
+
+  if (!wargs)
+    return 1;
+
+  /* Allocate the new argument vector; zero fill gives the NULL at av[ac] */
+  av= (char**) my_once_alloc((ac + 1) * sizeof(char*), MYF(MY_ZEROFILL));
+  if (!av)
+    goto end;
+
+  /* Iterate over what CommandLineToArgvW() returned, not the old *argc */
+  for (i= 0; i < ac; i++)
+  {
+    uint dummy_errors;
+    size_t arg_len= wcslen(wargs[i]);
+    size_t len, alloced_len= arg_len * cs->mbmaxlen + 1;
+    if (!(av[i]= (char *) my_once_alloc(alloced_len, MYF(0))))
+      goto end;
+    /* Keep the last byte free for the terminator written below */
+    len= my_convert(av[i], alloced_len - 1, cs,
+                    (const char *) wargs[i], arg_len * sizeof(wchar_t),
+                    &my_charset_utf16le_bin, &dummy_errors);
+    DBUG_ASSERT(len < alloced_len);
+    av[i][len]= '\0';
+  }
+  *argv= av;
+  *argc= ac;
+  rc= 0;
+
+end:
+  /* The array returned by CommandLineToArgvW() is released with LocalFree() */
+  LocalFree((HLOCAL) wargs);
+  return rc;
+}
+
#endif /* __WIN__ */
=== modified file 'sql-common/client.c'
--- sql-common/client.c 2011-01-31 15:55:58 +0000
+++ sql-common/client.c 2011-02-16 16:47:14 +0000
@@ -4246,11 +4246,25 @@
if (mysql->options.charset_dir)
charsets_dir= mysql->options.charset_dir;
+ if (!mysql->net.vio)
+ {
+ /* Initialize with automatic OS character set detection. */
+ mysql_options(mysql, MYSQL_SET_CHARSET_NAME, cs_name);
+ mysql_init_character_set(mysql);
+ cs_name= mysql->options.charset_name;
+ }
+
if (strlen(cs_name) < MY_CS_NAME_SIZE &&
(cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0))))
{
char buff[MY_CS_NAME_SIZE + 10];
charsets_dir= save_csdir;
+ if (!mysql->net.vio)
+ {
+ /* If there is no connection yet we don't send "SET NAMES" query */
+ mysql->charset= cs;
+ return 0;
+ }
/* Skip execution of "SET NAMES" for pre-4.1 servers */
if (mysql_get_server_version(mysql) < 40100)
return 0;
| Thread |
|---|
| • WL#5331 Support Unicode for Windows command line client | Alexander Barkov | 17 Feb |