Version 4:
- cached my_win_is_console_cached() added
- tee_write improvements, to avoid calling my_win_is_console() too often
WL#5331 Support Unicode for Windows command line client
Based on the original patch from Vladislav Vaintroub:
http://lists.mysql.com/commits/105379
@ client/mysql.cc
- introducing new function tee_write(), to reuse
in a number of places where similar loops displaying
data occurs.
- introducing flags for tee_write(), to support different
printing modes, according to --xml, --raw, --tab, etc,
parameters
- Instead of using argv (which is always in ANSI code page),
we now use UTF16LE API to access command line arguments on Windows,
using this scenario:
a. We translate arguments to UTF8MB4 on startup.
b. Then we process arguments and detect connection character set
from --default-character-set arguments (or my.ini value),
or from the OS localization information by default.
c. Then we convert user, database and the --execute (-e) buffer
from UTF8MB4 to the connection character set.
d. Connect
- Instead of printing using printf/fputs family functions on Windows,
which are limited to the current DOS code page (cp850 on a Western machine)
we now use UTF16LE console API through the new my_win_console_xxx()
functions implemented in my_conio.c
- Using mysql_set_character_set() instead of
mysql_option(OPT_CHARACTER_SET_NAME) to know the ongoing
session character set *before* mysql_real_connect() call,
to convert user and database properly.
- my_win_is_console_cached() has been added to cache
my_win_is_console() result for stdout and stderr,
for performance purposes.
@ client/mysqltest.cc
- Introducing a new mysqltest command: --execw, to
execute commands with non-ASCII characters correctly in Windows.
@ include/my_sys.h
- Adding prototypes for the my_win_console_xxx() functions
@ mysql-test/grant.test
@ mysql-test/t/mysql.test
@ mysql-test/t/mysql_cp932.test
@ mysql-test/t/mysqlbinlog-cp932.test
- Using --execw instead of --exec for the affected tests
@ mysys/my_conio.c
- Implementing functions for Windows console read/write and
command line argument processing.
- Removing my_cgets(), as it's not used any more.
@ sql-common/client.c
- Fixing mysql_set_character_set() to set mysql.charset on
a non-connected "mysql". Previously such a call crashed.
This change allows to know what character set for the
ongoing session is going to be *before* calling mysql_real_connect().
This is needed in mysql.cc, to convert user and database correctly
on Windows.
=== modified file 'client/mysql.cc'
--- client/mysql.cc 2011-02-15 12:38:39 +0000
+++ client/mysql.cc 2011-02-22 12:58:25 +0000
@@ -195,6 +195,43 @@
const char *default_dbug_option="d:t:o,/tmp/mysql.trace";
+#ifdef __WIN__
+/*
+ A flag that indicates if --execute buffer has already been converted,
+ to avoid double conversion on reconnect.
+*/
+static my_bool execute_buffer_conversion_done= 0;
+
+/*
+  my_win_is_console(...) is quite slow.
+  We cache my_win_is_console() results for stdout and stderr.
+  Any other output files, except stdout and stderr,
+  cannot be Windows console.
+
+  The cache is a bit mask indexed by file descriptor number.
+  Note, if mysql.exe is executed from a service, its _fileno(stdout) is -1.
+  Shifting by a negative count (or by a count not less than the width
+  of the type) is undefined behaviour, so such descriptors are mapped
+  to an empty mask and are never reported as console.
+  The same is true for stderr.
+*/
+static inline uint
+win_console_fd_mask(FILE *file)
+{
+  int fd= _fileno(file);
+  if (fd < 0 || fd >= (int) (sizeof(uint) * 8))
+    return 0;
+  return 1U << fd;
+}
+
+static uint win_is_console_cache=
+  (my_win_is_console(stdout) ? win_console_fd_mask(stdout) : 0) |
+  (my_win_is_console(stderr) ? win_console_fd_mask(stderr) : 0);
+
+/*
+  Return non-zero if the descriptor of "file" was recorded
+  as a console descriptor in win_is_console_cache.
+*/
+static inline my_bool
+my_win_is_console_cached(FILE *file)
+{
+  /*
+    Compare with 0 explicitly: my_bool may be narrower than uint,
+    so returning the raw mask could lose the only set bit.
+  */
+  return (win_is_console_cache & win_console_fd_mask(file)) != 0;
+}
+#endif /* __WIN__ */
+
+/* Various printing flags, combined with "|" in the tee_write() "flags" argument */
+#define MY_PRINT_ESC_0 1 /* Replace 0x00 bytes with "\0" */
+#define MY_PRINT_SPS_0 2 /* Replace 0x00 bytes with a space */
+#define MY_PRINT_XML 4 /* Encode XML entities */
+#define MY_PRINT_MB 8 /* Recognize multi-byte characters */
+#define MY_PRINT_CTRL 16 /* Replace TAB, NL and backslash with "\t", "\n", "\\" */
+
+void tee_write(FILE *file, const char *s, size_t slen, int flags);
void tee_fprintf(FILE *file, const char *fmt, ...);
void tee_fputs(const char *s, FILE *file);
void tee_puts(const char *s, FILE *file);
@@ -1113,6 +1150,11 @@
close(stdout_fileno_copy); /* Clean up dup(). */
}
+#ifdef __WIN__
+ /* Convert command line parameters from UTF16LE to UTF8MB4. */
+ my_win_translate_command_line_args(&my_charset_utf8mb4_bin, &argc, &argv);
+#endif
+
if (load_defaults("my",load_default_groups,&argc,&argv))
{
my_end(0);
@@ -1885,22 +1927,9 @@
tmpbuf.alloc(65535);
tmpbuf.length(0);
buffer.length(0);
- size_t clen;
- do
- {
- line= my_cgets((char*)tmpbuf.ptr(), tmpbuf.alloced_length()-1, &clen);
- buffer.append(line, clen);
- /*
- if we got buffer fully filled than there is a chance that
- something else is still in console input buffer
- */
- } while (tmpbuf.alloced_length() <= clen);
- /*
- An empty line is returned from my_cgets when there's error reading :
- Ctrl-c for example
- */
- if (line)
- line= buffer.c_ptr();
+ line= my_win_console_readline(charset_info,
+ (char *) tmpbuf.ptr(),
+ tmpbuf.alloced_length());
#else
if (opt_outfile)
fputs(prompt, OUTFILE);
@@ -3455,19 +3484,12 @@
grid. (The \0 is also the reason we can't use fprintf() .)
*/
unsigned int i;
- const char *p;
if (right_justified)
for (i= data_length; i < total_bytes_to_send; i++)
tee_putc((int)' ', PAGER);
- for (i= 0, p= data; i < data_length; i+= 1, p+= 1)
- {
- if (*p == '\0')
- tee_putc((int)' ', PAGER);
- else
- tee_putc((int)*p, PAGER);
- }
+ tee_write(PAGER, data, data_length, MY_PRINT_SPS_0 | MY_PRINT_MB);
if (! right_justified)
for (i= data_length; i < total_bytes_to_send; i++)
@@ -3587,16 +3609,7 @@
tee_fprintf(PAGER, "%*s: ",(int) max_length,field->name);
if (cur[off])
{
- unsigned int i;
- const char *p;
-
- for (i= 0, p= cur[off]; i < lengths[off]; i+= 1, p+= 1)
- {
- if (*p == '\0')
- tee_putc((int)' ', PAGER);
- else
- tee_putc((int)*p, PAGER);
- }
+ tee_write(PAGER, cur[off], lengths[off], MY_PRINT_SPS_0 | MY_PRINT_MB);
tee_putc('\n', PAGER);
}
else
@@ -3666,16 +3679,7 @@
if (!src)
tee_fputs("NULL", PAGER);
else
- {
- for (const char *p = src; length; p++, length--)
- {
- const char *t;
- if ((t = array_value(xmlmeta, *p)))
- tee_fputs(t, PAGER);
- else
- tee_putc(*p, PAGER);
- }
- }
+ tee_write(PAGER, src, length, MY_PRINT_XML | MY_PRINT_MB);
}
@@ -3686,37 +3690,9 @@
tee_fputs("NULL", PAGER);
else
{
- if (opt_raw_data)
- {
- unsigned long i;
- /* Can't use tee_fputs(), it stops with NUL characters. */
- for (i= 0; i < length; i++, pos++)
- tee_putc(*pos, PAGER);
- }
- else for (const char *end=pos+length ; pos != end ; pos++)
- {
-#ifdef USE_MB
- int l;
- if (use_mb(charset_info) &&
- (l = my_ismbchar(charset_info, pos, end)))
- {
- while (l--)
- tee_putc(*pos++, PAGER);
- pos--;
- continue;
- }
-#endif
- if (!*pos)
- tee_fputs("\\0", PAGER); // This makes everything hard
- else if (*pos == '\t')
- tee_fputs("\\t", PAGER); // This would destroy tab format
- else if (*pos == '\n')
- tee_fputs("\\n", PAGER); // This too
- else if (*pos == '\\')
- tee_fputs("\\\\", PAGER);
- else
- tee_putc(*pos, PAGER);
- }
+ int flags= MY_PRINT_MB | (opt_raw_data ? 0 : (MY_PRINT_ESC_0 | MY_PRINT_CTRL));
+ /* Can't use tee_fputs(), it stops with NUL characters. */
+ tee_write(PAGER, pos, length, flags);
}
}
@@ -4317,7 +4293,29 @@
mysql_options(&mysql, MYSQL_INIT_COMMAND, init_command);
}
- mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
+ mysql_set_character_set(&mysql, default_charset);
+#ifdef __WIN__
+ uint cnv_errors;
+ String converted_database, converted_user;
+ if (!my_charset_same(&my_charset_utf8mb4_bin, mysql.charset))
+ {
+ /* Convert user and database from UTF8MB4 to connection character set */
+ if (user)
+ {
+ converted_user.copy(user, strlen(user) + 1,
+ &my_charset_utf8mb4_bin, mysql.charset,
+ &cnv_errors);
+ user= (char *) converted_user.ptr();
+ }
+ if (database)
+ {
+ converted_database.copy(database, strlen(database) + 1,
+ &my_charset_utf8mb4_bin, mysql.charset,
+ &cnv_errors);
+ database= (char *) converted_database.ptr();
+ }
+ }
+#endif
if (opt_plugin_dir && *opt_plugin_dir)
mysql_options(&mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir);
@@ -4339,7 +4337,38 @@
}
return -1; // Retryable
}
-
+
+#ifdef __WIN__
+ /* Convert --execute buffer from UTF8MB4 to connection character set */
+ if (!execute_buffer_conversion_done++ &&
+ status.line_buff &&
+ !status.line_buff->file && /* Convert only -e buffer, not real file */
+ status.line_buff->buffer < status.line_buff->end && /* Non-empty
*/
+ !my_charset_same(&my_charset_utf8mb4_bin, mysql.charset))
+ {
+ String tmp;
+ size_t len= status.line_buff->end - status.line_buff->buffer;
+ uint dummy_errors;
+ /*
+ Don't convert trailing '\n' character - it was appended during
+ last batch_readline_command() call.
+ Otherwise we'll get an extra line, which makes some tests fail.
+ */
+ if (status.line_buff->buffer[len - 1] == '\n')
+ len--;
+ if (tmp.copy(status.line_buff->buffer, len,
+ &my_charset_utf8mb4_bin, mysql.charset, &dummy_errors))
+ return 1;
+
+ /* Free the old line buffer */
+ batch_readline_end(status.line_buff);
+
+ /* Re-initialize line buffer from the converted string */
+ if (!(status.line_buff= batch_readline_command(NULL, (char *) tmp.c_ptr_safe())))
+ return 1;
+ }
+#endif /* __WIN__ */
+
charset_info= mysql.charset;
connected=1;
@@ -4645,11 +4674,82 @@
}
+/**
+ Write data to a stream, and also to OUTFILE when opt_outfile is set.
+ Various modes, corresponding to --tab, --xml, --raw parameters,
+ are supported.
+ On Windows, when the stream is cached as a console, bytes are sent
+ through the my_win_console_xxx() functions instead of stdio.
+
+ @param file Stream to write to
+ @param s String to write
+ @param slen String length
+ @param flags Combination of MY_PRINT_xxx flags (for --tab, --xml, --raw).
+*/
+void tee_write(FILE *file, const char *s, size_t slen, int flags)
+{
+#ifdef __WIN__
+ my_bool is_console= my_win_is_console_cached(file);
+#endif
+ const char *se;
+ for (se= s + slen; s < se; s++)
+ {
+ const char *t;
+
+ if (flags & MY_PRINT_MB)
+ {
+ int mblen;
+ if (use_mb(charset_info) &&
+ (mblen= my_ismbchar(charset_info, s, se)))
+ {
+#ifdef __WIN__
+ if (is_console)
+ my_win_console_write(charset_info, s, mblen);
+ else
+#endif
+ fwrite(s, 1, mblen, file);
+ if (opt_outfile)
+ fwrite(s, 1, mblen, OUTFILE);
+ s+= mblen - 1; /* The loop's s++ steps over the last byte */
+ continue;
+ }
+ }
+
+ if ((flags & MY_PRINT_XML) && (t= array_value(xmlmeta, *s)))
+ tee_fputs(t, file);
+ else if ((flags & MY_PRINT_SPS_0) && *s == '\0')
+ tee_putc((int) ' ', file); // 0x00 is printed as a space
+ else if ((flags & MY_PRINT_ESC_0) && *s == '\0')
+ tee_fputs("\\0", file); // This makes everything hard
+ else if ((flags & MY_PRINT_CTRL) && *s == '\t')
+ tee_fputs("\\t", file); // This would destroy tab format
+ else if ((flags & MY_PRINT_CTRL) && *s == '\n')
+ tee_fputs("\\n", file); // This too
+ else if ((flags & MY_PRINT_CTRL) && *s == '\\')
+ tee_fputs("\\\\", file);
+ else
+ {
+#ifdef __WIN__
+ if (is_console)
+ my_win_console_putc(charset_info, (int) *s);
+ else
+#endif
+ putc((int) *s, file);
+ if (opt_outfile)
+ putc((int) *s, OUTFILE);
+ }
+ }
+}
+
+
void tee_fprintf(FILE *file, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
+#ifdef __WIN__
+ if (my_win_is_console_cached(file))
+ my_win_console_vfprintf(charset_info, fmt, args);
+ else
+#endif
(void) vfprintf(file, fmt, args);
va_end(args);
@@ -4662,8 +4762,20 @@
}
+/*
+ Write a 0-terminated string to file and OUTFILE.
+ TODO: possibly it's nice to have a version with length some day,
+ e.g. tee_fnputs(s, slen, file),
+ to print numerous ASCII constant strings among mysql.cc
+ code, to avoid strlen(s) in my_win_console_fputs().
+*/
void tee_fputs(const char *s, FILE *file)
{
+#ifdef __WIN__
+ if (my_win_is_console_cached(file))
+ my_win_console_fputs(charset_info, s);
+ else
+#endif
fputs(s, file);
if (opt_outfile)
fputs(s, OUTFILE);
@@ -4672,17 +4784,17 @@
void tee_puts(const char *s, FILE *file)
{
- fputs(s, file);
- fputc('\n', file);
- if (opt_outfile)
- {
- fputs(s, OUTFILE);
- fputc('\n', OUTFILE);
- }
+ tee_fputs(s, file); /* The string itself, via the shared tee_fputs() path */
+ tee_putc('\n', file); /* Followed by a newline, via the shared tee_putc() path */
}
void tee_putc(int c, FILE *file)
{
+#ifdef __WIN__
+ if (my_win_is_console_cached(file))
+ my_win_console_putc(charset_info, c);
+ else
+#endif
putc(c, file);
if (opt_outfile)
putc(c, OUTFILE);
=== modified file 'client/mysqltest.cc'
--- client/mysqltest.cc 2011-01-26 20:13:31 +0000
+++ client/mysqltest.cc 2011-02-22 09:15:58 +0000
@@ -302,7 +302,7 @@
Q_ENABLE_WARNINGS, Q_DISABLE_WARNINGS,
Q_ENABLE_INFO, Q_DISABLE_INFO,
Q_ENABLE_METADATA, Q_DISABLE_METADATA,
- Q_EXEC, Q_DELIMITER,
+ Q_EXEC, Q_EXECW, Q_DELIMITER,
Q_DISABLE_ABORT_ON_ERROR, Q_ENABLE_ABORT_ON_ERROR,
Q_DISPLAY_VERTICAL_RESULTS, Q_DISPLAY_HORIZONTAL_RESULTS,
Q_QUERY_VERTICAL, Q_QUERY_HORIZONTAL, Q_SORTED_RESULT,
@@ -373,6 +373,7 @@
"enable_metadata",
"disable_metadata",
"exec",
+ "execw",
"delimiter",
"disable_abort_on_error",
"enable_abort_on_error",
@@ -2750,8 +2751,52 @@
#endif
-FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode)
+FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode,
+ struct st_command *command)
{
+#if __WIN__
+ /*
+ --execw is for tests executing commands containing non-ASCII characters.
+
+ To correctly start such a program on Windows, we need to use the "wide"
+ version of popen, with prior translation of the command line from
+ the file character set to wide string. We use the current value
+ of --character_set as a file character set, so before using --execw
+ make sure to set --character_set properly.
+
+ If we use the non-wide version of popen, Windows internally
+ converts command line from the current ANSI code page to wide string.
+ In case when character set of the command line does not match the
+ current ANSI code page, non-ASCII characters get garbled in most cases.
+
+ On Linux, the command line passed to popen() is considered
+ as a binary string, no any internal to-wide and from-wide
+ character set conversion happens, so we don't need to do anything.
+ On Linux --execw is just a synonym to --exec.
+
+ For simplicity, assume that command line is limited to 4KB
+ (like in cmd.exe) and that mode at most 10 characters.
+ */
+ if (command->type == Q_EXECW)
+ {
+ wchar_t wcmd[4096];
+ wchar_t wmode[10];
+ const char *cmd= ds_cmd->str;
+ uint dummy_errors;
+ size_t len;
+ len= my_convert((char *) wcmd, sizeof(wcmd) - sizeof(wcmd[0]),
+ &my_charset_utf16le_bin,
+ ds_cmd->str, strlen(ds_cmd->str), charset_info,
+ &dummy_errors);
+ wcmd[len / sizeof(wchar_t)]= 0;
+ len= my_convert((char *) wmode, sizeof(wmode) - sizeof(wmode[0]),
+ &my_charset_utf16le_bin,
+ mode, strlen(mode), charset_info, &dummy_errors);
+ wmode[len / sizeof(wchar_t)]= 0;
+ return _wpopen(wcmd, wmode);
+ }
+#endif /* __WIN__ */
+
#if defined __WIN__ && defined USE_CYGWIN
/* Dump the command into a sh script file and execute with popen */
str_to_file(tmp_sh_name, ds_cmd->str, ds_cmd->length);
@@ -2888,7 +2933,7 @@
DBUG_PRINT("info", ("Executing '%s' as '%s'",
command->first_argument, ds_cmd.str));
- if (!(res_file= my_popen(&ds_cmd, "r")) && command->abort_on_error)
+ if (!(res_file= my_popen(&ds_cmd, "r", command)) &&
command->abort_on_error)
{
dynstr_free(&ds_cmd);
die("popen(\"%s\", \"r\") failed", command->first_argument);
@@ -8763,6 +8808,7 @@
do_shutdown_server(command);
break;
case Q_EXEC:
+ case Q_EXECW:
do_exec(command);
command_executed++;
break;
=== modified file 'include/my_sys.h'
--- include/my_sys.h 2011-02-08 15:54:12 +0000
+++ include/my_sys.h 2011-02-17 12:39:50 +0000
@@ -946,9 +946,14 @@
void my_security_attr_free(SECURITY_ATTRIBUTES *sa);
/* implemented in my_conio.c */
-char* my_cgets(char *string, size_t clen, size_t* plen);
-
-#endif
+my_bool my_win_is_console(FILE *file);
+char *my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize);
+void my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen);
+void my_win_console_fputs(CHARSET_INFO *cs, const char *data);
+void my_win_console_putc(CHARSET_INFO *cs, int c);
+void my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args);
+int my_win_translate_command_line_args(CHARSET_INFO *cs, int *ac, char ***av);
+#endif /* __WIN__ */
#include <mysql/psi/psi.h>
=== modified file 'mysql-test/t/grant.test'
--- mysql-test/t/grant.test 2010-12-15 16:15:40 +0000
+++ mysql-test/t/grant.test 2011-02-17 09:41:06 +0000
@@ -1401,9 +1401,10 @@
#
# Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte
#
+--character_set utf8
set names utf8;
grant select on test.* to
ÑзеÑ_ÑзеÑ@localhost;
---exec $MYSQL --default-character-set=utf8
--user=ÑзеÑ_ÑзеÑ
-e "select user()"
+--execw $MYSQL --default-character-set=utf8
--user=ÑзеÑ_ÑзеÑ
-e "select user()"
revoke all on test.* from
ÑзеÑ_ÑзеÑ@localhost;
drop user
ÑзеÑ_ÑзеÑ@localhost;
--error ER_WRONG_STRING_LENGTH
=== modified file 'mysql-test/t/mysql.test'
--- mysql-test/t/mysql.test 2011-02-05 05:06:29 +0000
+++ mysql-test/t/mysql.test 2011-02-17 11:09:16 +0000
@@ -51,13 +51,14 @@
#
# Bug#17939 Wrong table format when using UTF8 strings
#
---exec $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as
'__tañgè Ããmé'"
2>&1
---exec $MYSQL --default-character-set=utf8 --table -e "SELECT
'__tañgè Ããmé' as 'John
Doe'" 2>&1
+--character_set utf8
+--execw $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as
'__tañgè Ããmé'"
2>&1
+--execw $MYSQL --default-character-set=utf8 --table -e "SELECT
'__tañgè Ããmé' as 'John
Doe'" 2>&1
#
# Bug#18265 -- mysql client: No longer right-justifies numeric columns
#
---exec $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k
char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values
('<----------------------->'); insert into t1 (k) values ('<-----'); insert into
t1 (k) values ('Τη
γλÏÏÏα'); insert into t1 (k)
values ('áá´ á·áá');
select * from t1; DROP TABLE t1;"
+--execw $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k
char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values
('<----------------------->'); insert into t1 (k) values ('<-----'); insert into
t1 (k) values ('Τη
γλÏÏÏα'); insert into t1 (k)
values ('áá´ á·áá');
select * from t1; DROP TABLE t1;"
#
# "DESCRIBE" commands may return strange NULLness flags.
=== modified file 'mysql-test/t/mysql_cp932.test'
--- mysql-test/t/mysql_cp932.test 2007-02-21 16:50:48 +0000
+++ mysql-test/t/mysql_cp932.test 2011-02-17 11:13:16 +0000
@@ -15,8 +15,9 @@
--exec $MYSQL --default-character-set=cp932 test -e "charset utf8;"
# its usage to switch internally in mysql to requested charset
---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select '\'; create
table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('\'); select * from
t1; drop table t1;"
---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select '\'"
---exec $MYSQL --default-character-set=utf8 test -e "/*charset cp932 */; set
character_set_client= cp932; select '\'"
---exec $MYSQL --default-character-set=utf8 test -e "/*!\C cp932 */; set
character_set_client= cp932; select '\'"
+--character_set latin1
+--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select '\';
create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('\'); select
* from t1; drop table t1;"
+--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select '\'"
+--execw $MYSQL --default-character-set=latin1 test -e "/*charset cp932 */; set names
cp932, character_set_results=utf8; select '\'"
+--execw $MYSQL --default-character-set=latin1 test -e "/*!\C cp932 */; set
character_set_client= cp932; select '\'"
=== modified file 'mysql-test/t/mysqlbinlog-cp932.test'
--- mysql-test/t/mysqlbinlog-cp932.test 2009-09-07 05:42:54 +0000
+++ mysql-test/t/mysqlbinlog-cp932.test 2011-02-17 09:16:34 +0000
@@ -10,8 +10,10 @@
# Bug#16217 (mysql client did not know how not switch its internal charset)
create table t3 (f text character set utf8);
create table t4 (f text character set cp932);
---exec $MYSQL --default-character-set=utf8 test -e "insert into t3
values(_utf8'ã½')"
---exec $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'\');"
+--character_set utf8
+--execw $MYSQL --default-character-set=utf8 test -e "insert into t3
values(_utf8'ã½')"
+--character_set cp932
+--execw $MYSQL --default-character-set=cp932 test -e "insert into t4
values(_cp932'\');"
flush logs;
rename table t3 to t03, t4 to t04;
let $MYSQLD_DATADIR= `select @@datadir`;
=== modified file 'mysys/my_conio.c'
--- mysys/my_conio.c 2009-02-13 16:41:47 +0000
+++ mysys/my_conio.c 2011-02-22 11:59:53 +0000
@@ -18,205 +18,260 @@
#ifdef __WIN__
-static HANDLE my_coninpfh= 0; /* console input */
-
-/*
- functions my_pthread_auto_mutex_lock & my_pthread_auto_mutex_free
- are experimental at this moment, they are intended to bring
- ability of protecting code sections without necessity to explicitly
- initialize synchronization object in one of threads
-
- if found useful they are to be exported in mysys
-*/
-
-
-/*
- int my_pthread_auto_mutex_lock(HANDLE* ph, const char* name,
- int id, int time)
- NOTES
- creates a mutex with given name and tries to lock it time msec.
- mutex name is appended with id to allow system wide or process wide
- locks. Handle to created mutex returned in ph argument.
-
- RETURN
- 0 thread owns mutex
- <>0 error
-*/
-
-static
-int my_pthread_auto_mutex_lock(HANDLE* ph, const char* name, int id, int time)
-{
- int res;
- char tname[FN_REFLEN];
-
- sprintf(tname, "%s-%08X", name, id);
-
- *ph= CreateMutex(NULL, FALSE, tname);
- if (*ph == NULL)
- return GetLastError();
-
- res= WaitForSingleObject(*ph, time);
-
- if (res == WAIT_TIMEOUT)
- return ERROR_SEM_TIMEOUT;
-
- if (res == WAIT_FAILED)
- return GetLastError();
-
- return 0;
-}
-
-/*
- int my_pthread_auto_mutex_free(HANDLE* ph)
-
- NOTES
- releases a mutex.
-
- RETURN
- 0 thread released mutex
- <>0 error
-
-*/
-static
-int my_pthread_auto_mutex_free(HANDLE* ph)
-{
- if (*ph)
- {
- ReleaseMutex(*ph);
- CloseHandle(*ph);
- *ph= NULL;
- }
-
- return 0;
-}
-
-
-#define pthread_auto_mutex_decl(name) \
- HANDLE __h##name= NULL;
-
-#define pthread_auto_mutex_lock(name, proc, time) \
- my_pthread_auto_mutex_lock(&__h##name, #name, (proc), (time))
-
-#define pthread_auto_mutex_free(name) \
- my_pthread_auto_mutex_free(&__h##name)
-
-
-/*
- char* my_cgets()
-
- NOTES
- Replaces _cgets from libc to support input of more than 255 chars.
- Reads from the console via ReadConsole into buffer which
- should be at least clen characters.
- Actual length of string returned in plen.
-
- WARNING
- my_cgets() does NOT check the pushback character buffer (i.e., _chbuf).
- Thus, my_cgets() will not return any character that is pushed back by
- the _ungetch() call.
-
- RETURN
- string pointer ok
- NULL Error
-
-*/
-
-char* my_cgets(char *buffer, size_t clen, size_t* plen)
-{
- ULONG state;
- char *result;
- DWORD plen_res;
- CONSOLE_SCREEN_BUFFER_INFO csbi;
-
- pthread_auto_mutex_decl(my_conio_cs);
-
- /* lock the console for the current process*/
- if (pthread_auto_mutex_lock(my_conio_cs, GetCurrentProcessId(), INFINITE))
- {
- /* can not lock console */
- pthread_auto_mutex_free(my_conio_cs);
- return NULL;
- }
-
- /* init console input */
- if (my_coninpfh == 0)
- {
- /* same handle will be used until process termination */
- my_coninpfh= CreateFile("CONIN$", GENERIC_READ | GENERIC_WRITE,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- NULL, OPEN_EXISTING, 0, NULL);
- }
-
- if (my_coninpfh == INVALID_HANDLE_VALUE)
- {
- /* unlock the console */
- pthread_auto_mutex_free(my_conio_cs);
- return(NULL);
- }
-
- GetConsoleMode((HANDLE)my_coninpfh, &state);
- SetConsoleMode((HANDLE)my_coninpfh, ENABLE_LINE_INPUT |
- ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
-
- GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
-
- /*
- there is no known way to determine allowed buffer size for input
- though it is known it should not be more than 64K
- so we cut 64K and try first size of screen buffer
- if it is still to large we cut half of it and try again
- later we may want to cycle from min(clen, 65535) to allowed size
- with small decrement to determine exact allowed buffer
- */
- clen= min(clen, 65535);
- do
- {
- clen= min(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y);
- if (!ReadConsole((HANDLE)my_coninpfh, (LPVOID)buffer, (DWORD) clen - 1,
&plen_res,
- NULL))
- {
- result= NULL;
- clen>>= 1;
+
+/* Windows console handling */
+
+/* Maximum line length on Windows console */
+#define MAX_CONSOLE_LINE_SIZE 65535
+
+/**
+  Check whether a stdio stream is a Windows console.
+
+  The stream is treated as a console when GetConsoleMode()
+  succeeds on the OS handle behind its file descriptor.
+
+  @param file   Stream to check
+
+  @return
+    @retval 0 if file is not Windows console
+    @retval 1 if file is Windows console
+*/
+my_bool
+my_win_is_console(FILE *file)
+{
+  DWORD console_mode;
+  HANDLE os_handle= (HANDLE) _get_osfhandle(_fileno(file));
+  return GetConsoleMode(os_handle, &console_mode) ? 1 : 0;
+}
+
+
+/**
+  Read line from Windows console using Unicode API
+  and translate input to session character set.
+  Note, as Windows API breaks supplementary characters
+  into two wchar_t pieces, we cannot read and convert individual
+  wchar_t values separately. So let's use a buffer for
+  Unicode console input, and then convert it to "cs" in a single shot.
+  String is terminated with '\0' character.
+  '\r' characters are dropped, and the terminating '\n' is not stored.
+
+  @param cs         Character set to convert to.
+  @param mbbuf      Write input data here.
+  @param mbbufsize  Number of bytes available in mbbuf.
+
+  @retval Pointer to mbbuf, or NULL on I/O error.
+*/
+char *
+my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize)
+{
+  uint dummy_errors;
+  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+  wchar_t *pos;
+  size_t mblen;
+  DWORD console_mode;
+  HANDLE console= GetStdHandle(STD_INPUT_HANDLE);
+
+  DBUG_ASSERT(mbbufsize > 0); /* Need space for at least trailing '\0' */
+  /*
+    Remember the current mode to restore it on return.
+    If it cannot be queried there is nothing valid to restore,
+    so don't touch the mode at all and report an error.
+  */
+  if (!GetConsoleMode(console, &console_mode))
+    return NULL;
+  SetConsoleMode(console, ENABLE_LINE_INPUT |
+                          ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
+  for (pos= u16buf; pos < &u16buf[MAX_CONSOLE_LINE_SIZE] ; )
+  {
+    DWORD nchars;
+    if (!ReadConsoleW(console, pos, 1, &nchars, NULL) || nchars == 0)
+    {
+      SetConsoleMode(console, console_mode);
+      return NULL;
+    }
+    if (*pos == L'\r') /* We don't need '\r' in the result string, skip it */
+      continue;
+    if (*pos == L'\n')
+      break;
+    pos++;
+  }
+  SetConsoleMode(console, console_mode);
+  /* Convert Unicode to session character set */
+  mblen= my_convert(mbbuf, mbbufsize - 1, cs,
+                    (const char *) u16buf, (pos - u16buf) * sizeof(wchar_t),
+                    &my_charset_utf16le_bin, &dummy_errors);
+  DBUG_ASSERT(mblen < mbbufsize); /* Safety */
+  mbbuf[mblen]= 0;
+  return mbbuf;
+}
+
+
+/**
+ Translate client charset to Windows wchars for console I/O.
+ Unlike copy_and_convert(), in case of a wrong multi-byte sequence
+ we don't print '?' character, we fallback to ISO-8859-1 instead.
+ This gives a better idea how binary data (e.g. BLOB) look like.
+ Conversion stops at the first 0 character found in the input,
+ or when there is not enough space left in "to".
+
+ @param cs Character set of the input string
+ @param from Input string
+ @param from_length Length of the input string
+ @param to[OUT] Write Unicode data here
+ @param to_chars Number of characters available in "to"
+ (counted in wchar_t units, not in bytes)
+
+ @return Number of wchar_t units written to "to"
+*/
+static size_t
+my_mbstou16s(CHARSET_INFO *cs, const uchar * from, size_t from_length,
+ wchar_t *to, size_t to_chars)
+{
+ CHARSET_INFO *to_cs= &my_charset_utf16le_bin;
+ const uchar *from_end= from + from_length;
+ wchar_t *to_orig= to, *to_end= to + to_chars;
+ my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+ while (from < from_end)
+ {
+ int cnvres;
+ my_wc_t wc;
+ if ((cnvres= (*mb_wc)(cs, &wc, from, from_end)) > 0)
+ {
+ if (!wc)
+ break;
+ from+= cnvres;
+ }
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */
+ from+= 1;
+ }
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ wc= '?';
+ from+= (-cnvres); /* Note: cnvres is negative here */
+ }
+ else /* Incomplete character */
+ {
+ wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */
+ from+= 1;
+ }
+outp:
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar *) to, (uchar *) to_end)) > 0)
+ {
+ /* We can never convert only a part of wchar_t */
+ DBUG_ASSERT((cnvres % sizeof(wchar_t)) == 0);
+ /* cnvres returns number of bytes, convert to number of wchar_t's */
+ to+= cnvres / sizeof(wchar_t);
+ }
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ wc= '?';
+ goto outp;
 }
 else
- {
- result= buffer;
- break;
- }
+ break; /* Not enough space */
 }
- while (GetLastError() == ERROR_NOT_ENOUGH_MEMORY);
- *plen= plen_res;
-
- /* We go here on error reading the string (Ctrl-C for example) */
- if (!*plen)
- result= NULL; /* purecov: inspected */
-
- if (result != NULL)
+ return to - to_orig;
+}
+
+
+/**
+  Write a string in the given character set to Windows console.
+  As Windows breaks supplementary characters into two parts,
+  we cannot use a simple loop sending the result of
+  cs->cset->mb_wc() to console.
+  So we convert the string from client charset to an array of wchar_t,
+  then write the array to console in a single shot.
+  Input that does not fit into the internal wchar_t buffer is truncated.
+
+  @param cs      Character set of the string
+  @param data    String to print
+  @param datalen Length of input string in bytes
+*/
+void
+my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen)
+{
+  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+  /*
+    my_mbstou16s() takes the capacity of the output buffer in wchar_t
+    units. Passing sizeof(u16buf), which is in bytes, would let it
+    write past the end of the buffer.
+  */
+  size_t nchars= my_mbstou16s(cs, (const uchar *) data, datalen,
+                              u16buf, sizeof(u16buf) / sizeof(u16buf[0]));
+  DWORD nwritten;
+  WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE),
+                u16buf, (DWORD) nchars, &nwritten, NULL);
+}
+
+
+/**
+  Write one single-byte character to console.
+  Note: pieces of the same multi-byte character must not be sent
+  in separate consecutive my_win_console_putc() calls.
+  Use my_win_console_write() for multi-byte characters instead.
+
+  @param cs  Character set of the input character
+  @param c   Character (single byte)
+*/
+void
+my_win_console_putc(CHARSET_INFO *cs, int c)
+{
+  const char byte= (char) c;
+  my_win_console_write(cs, &byte, sizeof(byte));
+}
+
+
+/**
+  Print a 0-terminated string on Windows console.
+  The length is measured with strlen() and the bytes are
+  passed to my_win_console_write().
+
+  @param cs    Character set of the string to print
+  @param data  String to print
+*/
+void
+my_win_console_fputs(CHARSET_INFO *cs, const char *data)
+{
+  size_t nbytes= strlen(data);
+  my_win_console_write(cs, data, nbytes);
+}
+
+
+/**
+  Handle formatted output on the Windows console.
+  The text is formatted into a static buffer; output that does not
+  fit into MAX_CONSOLE_LINE_SIZE bytes is truncated.
+
+  @param cs    Character set of the formatted text
+  @param fmt   printf-style format string
+  @param args  Arguments for the format string
+*/
+void
+my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args)
+{
+  static char buff[MAX_CONSOLE_LINE_SIZE + 1];
+  const size_t max_len= sizeof(buff) - 1;
+  int res= vsnprintf(buff, max_len, fmt, args);
+  size_t len;
+  if (res >= 0 && (size_t) res < max_len)
+    len= (size_t) res;
+  else
+  {
+    /*
+      Error or truncation. Depending on the C runtime the result is
+      negative or the would-be length, and the buffer may be left
+      without a terminator. Terminate it and print what is there,
+      rather than using the returned value as a length.
+    */
+    buff[max_len]= '\0';
+    len= strlen(buff);
+  }
+  my_win_console_write(cs, buff, len);
+}
+
+
+#include <shellapi.h>
+
+/**
+  Translate Unicode command line parameters to the given character set
+  (Typically to utf8mb4).
+  Translated parameters are allocated using my_once_alloc().
+
+  @param cs          Character set to convert parameters to.
+  @param[OUT] argc   Write number of parameters here
+  @param[OUT] argv   Write pointer to allocated parameters here.
+
+  @retval 0  Success, *argc and *argv have been replaced
+  @retval 1  Failure (command line could not be parsed, or out of memory),
+             *argc and *argv are left untouched
+*/
+int
+my_win_translate_command_line_args(CHARSET_INFO *cs, int *argc, char ***argv)
+{
+  int i, ac;
+  char **av;
+  wchar_t *command_line= GetCommandLineW();
+  wchar_t **wargs= CommandLineToArgvW(command_line, &ac);
+  size_t nbytes;
+
+  if (!wargs)
+    return 1;
+  nbytes= (ac + 1) * sizeof(char *);
+
+  /* Allocate new command line parameter */
+  if (!(av= (char **) my_once_alloc(nbytes, MYF(MY_ZEROFILL))))
+  {
+    LocalFree((HLOCAL) wargs);
+    return 1;
+  }
+
+  /* Iterate over "ac", the size of wargs, not over the caller's *argc */
+  for(i= 0; i < ac; i++)
 {
- if (*plen > 1 && buffer[*plen - 2] == '\r')
- {
- *plen= *plen - 2;
- }
- else
- {
- if (*plen > 0 && buffer[*plen - 1] == '\r')
- {
- char tmp[3];
- int tmplen= sizeof(tmp);
-
- *plen= *plen - 1;
- /* read /n left in the buffer */
- ReadConsole((HANDLE)my_coninpfh, (LPVOID)tmp, tmplen, &tmplen, NULL);
- }
- }
- buffer[*plen]= '\0';
+    uint dummy_errors;
+    size_t arg_len= wcslen(wargs[i]);
+    size_t len, alloced_len= arg_len * cs->mbmaxlen + 1;
+    if (!(av[i]= (char *) my_once_alloc(alloced_len, MYF(0))))
+    {
+      LocalFree((HLOCAL) wargs);
+      return 1;
+    }
+    /* Reserve the last byte for the trailing '\0' */
+    len= my_convert(av[i], alloced_len - 1, cs,
+                    (const char *) wargs[i], arg_len * sizeof(wchar_t),
+                    &my_charset_utf16le_bin, &dummy_errors);
+    DBUG_ASSERT(len < alloced_len);
+    av[i][len]= '\0';
 }
-
- SetConsoleMode((HANDLE)my_coninpfh, state);
- /* unlock the console */
- pthread_auto_mutex_free(my_conio_cs);
-
- return result;
+  *argv= av;
+  *argc= ac;
+  /* Cleanup on exit */
+  LocalFree((HLOCAL) wargs);
+  return 0;
 }
#endif /* __WIN__ */
=== modified file 'sql-common/client.c'
--- sql-common/client.c 2011-01-31 15:55:58 +0000
+++ sql-common/client.c 2011-02-22 09:12:41 +0000
@@ -4246,11 +4246,31 @@
if (mysql->options.charset_dir)
charsets_dir= mysql->options.charset_dir;
+ if (!mysql->net.vio)
+ {
+ /* Initialize with automatic OS character set detection. */
+ mysql_options(mysql, MYSQL_SET_CHARSET_NAME, cs_name);
+ mysql_init_character_set(mysql);
+ /*
+ In case of automatic OS character set detection
+ mysql_init_character_set changes mysql->options.charset_name
+ from "auto" to the real character set name.
+ Reset cs_name to the detected character set name, accordingly.
+ */
+ cs_name= mysql->options.charset_name;
+ }
+
if (strlen(cs_name) < MY_CS_NAME_SIZE &&
(cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0))))
{
char buff[MY_CS_NAME_SIZE + 10];
charsets_dir= save_csdir;
+ if (!mysql->net.vio)
+ {
+ /* If there is no connection yet we don't send "SET NAMES" query */
+ mysql->charset= cs;
+ return 0;
+ }
/* Skip execution of "SET NAMES" for pre-4.1 servers */
if (mysql_get_server_version(mysql) < 40100)
return 0;
| Thread |
|---|
| • WL#5331 Unicode API for Windows command line | Alexander Barkov | 22 Feb |