From: Alexander Barkov Date: February 22 2011 12:24pm Subject: WL#5331 Unicode API for Window command line client List-Archive: http://lists.mysql.com/commits/131849 Message-Id: <4D63AAF4.9010500@oracle.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------020503080504050509000108" --------------020503080504050509000108 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit --------------020503080504050509000108 Content-Type: text/plain; name="w5331v4.diff" Content-Transfer-Encoding: 8bit Content-Disposition: inline; filename="w5331v4.diff" Version 4: - my_win_is_console_cached() added, to cache my_win_is_console() results - tee_write improvements, to avoid calling my_win_is_console() too often WL#5331 Support Unicode for Windows command line client Based on the original patch from Vladislav Vaintroub: http://lists.mysql.com/commits/105379 @ client/mysql.cc - introducing new function tee_write(), to reuse in a number of places where similar loops displaying data occur. - introducing flags for tee_write(), to support different printing modes, according to --xml, --raw, --tab, etc, parameters - Instead of using argv (which is always in ANSI code page), we now use UTF16LE API to access command line arguments on Windows, using this scenario: a. We translate arguments to UTF8MB4 on startup. b. Then we process arguments and detect connection character set from --default-character-set arguments (or my.ini value), or from the OS localization information by default. c. Then we convert user, database and the --execute (-e) buffer from UTF8MB4 to the connection character set. d. 
Connect - Instead of printing using printf/fputs family functions on Windows, which are limited to the current DOS code page (cp850 on a Western machine) we now use UTF16LE console API through the new my_win_console_xxx() functions implemented in my_conio.c - Using mysql_set_character_set() instead of mysql_options(MYSQL_SET_CHARSET_NAME) to know the ongoing session character set *before* mysql_real_connect() call, to convert user and database properly. - my_win_is_console_cached() has been added to cache my_win_is_console() result for stdout and stderr, for performance purposes. @ client/mysqltest.cc - Introducing a new mysqltest command: --execw, to execute commands with non-ASCII characters correctly on Windows. @ include/my_sys.h - Adding prototypes for the my_win_console_xxx() functions @ mysql-test/t/grant.test @ mysql-test/t/mysql.test @ mysql-test/t/mysql_cp932.test @ mysql-test/t/mysqlbinlog-cp932.test - Using --execw instead of --exec for the affected tests @ mysys/my_conio.c - Implementing functions for Windows console read/write and command line argument processing. - Removing my_cgets(), as it's not used any more. @ sql-common/client.c - Fixing mysql_set_character_set() to set mysql.charset on a non-connected "mysql". Previously such a call crashed. This change makes it possible to know which character set the ongoing session is going to use *before* calling mysql_real_connect(). This is needed in mysql.cc, to convert user and database correctly on Windows. === modified file 'client/mysql.cc' --- client/mysql.cc 2011-02-15 12:38:39 +0000 +++ client/mysql.cc 2011-02-22 11:51:31 +0000 @@ -195,6 +195,38 @@ const char *default_dbug_option="d:t:o,/tmp/mysql.trace"; +#ifdef __WIN__ +/* + A flag that indicates if --execute buffer has already been converted, + to avoid double conversion on reconnect. +*/ +static my_bool execute_buffer_conversion_done= 0; + +/* + my_win_is_console(...) is quite slow. + We cache my_win_is_console() results for stdout and stderr. 
+ Any other output files, except stdout and stderr, + cannot be Windows console. +*/ +static uint win_is_console_cache= + (test(my_win_is_console(stdout)) << _fileno(stdout)) | + (test(my_win_is_console(stderr)) << _fileno(stderr)); + +static inline my_bool +my_win_is_console_cached(FILE *file) +{ + return win_is_console_cache & (1 << _fileno(file)); +} +#endif /* __WIN__ */ + +/* Various printing flags */ +#define MY_PRINT_ESC_0 1 /* Replace 0x00 bytes to "\0" */ +#define MY_PRINT_SPS_0 2 /* Replace 0x00 bytes to space */ +#define MY_PRINT_XML 4 /* Encode XML entities */ +#define MY_PRINT_MB 8 /* Recognize multi-byte characters */ +#define MY_PRINT_CTRL 16 /* Replace TAB, NL, CR to "\t", "\n", "\r" */ + +void tee_write(FILE *file, const char *s, size_t slen, int flags); void tee_fprintf(FILE *file, const char *fmt, ...); void tee_fputs(const char *s, FILE *file); void tee_puts(const char *s, FILE *file); @@ -1113,6 +1145,11 @@ close(stdout_fileno_copy); /* Clean up dup(). */ } +#ifdef __WIN__ + /* Convert command line parameters from UTF16LE to UTF8MB4. */ + my_win_translate_command_line_args(&my_charset_utf8mb4_bin, &argc, &argv); +#endif + if (load_defaults("my",load_default_groups,&argc,&argv)) { my_end(0); @@ -1885,22 +1922,9 @@ tmpbuf.alloc(65535); tmpbuf.length(0); buffer.length(0); - size_t clen; - do - { - line= my_cgets((char*)tmpbuf.ptr(), tmpbuf.alloced_length()-1, &clen); - buffer.append(line, clen); - /* - if we got buffer fully filled than there is a chance that - something else is still in console input buffer - */ - } while (tmpbuf.alloced_length() <= clen); - /* - An empty line is returned from my_cgets when there's error reading : - Ctrl-c for example - */ - if (line) - line= buffer.c_ptr(); + line= my_win_console_readline(charset_info, + (char *) tmpbuf.ptr(), + tmpbuf.alloced_length()); #else if (opt_outfile) fputs(prompt, OUTFILE); @@ -3455,19 +3479,12 @@ grid. (The \0 is also the reason we can't use fprintf() .) 
*/ unsigned int i; - const char *p; if (right_justified) for (i= data_length; i < total_bytes_to_send; i++) tee_putc((int)' ', PAGER); - for (i= 0, p= data; i < data_length; i+= 1, p+= 1) - { - if (*p == '\0') - tee_putc((int)' ', PAGER); - else - tee_putc((int)*p, PAGER); - } + tee_write(PAGER, data, data_length, MY_PRINT_SPS_0 | MY_PRINT_MB); if (! right_justified) for (i= data_length; i < total_bytes_to_send; i++) @@ -3587,16 +3604,7 @@ tee_fprintf(PAGER, "%*s: ",(int) max_length,field->name); if (cur[off]) { - unsigned int i; - const char *p; - - for (i= 0, p= cur[off]; i < lengths[off]; i+= 1, p+= 1) - { - if (*p == '\0') - tee_putc((int)' ', PAGER); - else - tee_putc((int)*p, PAGER); - } + tee_write(PAGER, cur[off], lengths[off], MY_PRINT_SPS_0 | MY_PRINT_MB); tee_putc('\n', PAGER); } else @@ -3666,16 +3674,7 @@ if (!src) tee_fputs("NULL", PAGER); else - { - for (const char *p = src; length; p++, length--) - { - const char *t; - if ((t = array_value(xmlmeta, *p))) - tee_fputs(t, PAGER); - else - tee_putc(*p, PAGER); - } - } + tee_write(PAGER, src, length, MY_PRINT_XML | MY_PRINT_MB); } @@ -3686,37 +3685,9 @@ tee_fputs("NULL", PAGER); else { - if (opt_raw_data) - { - unsigned long i; - /* Can't use tee_fputs(), it stops with NUL characters. */ - for (i= 0; i < length; i++, pos++) - tee_putc(*pos, PAGER); - } - else for (const char *end=pos+length ; pos != end ; pos++) - { -#ifdef USE_MB - int l; - if (use_mb(charset_info) && - (l = my_ismbchar(charset_info, pos, end))) - { - while (l--) - tee_putc(*pos++, PAGER); - pos--; - continue; - } -#endif - if (!*pos) - tee_fputs("\\0", PAGER); // This makes everything hard - else if (*pos == '\t') - tee_fputs("\\t", PAGER); // This would destroy tab format - else if (*pos == '\n') - tee_fputs("\\n", PAGER); // This too - else if (*pos == '\\') - tee_fputs("\\\\", PAGER); - else - tee_putc(*pos, PAGER); - } + int flags= MY_PRINT_MB | (opt_raw_data ? 
0 : (MY_PRINT_ESC_0 | MY_PRINT_CTRL)); + /* Can't use tee_fputs(), it stops with NUL characters. */ + tee_write(PAGER, pos, length, flags); } } @@ -4317,7 +4288,29 @@ mysql_options(&mysql, MYSQL_INIT_COMMAND, init_command); } - mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset); + mysql_set_character_set(&mysql, default_charset); +#ifdef __WIN__ + uint cnv_errors; + String converted_database, converted_user; + if (!my_charset_same(&my_charset_utf8mb4_bin, mysql.charset)) + { + /* Convert user and database from UTF8MB4 to connection character set */ + if (user) + { + converted_user.copy(user, strlen(user) + 1, + &my_charset_utf8mb4_bin, mysql.charset, + &cnv_errors); + user= (char *) converted_user.ptr(); + } + if (database) + { + converted_database.copy(database, strlen(database) + 1, + &my_charset_utf8mb4_bin, mysql.charset, + &cnv_errors); + database= (char *) converted_database.ptr(); + } + } +#endif if (opt_plugin_dir && *opt_plugin_dir) mysql_options(&mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir); @@ -4339,7 +4332,38 @@ } return -1; // Retryable } - + +#ifdef __WIN__ + /* Convert --execute buffer from UTF8MB4 to connection character set */ + if (!execute_buffer_conversion_done++ && + status.line_buff && + !status.line_buff->file && /* Convert only -e buffer, not real file */ + status.line_buff->buffer < status.line_buff->end && /* Non-empty */ + !my_charset_same(&my_charset_utf8mb4_bin, mysql.charset)) + { + String tmp; + size_t len= status.line_buff->end - status.line_buff->buffer; + uint dummy_errors; + /* + Don't convert trailing '\n' character - it was appended during + last batch_readline_command() call. + Oherwise we'll get an extra line, which makes some tests fail. 
+ */ + if (status.line_buff->buffer[len - 1] == '\n') + len--; + if (tmp.copy(status.line_buff->buffer, len, + &my_charset_utf8mb4_bin, mysql.charset, &dummy_errors)) + return 1; + + /* Free the old line buffer */ + batch_readline_end(status.line_buff); + + /* Re-initialize line buffer from the converted string */ + if (!(status.line_buff= batch_readline_command(NULL, (char *) tmp.c_ptr_safe()))) + return 1; + } +#endif /* __WIN__ */ + charset_info= mysql.charset; connected=1; @@ -4645,11 +4669,82 @@ } +/** + Write data to a stream. + Various modes, corresponding to --tab, --xml, --raw parameters, + are supported. + + @param file Stream to write to + @param s String to write + @param slen String length + @flags Flags for --tab, --xml, --raw. +*/ +void tee_write(FILE *file, const char *s, size_t slen, int flags) +{ +#ifdef __WIN__ + my_bool is_console= my_win_is_console_cached(file); +#endif + const char *se; + for (se= s + slen; s < se; s++) + { + const char *t; + + if (flags & MY_PRINT_MB) + { + int mblen; + if (use_mb(charset_info) && + (mblen= my_ismbchar(charset_info, s, se))) + { +#ifdef __WIN__ + if (is_console) + my_win_console_write(charset_info, s, mblen); + else +#endif + fwrite(s, 1, mblen, file); + if (opt_outfile) + fwrite(s, 1, mblen, OUTFILE); + s+= mblen - 1; + continue; + } + } + + if ((flags & MY_PRINT_XML) && (t= array_value(xmlmeta, *s))) + tee_fputs(t, file); + else if ((flags & MY_PRINT_SPS_0) && *s == '\0') + tee_putc((int) ' ', file); // This makes everything hard + else if ((flags & MY_PRINT_ESC_0) && *s == '\0') + tee_fputs("\\0", file); // This makes everything hard + else if ((flags & MY_PRINT_CTRL) && *s == '\t') + tee_fputs("\\t", file); // This would destroy tab format + else if ((flags & MY_PRINT_CTRL) && *s == '\n') + tee_fputs("\\n", file); // This too + else if ((flags & MY_PRINT_CTRL) && *s == '\\') + tee_fputs("\\\\", file); + else + { +#ifdef __WIN__ + if (is_console) + my_win_console_putc(charset_info, (int) *s); + else 
+#endif + putc((int) *s, file); + if (opt_outfile) + putc((int) *s, OUTFILE); + } + } +} + + void tee_fprintf(FILE *file, const char *fmt, ...) { va_list args; va_start(args, fmt); +#ifdef __WIN__ + if (my_win_is_console_cached(file)) + my_win_console_vfprintf(charset_info, fmt, args); + else +#endif (void) vfprintf(file, fmt, args); va_end(args); @@ -4664,6 +4759,11 @@ void tee_fputs(const char *s, FILE *file) { +#ifdef __WIN__ + if (my_win_is_console_cached(file)) + my_win_console_fputs(charset_info, s); + else +#endif fputs(s, file); if (opt_outfile) fputs(s, OUTFILE); @@ -4672,17 +4772,17 @@ void tee_puts(const char *s, FILE *file) { - fputs(s, file); - fputc('\n', file); - if (opt_outfile) - { - fputs(s, OUTFILE); - fputc('\n', OUTFILE); - } + tee_fputs(s, file); + tee_putc('\n', file); } void tee_putc(int c, FILE *file) { +#ifdef __WIN__ + if (my_win_is_console_cached(file)) + my_win_console_putc(charset_info, c); + else +#endif putc(c, file); if (opt_outfile) putc(c, OUTFILE); === modified file 'client/mysqltest.cc' --- client/mysqltest.cc 2011-01-26 20:13:31 +0000 +++ client/mysqltest.cc 2011-02-22 09:15:58 +0000 @@ -302,7 +302,7 @@ Q_ENABLE_WARNINGS, Q_DISABLE_WARNINGS, Q_ENABLE_INFO, Q_DISABLE_INFO, Q_ENABLE_METADATA, Q_DISABLE_METADATA, - Q_EXEC, Q_DELIMITER, + Q_EXEC, Q_EXECW, Q_DELIMITER, Q_DISABLE_ABORT_ON_ERROR, Q_ENABLE_ABORT_ON_ERROR, Q_DISPLAY_VERTICAL_RESULTS, Q_DISPLAY_HORIZONTAL_RESULTS, Q_QUERY_VERTICAL, Q_QUERY_HORIZONTAL, Q_SORTED_RESULT, @@ -373,6 +373,7 @@ "enable_metadata", "disable_metadata", "exec", + "execw", "delimiter", "disable_abort_on_error", "enable_abort_on_error", @@ -2750,8 +2751,52 @@ #endif -FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode) +FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode, + struct st_command *command) { +#if __WIN__ + /* + --execw is for tests executing commands containing non-ASCII characters. 
+ + To correctly start such a program on Windows, we need to use the "wide" + version of popen, with prior translation of the command line from + the file character set to wide string. We use the current value + of --character_set as a file character set, so before using --execw + make sure to set --character_set properly. + + If we use the non-wide version of popen, Windows internally + converts command line from the current ANSI code page to wide string. + In case when character set of the command line does not match the + current ANSI code page, non-ASCII characters get garbled in most cases. + + On Linux, the command line passed to popen() is considered + as a binary string, no any internal to-wide and from-wide + character set conversion happens, so we don't need to do anything. + On Linux --execw is just a synonym to --exec. + + For simplicity, assume that command line is limited to 4KB + (like in cmd.exe) and that mode at most 10 characters. + */ + if (command->type == Q_EXECW) + { + wchar_t wcmd[4096]; + wchar_t wmode[10]; + const char *cmd= ds_cmd->str; + uint dummy_errors; + size_t len; + len= my_convert((char *) wcmd, sizeof(wcmd) - sizeof(wcmd[0]), + &my_charset_utf16le_bin, + ds_cmd->str, strlen(ds_cmd->str), charset_info, + &dummy_errors); + wcmd[len / sizeof(wchar_t)]= 0; + len= my_convert((char *) wmode, sizeof(wmode) - sizeof(wmode[0]), + &my_charset_utf16le_bin, + mode, strlen(mode), charset_info, &dummy_errors); + wmode[len / sizeof(wchar_t)]= 0; + return _wpopen(wcmd, wmode); + } +#endif /* __WIN__ */ + #if defined __WIN__ && defined USE_CYGWIN /* Dump the command into a sh script file and execute with popen */ str_to_file(tmp_sh_name, ds_cmd->str, ds_cmd->length); @@ -2888,7 +2933,7 @@ DBUG_PRINT("info", ("Executing '%s' as '%s'", command->first_argument, ds_cmd.str)); - if (!(res_file= my_popen(&ds_cmd, "r")) && command->abort_on_error) + if (!(res_file= my_popen(&ds_cmd, "r", command)) && command->abort_on_error) { dynstr_free(&ds_cmd); 
die("popen(\"%s\", \"r\") failed", command->first_argument); @@ -8763,6 +8808,7 @@ do_shutdown_server(command); break; case Q_EXEC: + case Q_EXECW: do_exec(command); command_executed++; break; === modified file 'include/my_sys.h' --- include/my_sys.h 2011-02-08 15:54:12 +0000 +++ include/my_sys.h 2011-02-17 12:39:50 +0000 @@ -946,9 +946,14 @@ void my_security_attr_free(SECURITY_ATTRIBUTES *sa); /* implemented in my_conio.c */ -char* my_cgets(char *string, size_t clen, size_t* plen); - -#endif +my_bool my_win_is_console(FILE *file); +char *my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize); +void my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen); +void my_win_console_fputs(CHARSET_INFO *cs, const char *data); +void my_win_console_putc(CHARSET_INFO *cs, int c); +void my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args); +int my_win_translate_command_line_args(CHARSET_INFO *cs, int *ac, char ***av); +#endif /* __WIN__ */ #include === modified file 'mysql-test/t/grant.test' --- mysql-test/t/grant.test 2010-12-15 16:15:40 +0000 +++ mysql-test/t/grant.test 2011-02-17 09:41:06 +0000 @@ -1401,9 +1401,10 @@ # # Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte # +--character_set utf8 set names utf8; grant select on test.* to юзер_юзер@localhost; ---exec $MYSQL --default-character-set=utf8 --user=юзер_юзер -e "select user()" +--execw $MYSQL --default-character-set=utf8 --user=юзер_юзер -e "select user()" revoke all on test.* from юзер_юзер@localhost; drop user юзер_юзер@localhost; --error ER_WRONG_STRING_LENGTH === modified file 'mysql-test/t/mysql.test' --- mysql-test/t/mysql.test 2011-02-05 05:06:29 +0000 +++ mysql-test/t/mysql.test 2011-02-17 11:09:16 +0000 @@ -51,13 +51,14 @@ # # Bug#17939 Wrong table format when using UTF8 strings # ---exec $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as '__tañgè Ñãmé'" 2>&1 ---exec $MYSQL 
--default-character-set=utf8 --table -e "SELECT '__tañgè Ñãmé' as 'John Doe'" 2>&1 +--character_set utf8 +--execw $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as '__tañgè Ñãmé'" 2>&1 +--execw $MYSQL --default-character-set=utf8 --table -e "SELECT '__tañgè Ñãmé' as 'John Doe'" 2>&1 # # Bug#18265 -- mysql client: No longer right-justifies numeric columns # ---exec $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values ('<----------------------->'); insert into t1 (k) values ('<-----'); insert into t1 (k) values ('Τη γλώσσα'); insert into t1 (k) values ('ᛖᚴ ᚷᛖᛏ'); select * from t1; DROP TABLE t1;" +--execw $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values ('<----------------------->'); insert into t1 (k) values ('<-----'); insert into t1 (k) values ('Τη γλώσσα'); insert into t1 (k) values ('ᛖᚴ ᚷᛖᛏ'); select * from t1; DROP TABLE t1;" # # "DESCRIBE" commands may return strange NULLness flags. 
=== modified file 'mysql-test/t/mysql_cp932.test' --- mysql-test/t/mysql_cp932.test 2007-02-21 16:50:48 +0000 +++ mysql-test/t/mysql_cp932.test 2011-02-17 11:13:16 +0000 @@ -15,8 +15,9 @@ --exec $MYSQL --default-character-set=cp932 test -e "charset utf8;" # its usage to switch internally in mysql to requested charset ---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select '\'; create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('\'); select * from t1; drop table t1;" ---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select '\'" ---exec $MYSQL --default-character-set=utf8 test -e "/*charset cp932 */; set character_set_client= cp932; select '\'" ---exec $MYSQL --default-character-set=utf8 test -e "/*!\C cp932 */; set character_set_client= cp932; select '\'" +--character_set latin1 +--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select '\'; create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('\'); select * from t1; drop table t1;" +--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select '\'" +--execw $MYSQL --default-character-set=latin1 test -e "/*charset cp932 */; set names cp932, character_set_results=utf8; select '\'" +--execw $MYSQL --default-character-set=latin1 test -e "/*!\C cp932 */; set character_set_client= cp932; select '\'" === modified file 'mysql-test/t/mysqlbinlog-cp932.test' --- mysql-test/t/mysqlbinlog-cp932.test 2009-09-07 05:42:54 +0000 +++ mysql-test/t/mysqlbinlog-cp932.test 2011-02-17 09:16:34 +0000 @@ -10,8 +10,10 @@ # Bug#16217 (mysql client did not know how not switch its internal charset) create table t3 (f text character set utf8); create table t4 (f text character set cp932); ---exec $MYSQL --default-character-set=utf8 test -e "insert into t3 values(_utf8'ソ')" ---exec $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'\');" +--character_set utf8 +--execw $MYSQL --default-character-set=utf8 test 
-e "insert into t3 values(_utf8'ソ')" +--character_set cp932 +--execw $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'\');" flush logs; rename table t3 to t03, t4 to t04; let $MYSQLD_DATADIR= `select @@datadir`; === modified file 'mysys/my_conio.c' --- mysys/my_conio.c 2009-02-13 16:41:47 +0000 +++ mysys/my_conio.c 2011-02-22 11:53:33 +0000 @@ -18,205 +18,260 @@ #ifdef __WIN__ -static HANDLE my_coninpfh= 0; /* console input */ - -/* - functions my_pthread_auto_mutex_lock & my_pthread_auto_mutex_free - are experimental at this moment, they are intended to bring - ability of protecting code sections without necessity to explicitly - initialize synchronization object in one of threads - - if found useful they are to be exported in mysys -*/ - - -/* - int my_pthread_auto_mutex_lock(HANDLE* ph, const char* name, - int id, int time) - NOTES - creates a mutex with given name and tries to lock it time msec. - mutex name is appended with id to allow system wide or process wide - locks. Handle to created mutex returned in ph argument. - - RETURN - 0 thread owns mutex - <>0 error -*/ - -static -int my_pthread_auto_mutex_lock(HANDLE* ph, const char* name, int id, int time) -{ - int res; - char tname[FN_REFLEN]; - - sprintf(tname, "%s-%08X", name, id); - - *ph= CreateMutex(NULL, FALSE, tname); - if (*ph == NULL) - return GetLastError(); - - res= WaitForSingleObject(*ph, time); - - if (res == WAIT_TIMEOUT) - return ERROR_SEM_TIMEOUT; - - if (res == WAIT_FAILED) - return GetLastError(); - - return 0; -} - -/* - int my_pthread_auto_mutex_free(HANDLE* ph) - - NOTES - releases a mutex. 
- - RETURN - 0 thread released mutex - <>0 error - -*/ -static -int my_pthread_auto_mutex_free(HANDLE* ph) -{ - if (*ph) - { - ReleaseMutex(*ph); - CloseHandle(*ph); - *ph= NULL; - } - - return 0; -} - - -#define pthread_auto_mutex_decl(name) \ - HANDLE __h##name= NULL; - -#define pthread_auto_mutex_lock(name, proc, time) \ - my_pthread_auto_mutex_lock(&__h##name, #name, (proc), (time)) - -#define pthread_auto_mutex_free(name) \ - my_pthread_auto_mutex_free(&__h##name) - - -/* - char* my_cgets() - - NOTES - Replaces _cgets from libc to support input of more than 255 chars. - Reads from the console via ReadConsole into buffer which - should be at least clen characters. - Actual length of string returned in plen. - - WARNING - my_cgets() does NOT check the pushback character buffer (i.e., _chbuf). - Thus, my_cgets() will not return any character that is pushed back by - the _ungetch() call. - - RETURN - string pointer ok - NULL Error - -*/ - -char* my_cgets(char *buffer, size_t clen, size_t* plen) -{ - ULONG state; - char *result; - DWORD plen_res; - CONSOLE_SCREEN_BUFFER_INFO csbi; - - pthread_auto_mutex_decl(my_conio_cs); - - /* lock the console for the current process*/ - if (pthread_auto_mutex_lock(my_conio_cs, GetCurrentProcessId(), INFINITE)) - { - /* can not lock console */ - pthread_auto_mutex_free(my_conio_cs); - return NULL; - } - - /* init console input */ - if (my_coninpfh == 0) - { - /* same handle will be used until process termination */ - my_coninpfh= CreateFile("CONIN$", GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, 0, NULL); - } - - if (my_coninpfh == INVALID_HANDLE_VALUE) - { - /* unlock the console */ - pthread_auto_mutex_free(my_conio_cs); - return(NULL); - } - - GetConsoleMode((HANDLE)my_coninpfh, &state); - SetConsoleMode((HANDLE)my_coninpfh, ENABLE_LINE_INPUT | - ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT); - - GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); - - /* - there is no 
known way to determine allowed buffer size for input - though it is known it should not be more than 64K - so we cut 64K and try first size of screen buffer - if it is still to large we cut half of it and try again - later we may want to cycle from min(clen, 65535) to allowed size - with small decrement to determine exact allowed buffer - */ - clen= min(clen, 65535); - do - { - clen= min(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y); - if (!ReadConsole((HANDLE)my_coninpfh, (LPVOID)buffer, (DWORD) clen - 1, &plen_res, - NULL)) - { - result= NULL; - clen>>= 1; + +/* Windows console handling */ + +/* Maximum line length on Windows console */ +#define MAX_CONSOLE_LINE_SIZE 65535 + +/** + Determine if a file is a windows console + + @param file Input stream + + @return + @retval 0 if file is not Windows console + @retval 1 if file is Windows console +*/ +my_bool +my_win_is_console(FILE *file) +{ + DWORD mode; + if (GetConsoleMode((HANDLE) _get_osfhandle(_fileno(file)), &mode)) + return 1; + return 0; +} + + +/** + Read line from Windows console using Unicode API + and translate input to session character set. + Note, as Windows API breaks supplementary characters + into two wchar_t pieces, we cannot read and convert individual + wchar_t values separately. So let's use a buffer for + Unicode console input, and then convert it to "cs" in a single shot. + String is terminated with '\0' character. + + @param cs Character string to convert to. + @param mbbuf Write input data here. + @param mbbufsize Number of bytes available in mbbuf. + + @rerval Pointer to mbbuf, or NULL on I/0 error. 
+*/ +char * +my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize) +{ + uint dummy_errors; + static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1], *pos; + size_t mblen; + DWORD console_mode; + HANDLE console= GetStdHandle(STD_INPUT_HANDLE); + + DBUG_ASSERT(mbbufsize > 0); /* Need space for at least trailing '\0' */ + GetConsoleMode(console, &console_mode); + SetConsoleMode(console, ENABLE_LINE_INPUT | + ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT); + for(pos= u16buf; pos < &u16buf[MAX_CONSOLE_LINE_SIZE] ; ) + { + DWORD nchars; + if (!ReadConsoleW(console, pos, 1, &nchars, NULL) || nchars == 0) + { + SetConsoleMode(console, console_mode); + return NULL; + } + if (*pos == L'\r') /* We don't need '\r' in the result string, skip it */ + continue; + if (*pos == L'\n') + break; + pos++; + } + SetConsoleMode(console, console_mode); + /* Convert Unicode to session character set */ + mblen= my_convert(mbbuf, mbbufsize - 1, cs, + (const char *) u16buf, (pos - u16buf) * sizeof(wchar_t), + &my_charset_utf16le_bin, &dummy_errors); + DBUG_ASSERT(mblen < mbbufsize); /* Safety */ + mbbuf[mblen]= 0; + return mbbuf; +} + + +/** + Translate client charset to Windows wchars for console I/O. + Unlike copy_and_convert(), in case of a wrong multi-byte sequence + we don't print '?' character, we fallback to ISO-8859-1 instead. + This gives a better idea how binary data (e.g. BLOB) look like. 
+ + @param cs Character set of the input string + @param from Input string + @param from_length Length of the input string + @param to[OUT] Write Unicode data here + @param to_chars Number of characters available in "to" +*/ +static size_t +my_mbstou16s(CHARSET_INFO *cs, const uchar * from, size_t from_length, + wchar_t *to, size_t to_chars) +{ + CHARSET_INFO *to_cs= &my_charset_utf16le_bin; + const uchar *from_end= from + from_length; + wchar_t *to_orig= to, *to_end= to + to_chars; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; + while (from < from_end) + { + int cnvres; + my_wc_t wc; + if ((cnvres= (*mb_wc)(cs, &wc, from, from_end)) > 0) + { + if (!wc) + break; + from+= cnvres; + } + else if (cnvres == MY_CS_ILSEQ) + { + wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */ + from+= 1; + } + else if (cnvres > MY_CS_TOOSMALL) + { + /* + A correct multibyte sequence detected + But it doesn't have Unicode mapping. + */ + from+= (-cnvres); + wc= '?'; + } + else /* Incomplete character */ + { + wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */ + from+= 1; + } +outp: + if ((cnvres= (*wc_mb)(to_cs, wc, (uchar *) to, (uchar *) to_end)) > 0) + { + /* We can never convert only a part of wchar_t */ + DBUG_ASSERT((cnvres % sizeof(wchar_t)) == 0); + /* cnvres returns number of bytes, convert to number of wchar_t's */ + to+= cnvres / sizeof(wchar_t); + } + else if (cnvres == MY_CS_ILUNI && wc != '?') + { + wc= '?'; + goto outp; } else - { - result= buffer; - break; - } + break; /* Not enough space */ } - while (GetLastError() == ERROR_NOT_ENOUGH_MEMORY); - *plen= plen_res; - - /* We go here on error reading the string (Ctrl-C for example) */ - if (!*plen) - result= NULL; /* purecov: inspected */ - - if (result != NULL) + return to - to_orig; +} + + +/** + Write a string in the given character set to Windows console. 
+ As Window breaks supplementary characters into two parts, + we cannot use a simple loop sending the result of + cs->cset->mb_wc() to console. + So we converts string from client charset to an array of wchar_t, + then write the array to console in a single shot. + + @param cs Character set of the string + @param data String to print + @param datalen Length of input string in bytes +*/ +void +my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen) +{ + static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1]; + size_t nchars= my_mbstou16s(cs, (const uchar *) data, datalen, + u16buf, sizeof(u16buf)); + DWORD nwritten; + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), + u16buf, (DWORD) nchars, &nwritten, NULL); +} + + +/** + Write a single-byte character to console. + Note: one should not send parts of the same multi-byte character + in separate consequent my_win_console_putc() calls. + For multi-byte characters use my_win_colsole_write() instead. + + @param cs Character set of the input character + @param c Character (single byte) +*/ +void +my_win_console_putc(CHARSET_INFO *cs, int c) +{ + char ch= (char) c; + my_win_console_write(cs, &ch, 1); +} + + +/** + Write a 0-terminated string to Windows console. + + @param cs Character set of the string to print + @param data String to print +*/ +void +my_win_console_fputs(CHARSET_INFO *cs, const char *data) +{ + my_win_console_write(cs, data, strlen(data)); +} + + +/* + Handle formatted output on the Windows console. +*/ +void +my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args) +{ + static char buff[MAX_CONSOLE_LINE_SIZE + 1]; + size_t len= vsnprintf(buff, sizeof(buff) - 1, fmt, args); + my_win_console_write(cs, buff, len); +} + + +#include + +/** + Translate Unicode command line parameters to the given character set + (Typically to utf8mb4). + Translated parameters are allocated using my_once_alloc(). + + @param tocs Character set to convert parameters to. 
+ @param[OUT] argc Write number of parameters here + @param[OUT] argv Write pointer to allocated parameters here. +*/ +int +my_win_translate_command_line_args(CHARSET_INFO *cs, int *argc, char ***argv) +{ + int i, ac; + char **av; + wchar_t *command_line= GetCommandLineW(); + wchar_t **wargs= CommandLineToArgvW(command_line, &ac); + size_t nbytes= (ac + 1) * sizeof(char *); + + /* Allocate new command line parameter */ + av= (char **) my_once_alloc(nbytes, MYF(MY_ZEROFILL)); + + for(i= 0; i < *argc; i++) { - if (*plen > 1 && buffer[*plen - 2] == '\r') - { - *plen= *plen - 2; - } - else - { - if (*plen > 0 && buffer[*plen - 1] == '\r') - { - char tmp[3]; - int tmplen= sizeof(tmp); - - *plen= *plen - 1; - /* read /n left in the buffer */ - ReadConsole((HANDLE)my_coninpfh, (LPVOID)tmp, tmplen, &tmplen, NULL); - } - } - buffer[*plen]= '\0'; + uint dummy_errors; + size_t arg_len= wcslen(wargs[i]); + size_t len, alloced_len= arg_len * cs->mbmaxlen + 1; + av[i]= (char *) my_once_alloc(alloced_len, MYF(0)); + len= my_convert(av[i], alloced_len, cs, + (const char *) wargs[i], arg_len * sizeof(wchar_t), + &my_charset_utf16le_bin, &dummy_errors); + DBUG_ASSERT(len < alloced_len); + av[i][len]= '\0'; } - - SetConsoleMode((HANDLE)my_coninpfh, state); - /* unlock the console */ - pthread_auto_mutex_free(my_conio_cs); - - return result; + *argv= av; + *argc= ac; + /* Cleanup on exit */ + LocalFree((HLOCAL) wargs); + return 0; } #endif /* __WIN__ */ === modified file 'sql-common/client.c' --- sql-common/client.c 2011-01-31 15:55:58 +0000 +++ sql-common/client.c 2011-02-22 09:12:41 +0000 @@ -4246,11 +4246,31 @@ if (mysql->options.charset_dir) charsets_dir= mysql->options.charset_dir; + if (!mysql->net.vio) + { + /* Initialize with automatic OS character set detection. 
*/ + mysql_options(mysql, MYSQL_SET_CHARSET_NAME, cs_name); + mysql_init_character_set(mysql); + /* + In case of automatic OS character set detection + mysql_init_character_set changes mysql->options.charset_name + from "auto" to the real character set name. + Reset cs_name to the detected character set name, accordingly. + */ + cs_name= mysql->options.charset_name; + } + if (strlen(cs_name) < MY_CS_NAME_SIZE && (cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0)))) { char buff[MY_CS_NAME_SIZE + 10]; charsets_dir= save_csdir; + if (!mysql->net.vio) + { + /* If there is no connection yet we don't send "SET NAMES" query */ + mysql->charset= cs; + return 0; + } /* Skip execution of "SET NAMES" for pre-4.1 servers */ if (mysql_get_server_version(mysql) < 40100) return 0; --------------020503080504050509000108--