List:Commits« Previous MessageNext Message »
From:Alexander Barkov Date:February 17 2011 2:05pm
Subject:WL#5331 Unicode API for Windows command line client
View as plain text  
Version 3, with old API code removed.



WL#5331 Support Unicode for Windows command line client

Based on the original patch from Vladislav Vaintroub:
http://lists.mysql.com/commits/105379


  @ client/mysql.cc
  - Introducing a new function tee_write(), to be reused
    in a number of places where similar loops displaying
    data occur.
  - introducing flags for tee_write(), to support different
    printing modes, according to --xml, --raw, --tab, etc,
    parameters
  - Instead of using argv (which is always in ANSI code page),
    we now use UTF16LE API to access command line arguments on Windows,
    using this scenario:
    a. We translate arguments to UTF8MB4 on startup.
    b. Then we process arguments and detect connection character set
       from --default-character-set arguments (or my.ini value),
       or from the OS localization information by default.
    c. Then we convert user, database and the --execute (-e) buffer
       from UTF8MB4 to the connection character set.
    d. Connect
  - Instead of printing using printf/fputs family functions on Windows,
    which are limited to the current DOS code page (cp850 on a Western machine),
    we now use UTF16LE console API through the new my_win_console_xxx()
    functions implemented in my_conio.c
  - Using mysql_set_character_set() instead of
    mysql_options(MYSQL_SET_CHARSET_NAME) to know the
    session character set *before* the mysql_real_connect() call,
    to convert user and database properly.

  @ client/mysqltest.cc
  - Introducing a new mysqltest command: --execw, to 
    execute commands with non-ASCII characters correctly in Windows.


  @ include/my_sys.h
  - Adding prototypes for the my_win_console_xxx() functions

  @ mysql-test/t/grant.test
  @ mysql-test/t/mysql.test
  @ mysql-test/t/mysql_cp932.test
  @ mysql-test/t/mysqlbinlog-cp932.test
  - Using --execw instead of --exec for the affected tests

  @ mysys/my_conio.c
  - Implementing functions for Windows console read/write and
    command line argument processing.
  - Removing my_cgets(), as it's not used any more.

  @ sql-common/client.c
  - Fixing mysql_set_character_set() to set mysql.charset on
    a non-connected "mysql". Previously such a call crashed.
    This change makes it possible to know what the character set of the
    upcoming session is going to be *before* calling mysql_real_connect(),
    which is used in mysql.cc.

=== modified file 'client/mysql.cc'
--- client/mysql.cc	2011-02-15 12:38:39 +0000
+++ client/mysql.cc	2011-02-17 12:05:04 +0000
@@ -195,6 +195,22 @@
 
 const char *default_dbug_option="d:t:o,/tmp/mysql.trace";
 
+#ifdef __WIN__
+ /*
+   A flag that indicates if --execute buffer has already been converted,
+   to avoid double conversion on reconnect
+ */
+static my_bool conversion_done= 0;
+#endif /* __WIN__ */
+
+/* Various printing flags */
+#define MY_PRINT_ESC_0 1  /* Replace 0x00 bytes to "\0"              */
+#define MY_PRINT_SPS_0 2  /* Replace 0x00 bytes to space             */
+#define MY_PRINT_XML   4  /* Encode XML entities                     */
+#define MY_PRINT_MB    8  /* Recognize multi-byte characters         */
+#define MY_PRINT_CTRL 16  /* Replace TAB, NL, CR to "\t", "\n", "\r" */
+
+void tee_write(FILE *file, const char *s, size_t slen, int flags);
 void tee_fprintf(FILE *file, const char *fmt, ...);
 void tee_fputs(const char *s, FILE *file);
 void tee_puts(const char *s, FILE *file);
@@ -1113,6 +1129,11 @@
       close(stdout_fileno_copy);             /* Clean up dup(). */
   }
 
+#ifdef __WIN__
+  /* Convert command line parameters from UTF16LE to UTF8MB4. */
+  my_win_translate_command_line_args(&my_charset_utf8mb4_bin, &argc, &argv);
+#endif
+
   if (load_defaults("my",load_default_groups,&argc,&argv))
   {
     my_end(0);
@@ -1885,22 +1906,9 @@
         tmpbuf.alloc(65535);
       tmpbuf.length(0);
       buffer.length(0);
-      size_t clen;
-      do
-      {
-	line= my_cgets((char*)tmpbuf.ptr(), tmpbuf.alloced_length()-1, &clen);
-        buffer.append(line, clen);
-        /* 
-           if we got buffer fully filled than there is a chance that
-           something else is still in console input buffer
-        */
-      } while (tmpbuf.alloced_length() <= clen);
-      /* 
-        An empty line is returned from my_cgets when there's error reading :
-        Ctrl-c for example
-      */
-      if (line)
-        line= buffer.c_ptr();
+      line= my_win_console_readline(charset_info,
+                                    (char*) tmpbuf.ptr(),
+                                    tmpbuf.alloced_length());
 #else
       if (opt_outfile)
 	fputs(prompt, OUTFILE);
@@ -3455,19 +3463,12 @@
     grid.  (The \0 is also the reason we can't use fprintf() .) 
   */
   unsigned int i;
-  const char *p;
 
   if (right_justified) 
     for (i= data_length; i < total_bytes_to_send; i++)
       tee_putc((int)' ', PAGER);
 
-  for (i= 0, p= data; i < data_length; i+= 1, p+= 1)
-  {
-    if (*p == '\0')
-      tee_putc((int)' ', PAGER);
-    else
-      tee_putc((int)*p, PAGER);
-  }
+  tee_write(PAGER, data, data_length, MY_PRINT_SPS_0|MY_PRINT_MB);
 
   if (! right_justified) 
     for (i= data_length; i < total_bytes_to_send; i++)
@@ -3587,16 +3588,7 @@
         tee_fprintf(PAGER, "%*s: ",(int) max_length,field->name);
       if (cur[off])
       {
-        unsigned int i;
-        const char *p;
-
-        for (i= 0, p= cur[off]; i < lengths[off]; i+= 1, p+= 1)
-        {
-          if (*p == '\0')
-            tee_putc((int)' ', PAGER);
-          else
-            tee_putc((int)*p, PAGER);
-        }
+        tee_write(PAGER, cur[off], lengths[off], MY_PRINT_SPS_0|MY_PRINT_MB);
         tee_putc('\n', PAGER);
       }
       else
@@ -3666,16 +3658,7 @@
   if (!src)
     tee_fputs("NULL", PAGER);
   else
-  {
-    for (const char *p = src; length; p++, length--)
-    {
-      const char *t;
-      if ((t = array_value(xmlmeta, *p)))
-	tee_fputs(t, PAGER);
-      else
-	tee_putc(*p, PAGER);
-    }
-  }
+    tee_write(PAGER, src, length, MY_PRINT_XML|MY_PRINT_MB);
 }
 
 
@@ -3686,37 +3669,9 @@
     tee_fputs("NULL", PAGER);
   else
   {
-    if (opt_raw_data)
-    {
-      unsigned long i;
-      /* Can't use tee_fputs(), it stops with NUL characters. */
-      for (i= 0; i < length; i++, pos++)
-        tee_putc(*pos, PAGER);
-    }
-    else for (const char *end=pos+length ; pos != end ; pos++)
-    {
-#ifdef USE_MB
-      int l;
-      if (use_mb(charset_info) &&
-          (l = my_ismbchar(charset_info, pos, end)))
-      {
-	  while (l--)
-	    tee_putc(*pos++, PAGER);
-	  pos--;
-	  continue;
-      }
-#endif
-      if (!*pos)
-	tee_fputs("\\0", PAGER); // This makes everything hard
-      else if (*pos == '\t')
-	tee_fputs("\\t", PAGER); // This would destroy tab format
-      else if (*pos == '\n')
-	tee_fputs("\\n", PAGER); // This too
-      else if (*pos == '\\')
-	tee_fputs("\\\\", PAGER);
-	else
-	tee_putc(*pos, PAGER);
-    }
+    int flags= MY_PRINT_MB | (opt_raw_data ? 0 : (MY_PRINT_ESC_0|MY_PRINT_CTRL));
+    /* Can't use tee_fputs(), it stops with NUL characters. */
+    tee_write(PAGER, pos, length, flags);
   }
 }
 
@@ -4317,7 +4272,29 @@
     mysql_options(&mysql, MYSQL_INIT_COMMAND, init_command);
   }
 
-  mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
+  mysql_set_character_set(&mysql, default_charset);
+#ifdef __WIN__
+  uint cnv_errors;
+  String converted_database, converted_user;
+  if (!my_charset_same(&my_charset_utf8mb4_bin, mysql.charset))
+  {
+    /* Convert user and database from UTF8MB4 to connection character set */
+    if (user)
+    {
+      converted_user.copy(user, strlen(user) + 1,
+                          &my_charset_utf8mb4_bin, mysql.charset,
+                          &cnv_errors);
+      user= (char*) converted_user.ptr();
+    }
+    if (database)
+    {
+      converted_database.copy(database, strlen(database) + 1,
+                              &my_charset_utf8mb4_bin, mysql.charset,
+                              &cnv_errors);
+      database= (char*) converted_database.ptr();
+    }
+  }
+#endif
   
   if (opt_plugin_dir && *opt_plugin_dir)
     mysql_options(&mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir);
@@ -4339,7 +4316,38 @@
     }
     return -1;					// Retryable
   }
-  
+
+#ifdef __WIN__
+  /* Convert --execute buffer from UTF8MB4 to connection character set */
+  if (!conversion_done++ &&
+      status.line_buff &&
+      !status.line_buff->file && /* Convert only -e buffer, not real file */
+      status.line_buff->buffer < status.line_buff->end && /* Non-empty */
+      !my_charset_same(&my_charset_utf8mb4_bin, mysql.charset))
+  {
+    String tmp;
+    size_t len= status.line_buff->end - status.line_buff->buffer;
+    uint dummy_errors;
+    /*
+      Don't convert trailing '\n' character - it was appended during
+      last batch_readline_command() call. 
+      Otherwise we'll get an extra line, which makes some tests fail.
+    */
+    if (status.line_buff->buffer[len - 1] == '\n')
+      len--;
+    if (tmp.copy(status.line_buff->buffer, len,
+                 &my_charset_utf8mb4_bin, mysql.charset, &dummy_errors))
+      return 1;
+
+    /* Free the old line buffer */
+    batch_readline_end(status.line_buff);
+
+    /* Re-initialize line buffer from the converted string */
+    if (!(status.line_buff= batch_readline_command(NULL, (char*) tmp.c_ptr_safe())))
+      return 1;
+  }
+#endif /* __WIN__ */
+
   charset_info= mysql.charset;
   
   connected=1;
@@ -4645,11 +4653,75 @@
 }
 
 
+/**
+  Write data to a stream.
+  Various modes, corresponding to --tab, --xml, --raw parameters,
+  are supported.
+
+  @param file   Stream to write to
+  @param s      String to write
+  @param slen   String length
+  @param flags  Flags for --tab, --xml, --raw.
+*/
+void tee_write(FILE *file, const char *s, size_t slen, int flags)
+{
+  const char *se;
+
+  for (se= s + slen; s < se; s++)
+  {
+    const char *t;
+
+    if (flags & MY_PRINT_MB)
+    {
+      int mblen, i;
+      if (use_mb(charset_info) &&
+          (mblen= my_ismbchar(charset_info, s, se)))
+      {
+#ifdef __WIN__
+        if (my_win_is_console(file))
+          my_win_console_write(charset_info, s, mblen);
+        else
+          fwrite(s, 1, mblen, file);
+        if (opt_outfile)
+          fwrite(s, 1, mblen, OUTFILE);
+        s+= mblen - 1;
+        continue;
+#endif
+        for (i= 0; i < mblen; i++)
+	        tee_putc(s[i], file);
+        s+= mblen - 1;
+	      continue;
+      }
+    }
+
+    if ((flags & MY_PRINT_XML) && (t= array_value(xmlmeta, *s)))
+      tee_fputs(t, file);
+    else if ((flags & MY_PRINT_SPS_0) && *s == '\0')
+      tee_putc((int) ' ', file);   // This makes everything hard
+    else if ((flags & MY_PRINT_ESC_0) && *s == '\0')
+      tee_fputs("\\0", file);   // This makes everything hard
+    else if ((flags & MY_PRINT_CTRL) && *s == '\t')
+      tee_fputs("\\t", file);      // This would destroy tab format
+    else if ((flags & MY_PRINT_CTRL) && *s == '\n')
+      tee_fputs("\\n", file);      // This too
+    else if ((flags & MY_PRINT_CTRL) && *s == '\\')
+      tee_fputs("\\\\", file);
+    else
+      tee_putc((int) *s, file);
+  }
+}
+
+
 void tee_fprintf(FILE *file, const char *fmt, ...)
 {
   va_list args;
 
   va_start(args, fmt);
+#ifdef __WIN__
+  if (my_win_is_console(file))
+    my_win_console_vfprintf(charset_info, fmt, args);
+  else
+#endif
   (void) vfprintf(file, fmt, args);
   va_end(args);
 
@@ -4664,6 +4736,11 @@
 
 void tee_fputs(const char *s, FILE *file)
 {
+#ifdef __WIN__
+  if (my_win_is_console(file))
+    my_win_console_fputs(charset_info, s);
+  else
+#endif
   fputs(s, file);
   if (opt_outfile)
     fputs(s, OUTFILE);
@@ -4672,17 +4749,17 @@
 
 void tee_puts(const char *s, FILE *file)
 {
-  fputs(s, file);
-  fputc('\n', file);
-  if (opt_outfile)
-  {
-    fputs(s, OUTFILE);
-    fputc('\n', OUTFILE);
-  }
+  tee_fputs(s, file);
+  tee_putc('\n', file);
 }
 
 void tee_putc(int c, FILE *file)
 {
+#ifdef __WIN__
+  if (my_win_is_console(file))
+    my_win_console_putc(charset_info, c);
+  else
+#endif
   putc(c, file);
   if (opt_outfile)
     putc(c, OUTFILE);

=== modified file 'client/mysqltest.cc'
--- client/mysqltest.cc	2011-01-26 20:13:31 +0000
+++ client/mysqltest.cc	2011-02-17 11:32:06 +0000
@@ -302,7 +302,7 @@
   Q_ENABLE_WARNINGS, Q_DISABLE_WARNINGS,
   Q_ENABLE_INFO, Q_DISABLE_INFO,
   Q_ENABLE_METADATA, Q_DISABLE_METADATA,
-  Q_EXEC, Q_DELIMITER,
+  Q_EXEC, Q_EXECW, Q_DELIMITER,
   Q_DISABLE_ABORT_ON_ERROR, Q_ENABLE_ABORT_ON_ERROR,
   Q_DISPLAY_VERTICAL_RESULTS, Q_DISPLAY_HORIZONTAL_RESULTS,
   Q_QUERY_VERTICAL, Q_QUERY_HORIZONTAL, Q_SORTED_RESULT,
@@ -373,6 +373,7 @@
   "enable_metadata",
   "disable_metadata",
   "exec",
+  "execw",
   "delimiter",
   "disable_abort_on_error",
   "enable_abort_on_error",
@@ -2750,8 +2751,52 @@
 #endif
 
 
-FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode)
+FILE* my_popen(DYNAMIC_STRING *ds_cmd, const char *mode,
+               struct st_command *command)
 {
+#if __WIN__
+  /*
+    --execw is for tests executing commands containing non-ASCII characters.
+
+    To correctly start such a program on Windows, we need to use the "wide"
+    version of popen, with prior translation of the command line from
+    the file character set to wide string. We use the current value
+    of --character_set as a file character set, so before using --execw
+    make sure to set --character_set properly.
+
+    If we use the non-wide version of popen, Windows internally
+    converts command line from the current ANSI code page to wide string.
+    In case when character set of the command line does not match the
+    current ANSI code page, non-ASCII characters get garbled in most cases.
+
+    On Linux, the command line passed to popen() is considered
+    as a binary string, no internal to-wide or from-wide
+    character set conversion happens, so we don't need to do anything.
+    On Linux --execw is just a synonym for --exec.
+
+    For simplicity, assume that the command line is limited to 4KB
+    (like in cmd.exe) and that the mode is at most 10 characters.
+  */
+  if (command->type == Q_EXECW)
+  {
+    wchar_t wcmd[4096];
+    wchar_t wmode[10];
+    const char *cmd= ds_cmd->str;
+    uint dummy_errors;
+    size_t len;
+    len= my_convert((char*) wcmd, sizeof(wcmd) - sizeof(wcmd[0]),
+                    &my_charset_utf16le_bin,
+                    ds_cmd->str, strlen(ds_cmd->str), charset_info,
+                    &dummy_errors);
+    wcmd[len / sizeof(wchar_t)]= 0;
+    len= my_convert((char*) wmode, sizeof(wmode) - sizeof(wmode[0]),
+                    &my_charset_utf16le_bin,
+                    mode, strlen(mode), charset_info, &dummy_errors);
+    wmode[len / sizeof(wchar_t)]= 0;
+    return _wpopen(wcmd, wmode);
+  }
+#endif __WIN__
+
 #if defined __WIN__ && defined USE_CYGWIN
   /* Dump the command into a sh script file and execute with popen */
   str_to_file(tmp_sh_name, ds_cmd->str, ds_cmd->length);
@@ -2888,7 +2933,7 @@
   DBUG_PRINT("info", ("Executing '%s' as '%s'",
                       command->first_argument, ds_cmd.str));
 
-  if (!(res_file= my_popen(&ds_cmd, "r")) && command->abort_on_error)
+  if (!(res_file= my_popen(&ds_cmd, "r", command)) && command->abort_on_error)
   {
     dynstr_free(&ds_cmd);
     die("popen(\"%s\", \"r\") failed", command->first_argument);
@@ -8763,6 +8808,7 @@
         do_shutdown_server(command);
         break;
       case Q_EXEC:
+      case Q_EXECW:
 	do_exec(command);
 	command_executed++;
 	break;

=== modified file 'include/my_sys.h'
--- include/my_sys.h	2011-02-08 15:54:12 +0000
+++ include/my_sys.h	2011-02-17 12:39:50 +0000
@@ -946,9 +946,14 @@
 void my_security_attr_free(SECURITY_ATTRIBUTES *sa);
 
 /* implemented in my_conio.c */
-char* my_cgets(char *string, size_t clen, size_t* plen);
-
-#endif
+my_bool my_win_is_console(FILE *file);
+char *my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize);
+void my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen);
+void my_win_console_fputs(CHARSET_INFO *cs, const char *data);
+void my_win_console_putc(CHARSET_INFO *cs, int c);
+void my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args);
+int my_win_translate_command_line_args(CHARSET_INFO *cs, int *ac, char ***av);
+#endif /* __WIN__ */
 
 #include <mysql/psi/psi.h>
 

=== modified file 'mysql-test/t/grant.test'
--- mysql-test/t/grant.test	2010-12-15 16:15:40 +0000
+++ mysql-test/t/grant.test	2011-02-17 09:41:06 +0000
@@ -1401,9 +1401,10 @@
 #
 # Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte
 #
+--character_set utf8
 set names utf8;
 grant select on test.* to
юзер_юзер@localhost;
---exec $MYSQL --default-character-set=utf8
--user=юзер_юзер
-e "select user()"
+--execw $MYSQL --default-character-set=utf8
--user=юзер_юзер
-e "select user()"
 revoke all on test.* from
юзер_юзер@localhost;
 drop user
юзер_юзер@localhost;
 --error ER_WRONG_STRING_LENGTH

=== modified file 'mysql-test/t/mysql.test'
--- mysql-test/t/mysql.test	2011-02-05 05:06:29 +0000
+++ mysql-test/t/mysql.test	2011-02-17 11:09:16 +0000
@@ -51,13 +51,14 @@
 #
 # Bug#17939 Wrong table format when using UTF8 strings
 #
---exec $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as '__tañgè Ñãmé'"
2>&1
---exec $MYSQL --default-character-set=utf8 --table -e "SELECT '__tañgè
Ñãmé' as 'John Doe'" 2>&1
+--character_set utf8
+--execw $MYSQL --default-character-set=utf8 --table -e "SELECT 'John Doe' as '__tañgè
Ñãmé'" 2>&1
+--execw $MYSQL --default-character-set=utf8 --table -e "SELECT '__tañgè Ñãmé' as 'John
Doe'" 2>&1
 
 #
 # Bug#18265 -- mysql client: No longer right-justifies numeric columns
 #
---exec $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values ('<----------------------->'); insert into t1 (k) values ('<-----'); insert into t1 (k) values ('Τη
γλώσσα'); insert into t1 (k)
values ('ᛖᚴ ᚷᛖᛏ'); select *
from t1; DROP TABLE t1;"
+--execw $MYSQL -t --default-character-set utf8 test -e "create table t1 (i int, j int, k char(25) charset utf8); insert into t1 (i) values (1); insert into t1 (k) values ('<----------------------->'); insert into t1 (k) values ('<-----'); insert into t1 (k) values ('Τη
γλώσσα'); insert into t1 (k)
values ('ᛖᚴ ᚷᛖᛏ'); select * from t1; DROP TABLE t1;"
 
 #
 # "DESCRIBE" commands may return strange NULLness flags.

=== modified file 'mysql-test/t/mysql_cp932.test'
--- mysql-test/t/mysql_cp932.test	2007-02-21 16:50:48 +0000
+++ mysql-test/t/mysql_cp932.test	2011-02-17 11:13:16 +0000
@@ -15,8 +15,9 @@
 --exec $MYSQL --default-character-set=cp932 test -e "charset utf8;"
 
 # its usage to switch internally in mysql to requested charset
---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select 'ƒ\'; create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('ƒ\'); select * from t1;  drop table t1;"
---exec $MYSQL --default-character-set=utf8 test -e "charset cp932; select 'ƒ\'"
---exec $MYSQL --default-character-set=utf8 test -e "/*charset cp932 */; set character_set_client= cp932; select 'ƒ\'"
---exec $MYSQL --default-character-set=utf8 test -e "/*!\C cp932 */; set character_set_client= cp932; select 'ƒ\'"
+--character_set latin1
+--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select 'ƒ\'; create table t1 (c_cp932 TEXT CHARACTER SET cp932); insert into t1 values('ƒ\'); select * from t1;  drop table t1;"
+--execw $MYSQL --default-character-set=latin1 test -e "charset cp932; select 'ƒ\'"
+--execw $MYSQL --default-character-set=latin1 test -e "/*charset cp932 */; set names cp932, character_set_results=utf8; select 'ƒ\'"
+--execw $MYSQL --default-character-set=latin1 test -e "/*!\C cp932 */; set character_set_client= cp932; select 'ƒ\'"
 

=== modified file 'mysql-test/t/mysqlbinlog-cp932.test'
--- mysql-test/t/mysqlbinlog-cp932.test	2009-09-07 05:42:54 +0000
+++ mysql-test/t/mysqlbinlog-cp932.test	2011-02-17 09:16:34 +0000
@@ -10,8 +10,10 @@
 # Bug#16217 (mysql client did not know how not switch its internal charset)
 create table t3 (f text character set utf8);
 create table t4 (f text character set cp932); 
---exec $MYSQL --default-character-set=utf8 test -e "insert into t3 values(_utf8'ソ')"
---exec $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'ƒ\');"
+--character_set utf8
+--execw $MYSQL --default-character-set=utf8 test -e "insert into t3 values(_utf8'ソ')"
+--character_set cp932
+--execw $MYSQL --default-character-set=cp932 test -e "insert into t4 values(_cp932'ƒ\');"
 flush logs;
 rename table t3 to t03, t4 to t04;
 let $MYSQLD_DATADIR= `select @@datadir`;

=== modified file 'mysys/my_conio.c'
--- mysys/my_conio.c	2009-02-13 16:41:47 +0000
+++ mysys/my_conio.c	2011-02-17 13:36:45 +0000
@@ -18,205 +18,261 @@
 
 #ifdef __WIN__
 
-static HANDLE my_coninpfh= 0;     /* console input */
-
-/*
-  functions my_pthread_auto_mutex_lock & my_pthread_auto_mutex_free
-  are experimental at this moment, they are intended to bring
-  ability of protecting code sections without necessity to explicitly
-  initialize synchronization object in one of threads
-
-  if found useful they are to be exported in mysys
-*/
-
-
-/*
-  int my_pthread_auto_mutex_lock(HANDLE* ph, const char* name, 
-                                 int id, int time)
-  NOTES
-    creates a mutex with given name and tries to lock it time msec.
-    mutex name is appended with id to allow system wide or process wide
-    locks. Handle to created mutex returned in ph argument.
-
-  RETURN
-    0	              thread owns mutex
-    <>0	            error
-*/
-
-static
-int my_pthread_auto_mutex_lock(HANDLE* ph, const char* name, int id, int time)
-{
-  int res;
-  char tname[FN_REFLEN];
-  
-  sprintf(tname, "%s-%08X", name, id);
-  
-  *ph= CreateMutex(NULL, FALSE, tname);
-  if (*ph == NULL)
-    return GetLastError();
-
-  res= WaitForSingleObject(*ph, time);
-  
-  if (res == WAIT_TIMEOUT)
-    return ERROR_SEM_TIMEOUT;
-
-  if (res == WAIT_FAILED)
-    return GetLastError();
-
-  return 0;
-}
-
-/*
-  int my_pthread_auto_mutex_free(HANDLE* ph)
-
-  NOTES
-    releases a mutex.
-
-  RETURN
-    0	              thread released mutex
-    <>0	            error
-
-*/
-static
-int my_pthread_auto_mutex_free(HANDLE* ph)
-{
-  if (*ph)
-  {
-    ReleaseMutex(*ph);
-    CloseHandle(*ph);
-    *ph= NULL;
-  }
-
-  return 0;
-}
-
-
-#define pthread_auto_mutex_decl(name)                           \
-  HANDLE __h##name= NULL;
-
-#define pthread_auto_mutex_lock(name, proc, time)               \
-  my_pthread_auto_mutex_lock(&__h##name, #name, (proc), (time))
-
-#define pthread_auto_mutex_free(name)                           \
-  my_pthread_auto_mutex_free(&__h##name)
-
-
-/*
-  char* my_cgets()
-
-  NOTES
-    Replaces _cgets from libc to support input of more than 255 chars.
-    Reads from the console via ReadConsole into buffer which 
-    should be at least clen characters.
-    Actual length of string returned in plen.
-
-  WARNING
-    my_cgets() does NOT check the pushback character buffer (i.e., _chbuf).
-    Thus, my_cgets() will not return any character that is pushed back by 
-    the _ungetch() call.
-
-  RETURN
-    string pointer	ok
-    NULL	          Error
-
-*/
-
-char* my_cgets(char *buffer, size_t clen, size_t* plen)
-{
-  ULONG state;
-  char *result;
-  DWORD plen_res;
-  CONSOLE_SCREEN_BUFFER_INFO csbi;
-  
-  pthread_auto_mutex_decl(my_conio_cs);
- 
-  /* lock the console for the current process*/
-  if (pthread_auto_mutex_lock(my_conio_cs, GetCurrentProcessId(), INFINITE))
-  {
-    /* can not lock console */
-    pthread_auto_mutex_free(my_conio_cs);  
-    return NULL;
-  }
-
-  /* init console input */
-  if (my_coninpfh == 0)
-  {
-    /* same handle will be used until process termination */
-    my_coninpfh= CreateFile("CONIN$", GENERIC_READ | GENERIC_WRITE,
-                            FILE_SHARE_READ | FILE_SHARE_WRITE,
-                            NULL, OPEN_EXISTING, 0, NULL);
-  }
-
-  if (my_coninpfh == INVALID_HANDLE_VALUE) 
-  {
-    /* unlock the console */
-    pthread_auto_mutex_free(my_conio_cs);  
-    return(NULL);
-  }
-
-  GetConsoleMode((HANDLE)my_coninpfh, &state);
-  SetConsoleMode((HANDLE)my_coninpfh, ENABLE_LINE_INPUT | 
-                 ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
-
-  GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
-
-  /* 
-    there is no known way to determine allowed buffer size for input
-    though it is known it should not be more than 64K               
-    so we cut 64K and try first size of screen buffer               
-    if it is still to large we cut half of it and try again         
-    later we may want to cycle from min(clen, 65535) to allowed size
-    with small decrement to determine exact allowed buffer           
-  */
-  clen= min(clen, 65535);
-  do
-  {
-    clen= min(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y);
-    if (!ReadConsole((HANDLE)my_coninpfh, (LPVOID)buffer, (DWORD) clen - 1, &plen_res,
-                     NULL))
-    {
-      result= NULL;
-      clen>>= 1;
+
+/* Windows console handling */
+
+/* Maximum line length on Windows console */
+#define MAX_CONSOLE_LINE_SIZE 65535
+
+/**
+  Determine if a file is a windows console
+
+  @param file Input stream
+
+  @return
+  @retval  0 if file is not Windows console
+  @retval  1 if file is Windows console
+*/
+my_bool
+my_win_is_console(FILE *file)
+{
+  DWORD mode;
+  if (GetConsoleMode((HANDLE) _get_osfhandle(_fileno(file)), &mode))
+    return 1;
+  return 0;
+}
+
+
+/**
+  Read line from Windows console using Unicode API
+  and translate input to session character set.
+  Note, as Windows API breaks supplementary characters
+  into two wchar_t pieces, we cannot read and convert individual
+  wchar_t values separately. So let's use a buffer for
+  Unicode console input, and then convert it to "cs" in a single shot.
+  String is terminated with '\0' character.
+
+  @param cs         Character set to convert to.
+  @param mbbuf      Write input data here.
+  @param mbbufsize  Number of bytes available in mbbuf.
+
+  @retval           Pointer to mbbuf, or NULL on I/O error.
+*/
+char *
+my_win_console_readline(CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize)
+{
+  uint dummy_errors;
+  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+  size_t pos, mblen;
+  DWORD console_mode;
+  HANDLE console= GetStdHandle(STD_INPUT_HANDLE);
+
+  DBUG_ASSERT(mbbufsize > 0); /* Need space for at least trailing '\0' */
+  GetConsoleMode(console, &console_mode);
+  SetConsoleMode(console, ENABLE_LINE_INPUT |
+	                        ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
+  for(pos= 0; ; )
+  {
+    DWORD nchars;
+    BOOL ok= ReadConsoleW(console, &u16buf[pos], 1, &nchars, NULL);
+    if (!ok || nchars == 0)
+    {
+      SetConsoleMode(console, console_mode);
+      return NULL;
+    }
+    if (u16buf[pos] == L'\r')
+      continue;
+    if (pos == MAX_CONSOLE_LINE_SIZE || u16buf[pos] == L'\n')
+      break;
+    pos++;
+  }
+  SetConsoleMode(console, console_mode);
+  /* Convert Unicode to session character set */
+  mblen= my_convert(mbbuf, mbbufsize - 1, cs,
+                    (const char *) u16buf, pos * sizeof(wchar_t),
+	                   &my_charset_utf16le_bin, &dummy_errors);
+  DBUG_ASSERT(mblen < mbbufsize); /* Safety */
+  mbbuf[mblen]= 0;
+  return mbbuf;
+}
+
+
+/**
+  Translate client charset to Windows wchars for console I/O.
+  Unlike copy_and_convert(), in case of a wrong multi-byte sequence
+  we don't print '?' character, we fallback to ISO-8859-1 instead.
+  This gives a better idea how binary data (e.g. BLOB) look like.
+
+  @param cs           Character set of the input string
+  @param from         Input string
+  @param from_length  Length of the input string
+  @param to[OUT]      Write Unicode data here
+  @param to_chars     Number of characters available in "to"
+*/
+static size_t
+my_mbstou16s(CHARSET_INFO *cs, const uchar * from, size_t from_length,
+             wchar_t *to, size_t to_chars)
+{
+  CHARSET_INFO *to_cs= &my_charset_utf16le_bin;
+  const uchar *from_end= from + from_length;
+  wchar_t *to_orig= to, *to_end= to + to_chars;
+  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
+  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+  while (from < from_end)
+  {
+    int cnvres;
+    my_wc_t wc;
+    if ((cnvres= (*mb_wc)(cs, &wc, from, from_end)) > 0)
+    {
+      if (!wc)
+        break;
+      from+= cnvres;
+    }
+    else if (cnvres == MY_CS_ILSEQ)
+    {
+      wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */
+      from+= 1;
+    }
+    else if (cnvres > MY_CS_TOOSMALL)
+    {
+      /*
+        A correct multibyte sequence detected
+        But it doesn't have Unicode mapping. 
+      */
+      from+= (-cnvres);
+      wc= '?';
+    }
+    else /* Incomplete character */
+    {
+      wc= (my_wc_t) (uchar) *from; /* Fallback to ISO-8859-1 */
+      from+= 1;
+    }
+outp:
+    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, (uchar*) to_end)) > 0)
+    {
+      /* We can never convert only a part of wchar_t */
+      DBUG_ASSERT((cnvres % sizeof(wchar_t)) == 0);
+      /* cnvres returns number of bytes, convert to number of wchar_t's */
+      to+= cnvres / sizeof(wchar_t);
+    }
+    else if (cnvres == MY_CS_ILUNI && wc != '?')
+    {
+      wc= '?';
+      goto outp;
     }
     else
-    {
-      result= buffer;
-      break;
-    }
+      break; /* Not enough space */
   }
-  while (GetLastError() == ERROR_NOT_ENOUGH_MEMORY);
-  *plen= plen_res;
-
-  /* We go here on error reading the string (Ctrl-C for example) */
-  if (!*plen)
-    result= NULL;                              /* purecov: inspected */
-
-  if (result != NULL)
+  return to - to_orig;
+}
+
+
+/**
+  Write a string in the given character set to Windows console.
+  As Windows breaks supplementary characters into two parts,
+  we cannot use a simple loop sending the result of
+  cs->cset->mb_wc() to console.
+  So we convert the string from client charset to an array of wchar_t,
+  then write the array to console in a single shot.
+
+  @param cs       Character set of the string
+  @param data     String to print
+  @param datalen  Length of input string in bytes
+*/
+void
+my_win_console_write(CHARSET_INFO *cs, const char *data, size_t datalen)
+{
+  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+  size_t nchars= my_mbstou16s(cs, (const uchar*) data, datalen,
+                              u16buf, sizeof(u16buf));
+  DWORD nwritten;
+  WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE),
+                u16buf, (DWORD)nchars, &nwritten, NULL);
+}
+
+
+/**
+  Write a single-byte character to console.
+  Note: one should not send parts of the same multi-byte character
+  in separate consecutive my_win_console_putc() calls.
+  For multi-byte characters use my_win_console_write() instead.
+
+  @param cs  Character set of the input character
+  @param c   Character (single byte)
+*/
+void
+my_win_console_putc(CHARSET_INFO *cs, int c)
+{
+  char ch= (char) c;
+  my_win_console_write(cs, &ch, 1);
+}
+
+
+/**
+  Write a 0-terminated string to Windows console.
+
+  @param cs    Character set of the string to print
+  @param data  String to print
+*/
+void
+my_win_console_fputs(CHARSET_INFO *cs, const char *data)
+{
+  my_win_console_write(cs, data, strlen(data));
+}
+
+
+/*
+  Handle formatted output on the Windows console.
+*/
+void
+my_win_console_vfprintf(CHARSET_INFO *cs, const char *fmt, va_list args)
+{
+  static char buff[MAX_CONSOLE_LINE_SIZE + 1];
+  size_t len= vsnprintf(buff, sizeof(buff) - 1, fmt, args);
+  my_win_console_write(cs, buff, len);
+}
+
+
+#include <shellapi.h>
+
+/**
+  Translate Unicode command line parameters to the given character set
+  (Typically to utf8mb4).
+  Translated parameters are allocated using my_once_alloc().
+
+  @param      cs      Character set to convert parameters to.
+  @param[OUT] argc    Write number of parameters here
+  @param[OUT] argv    Write pointer to allocated parameters here.
+*/
+int
+my_win_translate_command_line_args(CHARSET_INFO *cs, int *argc, char ***argv)
+{
+  int i, ac;
+  char **av;
+  wchar_t *command_line= GetCommandLineW();
+  wchar_t **wargs= CommandLineToArgvW(command_line, &ac);
+  size_t nbytes= (ac + 1) * sizeof(char*);
+
+  /* Allocate new command line parameter */
+  av= (char**) my_once_alloc(nbytes, MYF(MY_ZEROFILL));
+
+  for(i= 0; i < *argc; i++)
   {
-    if (*plen > 1 && buffer[*plen - 2] == '\r')
-    {
-      *plen= *plen - 2;
-    }
-    else 
-    {
-      if (*plen > 0 && buffer[*plen - 1] == '\r')
-      {
-        char tmp[3];
-        int  tmplen= sizeof(tmp);
-
-        *plen= *plen - 1;
-        /* read /n left in the buffer */
-        ReadConsole((HANDLE)my_coninpfh, (LPVOID)tmp, tmplen, &tmplen, NULL);
-      }
-    }
-    buffer[*plen]= '\0';
+    uint dummy_errors;
+    size_t arg_len= wcslen(wargs[i]);
+    size_t len, alloced_len= arg_len * cs->mbmaxlen + 1;
+    av[i]= (char *) my_once_alloc(alloced_len, MYF(0));
+    len= my_convert(av[i], alloced_len, cs,
+                    (const char *) wargs[i], arg_len * sizeof(wchar_t),
+                    &my_charset_utf16le_bin, &dummy_errors);
+    DBUG_ASSERT(len < alloced_len);
+    av[i][len]= '\0';
   }
-
-  SetConsoleMode((HANDLE)my_coninpfh, state);
-  /* unlock the console */
-  pthread_auto_mutex_free(my_conio_cs);  
-
-  return result;
+  *argv= av;
+  *argc= ac;
+  /* Cleanup on exit */
+  LocalFree((HLOCAL) wargs);
+  return 0;
 }
 
 #endif /* __WIN__ */

=== modified file 'sql-common/client.c'
--- sql-common/client.c	2011-01-31 15:55:58 +0000
+++ sql-common/client.c	2011-02-16 16:47:14 +0000
@@ -4246,11 +4246,25 @@
   if (mysql->options.charset_dir)
     charsets_dir= mysql->options.charset_dir;
 
+  if (!mysql->net.vio)
+  {
+    /* Initialize with automatic OS character set detection. */
+    mysql_options(mysql, MYSQL_SET_CHARSET_NAME, cs_name);
+    mysql_init_character_set(mysql);
+    cs_name= mysql->options.charset_name;
+  }
+
   if (strlen(cs_name) < MY_CS_NAME_SIZE &&
      (cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0))))
   {
     char buff[MY_CS_NAME_SIZE + 10];
     charsets_dir= save_csdir;
+    if (!mysql->net.vio)
+    {
+      /* If there is no connection yet we don't send "SET NAMES" query */
+      mysql->charset= cs;
+      return 0;
+    }
     /* Skip execution of "SET NAMES" for pre-4.1 servers */
     if (mysql_get_server_version(mysql) < 40100)
       return 0;

Thread
WL#5331 Unicode API for Windows command line clientAlexander Barkov17 Feb