List:Commits« Previous MessageNext Message »
From:Alexander Nozdrin Date:November 1 2006 11:36am
Subject:bk commit into 5.0 tree (anozdrin:1.2299) BUG#16291
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of alik. When alik does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-11-01 14:36:16+03:00, anozdrin@alik. +6 -0
  This is a preliminary patch for fixing the CREATE VIEW part of the problem
  in BUG#16291: mysqldump corrupts string-constants with non-ascii-chars

  sql/item.cc@stripped, 2006-11-01 14:36:13+03:00, anozdrin@alik. +47 -0
    Introduced a new Item-class -- Item_utf8_string.
    This class is used to store a string value in UTF8 and
    to dump it with an __as-introducer.

  sql/item.h@stripped, 2006-11-01 14:36:13+03:00, anozdrin@alik. +21 -0
    Introduced a new Item-class -- Item_utf8_string.
    This class is used to store a string value in UTF8 and
    to dump it with an __as-introducer.

  sql/sql_lex.cc@stripped, 2006-11-01 14:36:13+03:00, anozdrin@alik. +45 -12
    Change lexer so that it understands __as-introducers.

  sql/sql_lex.h@stripped, 2006-11-01 14:36:13+03:00, anozdrin@alik. +10 -0
    Added members needed for query rewriting.

  sql/sql_parse.cc@stripped, 2006-11-01 14:36:14+03:00, anozdrin@alik. +26 -0
    Parse rewritten query again to build correct tree.

  sql/sql_yacc.yy@stripped, 2006-11-01 14:36:14+03:00, anozdrin@alik. +67 -8
    1. Rewrite text constants in CREATE VIEW query;
    2. Support __as-introducers.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	anozdrin
# Host:	alik.
# Root:	/mnt/raid/alik/MySQL/devel/5.0-rt-charsets

--- 1.236/sql/item.cc	2006-11-01 14:36:20 +03:00
+++ 1.237/sql/item.cc	2006-11-01 14:36:20 +03:00
@@ -736,6 +736,53 @@ Item *Item_string::safe_charset_converte
   return conv;
 }
 
+///////////////////////////////////////////////////////////////////////////
+
+Item_utf8_string *Item_utf8_string::create(
+  THD *thd,
+  LEX_STRING &utf8_str,
+  CHARSET_INFO &local_cs)
+{
+  LEX_STRING local_str;
+
+  thd->convert_string(
+    &local_str,
+    &local_cs,
+    utf8_str.str,
+    utf8_str.length,
+    system_charset_info);
+
+  return new Item_utf8_string(utf8_str, local_str, local_cs);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+Item_utf8_string::Item_utf8_string(
+  LEX_STRING &utf8_str,
+  LEX_STRING &local_str,
+  CHARSET_INFO &local_cs) :
+  Item_string(local_str.str, local_str.length, &local_cs)
+{
+  _utf8_str_value.set_or_copy_aligned(
+    utf8_str.str,
+    utf8_str.length,
+    system_charset_info);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void Item_utf8_string::print(String *dump_buffer)
+{
+  dump_buffer->append("__as_");
+  dump_buffer->append(collation.collation->csname);
+  dump_buffer->append(" \'");
+
+  _utf8_str_value.print(dump_buffer);
+
+  dump_buffer->append('\'');
+}
+
+///////////////////////////////////////////////////////////////////////////
 
 Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
 {

--- 1.209/sql/item.h	2006-11-01 14:36:20 +03:00
+++ 1.210/sql/item.h	2006-11-01 14:36:21 +03:00
@@ -1717,6 +1717,27 @@ public:
 };
 
 
+class Item_utf8_string : public Item_string
+{
+private:
+  String _utf8_str_value;
+
+  Item_utf8_string(
+    LEX_STRING &utf8_str,
+    LEX_STRING &local_str,
+    CHARSET_INFO &local_cs);
+
+public:
+  static Item_utf8_string *create(
+    THD *thd,
+    LEX_STRING &utf8_str,
+    CHARSET_INFO &local_cs);
+
+public:
+  virtual void print(String *str);
+};
+
+
 class Item_static_string_func :public Item_string
 {
   const char *func_name;

--- 1.203/sql/sql_lex.cc	2006-11-01 14:36:21 +03:00
+++ 1.204/sql/sql_lex.cc	2006-11-01 14:36:21 +03:00
@@ -645,19 +645,51 @@ int MYSQLlex(void *arg, void *yythd)
       }
       yylval->lex_str=get_token(lex,length);
 
-      /* 
-         Note: "SELECT _bla AS 'alias'"
-         _bla should be considered as a IDENT if charset haven't been found.
-         So we don't use MYF(MY_WME) with get_charset_by_csname to avoid 
-         producing an error.
+      /*
+        Here we are going to parse the following tokens:
+
+          - explicit character set specification in the form
+            _<cs name> 'text constant'
+
+            Example:
+              _utf8 'UTF8-text'
+
+           - character set conversion in the form
+             __as_<cs name>
+
+            Example:
+              __as_koi8r 'text'
+
+          - simple identifier: if the token starting with '_' is not of the
+            above forms, it is a simple identifier.
       */
 
-      if ((yylval->lex_str.str[0]=='_') && 
-          (lex->underscore_charset=
-             get_charset_by_csname(yylval->lex_str.str + 1,
-                                   MY_CS_PRIMARY,MYF(0))))
-        return(UNDERSCORE_CHARSET);
-      return(result_state);			// IDENT or IDENT_QUOTED
+      if (yylval->lex_str.str[0] != '_')
+        return result_state;
+
+      if (yylval->lex_str.str[1] != '_')
+      {
+        // This is either charset introducer or simple ident.
+
+        lex->underscore_charset=
+          get_charset_by_csname(yylval->lex_str.str + 1, MY_CS_PRIMARY,
+            MYF(0));
+
+        return lex->underscore_charset ? UNDERSCORE_CHARSET : result_state;
+      }
+
+      // This seems to be __as - introducer.
+
+      if (strncmp(yylval->lex_str.str + 2, "as_", 3))
+        return result_state;
+
+      lex->original_charset=
+        get_charset_by_csname(
+          yylval->lex_str.str + 5,
+          MY_CS_PRIMARY,
+          MYF(0));
+
+      return lex->original_charset ? ORIGINAL_CHARSET : result_state;
 
     case MY_LEX_IDENT_SEP:		// Found ident and now '.'
       yylval->lex_str.str=(char*) lex->ptr;
@@ -1672,7 +1704,8 @@ void Query_tables_list::destroy_query_ta
 
 st_lex::st_lex()
   :result(0), yacc_yyss(0), yacc_yyvs(0),
-   sql_command(SQLCOM_END)
+   sql_command(SQLCOM_END),
+   is_query_rewrite_disabled(FALSE)
 {
   reset_query_tables_list(TRUE);
 }

--- 1.230/sql/sql_lex.h	2006-11-01 14:36:21 +03:00
+++ 1.231/sql/sql_lex.h	2006-11-01 14:36:21 +03:00
@@ -861,6 +861,7 @@ typedef struct st_lex : public Query_tab
   gptr yacc_yyss,yacc_yyvs;
   THD *thd;
   CHARSET_INFO *charset, *underscore_charset;
+  CHARSET_INFO *original_charset;
   /* store original leaf_tables for INSERT SELECT and PS/SP */
   TABLE_LIST *leaf_tables_insert;
   /* Position (first character index) of SELECT of CREATE VIEW statement */
@@ -1035,6 +1036,15 @@ typedef struct st_lex : public Query_tab
   uchar *fname_start, *fname_end;
   
   bool escape_used;
+
+  /////////////////////////////////////////////////////////////////////////
+
+  String rewritten_query;
+  char *unprocessed_query_ptr;
+  bool is_view_clause;
+  bool is_query_rewrite_disabled;
+
+  /////////////////////////////////////////////////////////////////////////
 
   st_lex();
 

--- 1.586/sql/sql_parse.cc	2006-11-01 14:36:21 +03:00
+++ 1.587/sql/sql_parse.cc	2006-11-01 14:36:21 +03:00
@@ -4761,6 +4761,32 @@ end_with_restore_list:
 #endif // ifndef DBUG_OFF
   case SQLCOM_CREATE_VIEW:
     {
+      if (!lex->rewritten_query.is_empty())
+      {
+        lex->rewritten_query.append(lex->unprocessed_query_ptr);
+
+        // Parse re-written query again to build correct tree.
+
+        printf("Parsing rewritten query: >>>%s<<<...\n",
+          (const char *) lex->rewritten_query.c_ptr());
+
+        thd->query= strdup(lex->rewritten_query.c_ptr()); // XXX: is it really needed?
+        thd->query_length= lex->rewritten_query.length();
+
+        mysql_init_query(thd, (uchar*) thd->query, thd->query_length);
+
+        lex->is_query_rewrite_disabled= TRUE;
+
+        if (MYSQLparse(thd))
+          goto error;
+
+        lex->is_query_rewrite_disabled= FALSE;
+      }
+      else
+      {
+        printf("Original query hasn't been changed.\n");
+      }
+
       if (end_active_trans(thd))
         goto error;
 

--- 1.493/sql/sql_yacc.yy	2006-11-01 14:36:21 +03:00
+++ 1.494/sql/sql_yacc.yy	2006-11-01 14:36:21 +03:00
@@ -642,6 +642,7 @@ bool my_yyoverflow(short **a, YYSTYPE **
 %token  UNCOMMITTED_SYM
 %token  UNDEFINED_SYM
 %token  UNDERSCORE_CHARSET
+%token  ORIGINAL_CHARSET
 %token  UNDO_SYM
 %token  UNICODE_SYM
 %token  UNION_SYM
@@ -708,7 +709,8 @@ bool my_yyoverflow(short **a, YYSTYPE **
 %type <lex_str>
         IDENT IDENT_QUOTED TEXT_STRING DECIMAL_NUM FLOAT_NUM NUM LONG_NUM HEX_NUM
 	LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text
-        UNDERSCORE_CHARSET IDENT_sys TEXT_STRING_sys TEXT_STRING_literal
+        UNDERSCORE_CHARSET ORIGINAL_CHARSET IDENT_sys TEXT_STRING_sys
+        TEXT_STRING_literal
 	NCHAR_STRING opt_component key_cache_name
         sp_opt_label BIN_NUM label_ident TEXT_STRING_filesystem
 
@@ -891,7 +893,11 @@ query:
 	     thd->lex->sql_command= SQLCOM_EMPTY_QUERY;
 	   }
 	}
-	| verb_clause END_OF_INPUT {};
+	|
+	{
+	  Lex->is_view_clause= FALSE;
+	}
+        verb_clause END_OF_INPUT {};
 
 verb_clause:
 	  statement
@@ -7139,6 +7145,10 @@ text_literal:
 	  { $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
 	| text_literal TEXT_STRING_literal
 	  { ((Item_string*) $1)->append($2.str,$2.length); }
+	| ORIGINAL_CHARSET TEXT_STRING
+	  {
+	    $$ = Item_utf8_string::create(YYTHD, $2, *Lex->original_charset);
+	  }
 	;
 
 text_string:
@@ -7528,11 +7538,53 @@ TEXT_STRING_literal:
 	TEXT_STRING
 	{
 	  THD *thd= YYTHD;
-	  if (thd->charset_is_collation_connection)
-	    $$= $1;
+	  LEX *lex = Lex;
+
+	  if (lex->is_view_clause && !lex->is_query_rewrite_disabled)
+	  {
+	    /* This is CREATE VIEW statement. */
+            if (thd->charset() != system_charset_info)
+            {
+              thd->convert_string(&$$, system_charset_info, $1.str, $1.length,
+                                  thd->charset());
+            }
+
+	    lex->rewritten_query.append(
+              lex->unprocessed_query_ptr,
+              (char *) lex->tok_start - lex->unprocessed_query_ptr);
+
+	    // lex->rewritten_query.append("CONVERT(_utf8'");
+
+	    lex->rewritten_query.append("__as_");
+	    lex->rewritten_query.append(thd->charset()->csname);
+	    lex->rewritten_query.append(" '");
+
+	    {
+	      // Handle escape-sequences well.
+	      char str_holder[10000];
+	      String str(str_holder, 10000, system_charset_info);
+	      str.length(0);
+	      str.append($$.str, $$.length);
+
+	      str.print(&lex->rewritten_query);
+	    }
+
+	    lex->rewritten_query.append('\'');
+
+	    // lex->rewritten_query.append("' using ");
+	    // lex->rewritten_query.append(thd->charset()->csname);
+	    // lex->rewritten_query.append(")");
+
+	    lex->unprocessed_query_ptr= (char *) lex->ptr;
+	  }
 	  else
-	    thd->convert_string(&$$, thd->variables.collation_connection,
-				$1.str, $1.length, thd->charset());
+	  {
+	    if (thd->charset_is_collation_connection)
+	      $$= $1;
+	    else
+	      thd->convert_string(&$$, thd->variables.collation_connection,
+				  $1.str, $1.length, thd->charset());
+	  }
 	}
 	;
 
@@ -9137,7 +9189,7 @@ view_tail:
 	    YYABORT;
 	}
 	view_list_opt AS view_select view_check_option
-	{}
+	{ }
 	;
 
 view_list_opt:
@@ -9166,7 +9218,14 @@ view_select:
           lex->parsing_options.allows_select_into= FALSE;
           lex->parsing_options.allows_select_procedure= FALSE;
           lex->parsing_options.allows_derived= FALSE;
-        }        
+
+	  // Initialize members for rewriting query.
+
+          lex->is_view_clause= TRUE;
+
+          lex->rewritten_query.length(0);
+          lex->unprocessed_query_ptr= YYTHD->query;
+        }
         view_select_aux
         {
           LEX *lex= Lex;
Thread
bk commit into 5.0 tree (anozdrin:1.2299) BUG#16291Alexander Nozdrin1 Nov