List:Commits« Previous MessageNext Message »
From:bar Date:September 14 2006 8:47am
Subject:bk commit into 5.1 tree (bar:1.2313) BUG#20854
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-09-14 11:47:19+05:00, bar@stripped +4 -0
  Bug#20854 XML functions: wrong result in ExtractValue

  mysql-test/r/xml.result@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +27 -1
    - Adding test case
    - Fixing error message

  mysql-test/t/xml.test@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +16 -0
    Adding test case

  sql/item_xmlfunc.cc@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +58 -6
    For grammar rules with loops like:
    
      AdditiveExpr ::= MultiplicativeExpr ('+' MultiplicativeExpr)*
    
    If we scanned '+' and then met an error when parsing
    MultiplicativeExpr, then we should fully stop parsing - without
    trying to apply any other rules.
    
    Fix: add an "error" member to the MY_XPATH structure,
    and make my_xpath_parse_term() never return success
    once the error is set.

  strings/xml.c@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +51 -9
    Adding a my_xml_ctype map of flags indicating
    whether a character is a space character, a
    valid identifier start character, or a valid
    identifier body character. Using this map to
    properly scan identifiers. Also, using this map
    to scan spaces faster (instead of strchr).

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.intranet.mysql.r18.ru
# Root:	/usr/home/bar/mysql-5.1.20854

--- 1.12/strings/xml.c	2006-09-14 11:47:28 +05:00
+++ 1.13/strings/xml.c	2006-09-14 11:47:28 +05:00
@@ -19,6 +19,7 @@
 #include "my_xml.h"
 
 
+#define MY_XML_UNKNOWN  'U'
 #define MY_XML_EOF	'E'
 #define MY_XML_STRING	'S'
 #define MY_XML_IDENT	'I'
@@ -39,6 +40,46 @@
 } MY_XML_ATTR;
 
 
+/*
+  XML ctype:
+*/
+#define	MY_XML_ID0  0x01 /* Identifier initial character */
+#define	MY_XML_ID1  0x02 /* Identifier medial  character */
+#define	MY_XML_SPC  0x08 /* Spacing character */
+
+
+/*
+ http://www.w3.org/TR/REC-xml/ 
+ [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
+                  CombiningChar | Extender
+ [5] Name ::= (Letter | '_' | ':') (NameChar)*
+*/
+
+static char my_xml_ctype[256]=
+{
+/*00*/  0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
+/*10*/  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+/*20*/  8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,  /*  !"#$%&'()*+,-./ */
+/*30*/  2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0,  /* 0123456789:;<=>? */
+/*40*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* @ABCDEFGHIJKLMNO */
+/*50*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,  /* PQRSTUVWXYZ[\]^_ */
+/*60*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* `abcdefghijklmno */
+/*70*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,  /* pqrstuvwxyz{|}~  */
+/*80*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*90*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*A0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*B0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*C0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*D0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*E0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*F0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
+};
+
+#define my_xml_is_space(c)  (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
+#define my_xml_is_id0(c)    (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
+#define my_xml_is_id1(c)    (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
+
+
 static const char *lex2str(int lex)
 {
   switch(lex)
@@ -56,13 +97,13 @@
     case MY_XML_QUESTION: return "'?'";
     case MY_XML_EXCLAM:   return "'!'";
   }
-  return "UNKNOWN";
+  return "unknown token";
 }
 
 static void my_xml_norm_text(MY_XML_ATTR *a)
 {
-  for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ );
-  for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- );
+  for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
+  for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
 }
 
 
@@ -70,7 +111,7 @@
 {
   int lex;
   
-  for(  ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ;  p->cur++);
+  for(  ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ;  p->cur++);
   
   if (p->cur >= p->end)
   {
@@ -124,16 +165,17 @@
       my_xml_norm_text(a);
     lex=MY_XML_STRING;
   }
-  else
+  else if (my_xml_is_id0(p->cur[0]))
   {
-    for(;
-	(p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]);
-	p->cur++)
-    {}
+    p->cur++;
+    while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
+      p->cur++;
     a->end=p->cur;
     my_xml_norm_text(a);
     lex=MY_XML_IDENT;
   }
+  else
+    lex= MY_XML_UNKNOWN;
 
 #if 0
   printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);

--- 1.17/mysql-test/r/xml.result	2006-09-14 11:47:28 +05:00
+++ 1.18/mysql-test/r/xml.result	2006-09-14 11:47:28 +05:00
@@ -570,7 +570,7 @@
 extractvalue('<a>a<b>B</b></a>','a|/b')
 a
 select extractvalue('<a>A</a>','/<a>');
-ERROR HY000: XPATH syntax error: '<a>'
+ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>'
 select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!');
 ERROR HY000: XPATH syntax error: '!'
 select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*');
@@ -710,3 +710,29 @@
 select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
 extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something')
 Otherdata
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
+ERROR HY000: XPATH syntax error: '02'
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
+extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*')
+NULL
+Warnings:
+Warning	1512	Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)'
+select extractValue('<.>test</.>','//*');
+extractValue('<.>test</.>','//*')
+NULL
+Warnings:
+Warning	1512	Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
+select extractValue('<->test</->','//*');
+extractValue('<->test</->','//*')
+NULL
+Warnings:
+Warning	1512	Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
+select extractValue('<:>test</:>','//*');
+extractValue('<:>test</:>','//*')
+test
+select extractValue('<_>test</_>','//*');
+extractValue('<_>test</_>','//*')
+test
+select extractValue('<x.-_:>test</x.-_:>','//*');
+extractValue('<x.-_:>test</x.-_:>','//*')
+test

--- 1.16/mysql-test/t/xml.test	2006-09-14 11:47:28 +05:00
+++ 1.17/mysql-test/t/xml.test	2006-09-14 11:47:28 +05:00
@@ -360,3 +360,19 @@
 #
 select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar');
 select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
+
+#
+# Bug#20854 XML functions: wrong result in ExtractValue
+#
+--error 1105
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
+# dot and dash are bad identifier start characters
+select extractValue('<.>test</.>','//*');
+select extractValue('<->test</->','//*');
+# colon is a good identifier start character
+select extractValue('<:>test</:>','//*');
+# underscore is good identifier start character
+select extractValue('<_>test</_>','//*');
+# dot, dash, underscore and colon are good identifier middle characters
+select extractValue('<x.-_:>test</x.-_:>','//*');

--- 1.19/sql/item_xmlfunc.cc	2006-09-14 11:47:28 +05:00
+++ 1.20/sql/item_xmlfunc.cc	2006-09-14 11:47:28 +05:00
@@ -105,6 +105,7 @@
   String *context_cache; /* last context provider                     */
   String *pxml;          /* Parsed XML, an array of MY_XML_NODE       */
   CHARSET_INFO *cs;      /* character set/collation string comparison */
+  int error;
 } MY_XPATH;
 
 
@@ -913,7 +914,9 @@
   RETURN
     The newly created item.
 */
-static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
+static Item *create_comparator(MY_XPATH *xpath,
+                               int oper, MY_XPATH_LEX *context,
+                               Item *a, Item *b)
 {
   if (a->type() != Item::XPATH_NODESET &&
       b->type() != Item::XPATH_NODESET)
@@ -923,6 +926,13 @@
   else if (a->type() == Item::XPATH_NODESET &&
            b->type() == Item::XPATH_NODESET)
   {
+    uint len= context->end - context->beg;
+    set_if_bigger(len, 32);
+    my_printf_error(ER_UNKNOWN_ERROR,
+                    "XPATH error: "
+                    "comparison of two nodesets is not supported: '%.*s'",
+                    MYF(0), len, context->beg);
+
     return 0; // TODO: Comparison of two nodesets
   }
   else
@@ -1430,7 +1440,7 @@
 static int
 my_xpath_parse_term(MY_XPATH *xpath, int term)
 {
-  if (xpath->lasttok.term == term)
+  if (xpath->lasttok.term == term && !xpath->error)
   {
     xpath->prevtok= xpath->lasttok;
     my_xpath_lex_scan(xpath, &xpath->lasttok,
@@ -1558,8 +1568,9 @@
     return my_xpath_parse_RelativeLocationPath(xpath);
   }
 
-  return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) ||
-         my_xpath_parse_RelativeLocationPath(xpath);
+  my_xpath_parse_RelativeLocationPath(xpath);
+ 
+  return (xpath->error == 0);
 }
 
 
@@ -1596,7 +1607,10 @@
                                                              "*", 1,
                                                              xpath->pxml, 1);
     if (!my_xpath_parse_Step(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
   }
   return 1;
 }
@@ -1633,10 +1647,16 @@
     xpath->context_cache= context_cache;
 
     if(!my_xpath_parse_PredicateExpr(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
 
     if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB))
+    {
+      xpath->error= 1;
       return 0;
+    }
 
     xpath->item= nodeset2bool(xpath, xpath->item);
 
@@ -1893,7 +1913,10 @@
     
     if (!my_xpath_parse_PathExpr(xpath)
         || xpath->item->type() != Item::XPATH_NODESET)
+    {
+      xpath->error= 1;
       return 0;
+    }
     xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml);
   }
   return 1;
@@ -1929,6 +1952,7 @@
 {
   return my_xpath_parse_LocationPath(xpath) || 
          my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath);
+         
 }
 
 
@@ -1975,7 +1999,10 @@
   {
     Item *prev= xpath->item;
     if (!my_xpath_parse_AndExpr(xpath))
+    {
       return 0;
+      xpath->error= 1;
+    }
     xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
                                   nodeset2bool(xpath, xpath->item));
   }
@@ -2003,7 +2030,10 @@
   {
     Item *prev= xpath->item;
     if (!my_xpath_parse_EqualityExpr(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
 
     xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), 
                                    nodeset2bool(xpath,xpath->item));
@@ -2057,17 +2087,26 @@
 }
 static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath)
 {
+  MY_XPATH_LEX operator_context;
   if (!my_xpath_parse_RelationalExpr(xpath))
     return 0;
+
+  operator_context= xpath->lasttok;
   while (my_xpath_parse_EqualityOperator(xpath))
   {
     Item *prev= xpath->item;
     int oper= xpath->extra;
     if (!my_xpath_parse_RelationalExpr(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
 
-    if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
+    if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
+                                         prev, xpath->item)))
       return 0;
+
+    operator_context= xpath->lasttok;
   }
   return 1;
 }
@@ -2109,18 +2148,25 @@
 }
 static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath)
 {
+  MY_XPATH_LEX operator_context;
   if (!my_xpath_parse_AdditiveExpr(xpath))
     return 0;
+  operator_context= xpath->lasttok;
   while (my_xpath_parse_RelationalOperator(xpath))
   {
     Item *prev= xpath->item;
     int oper= xpath->extra;
 
     if (!my_xpath_parse_AdditiveExpr(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
 
-    if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
+    if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
+                                         prev, xpath->item)))
       return 0;
+    operator_context= xpath->lasttok;
   }
   return 1;
 }
@@ -2153,7 +2199,10 @@
     int oper= xpath->prevtok.term;
     Item *prev= xpath->item;
     if (!my_xpath_parse_MultiplicativeExpr(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
 
     if (oper == MY_XPATH_LEX_PLUS)
       xpath->item= new Item_func_plus(prev, xpath->item);
@@ -2198,7 +2247,10 @@
     int oper= xpath->prevtok.term;
     Item *prev= xpath->item;
     if (!my_xpath_parse_UnaryExpr(xpath))
+    {
+      xpath->error= 1;
       return 0;
+    }
     switch (oper)
     {
       case MY_XPATH_LEX_ASTERISK:
Thread
bk commit into 5.1 tree (bar:1.2313) BUG#20854bar14 Sep