Below is the list of changes that have just been committed into a local
5.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2006-09-14 11:47:19+05:00, bar@stripped +4 -0
Bug#20854 XML functions: wrong result in ExtractValue
mysql-test/r/xml.result@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +27 -1
- Adding test case
- Fixing error message
mysql-test/t/xml.test@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +16 -0
Adding test case
sql/item_xmlfunc.cc@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +58 -6
For grammar rules with loops like:
AdditiveExpr ::= MultiplicativeExpr ('+' MultiplicativeExpr)*
If we scanned '+' and then met an error when parsing
MultiplicativeExpr, then we should fully stop parsing - without
trying to apply any other rules.
Fix: add an "error" member to the MY_XPATH structure,
and make my_xpath_parse_term() never return success
once the error flag is set.
strings/xml.c@stripped, 2006-09-14 11:47:14+05:00, bar@stripped +51 -9
Adding my_xml_ctype map for flags, indicating
whether a character is a space character, is a
valid identifier start character, is a valid
identifier body character. Using this map to
properly scan identifiers. Also, using this map
to scan spaces faster (instead of strchr).
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: bar
# Host: bar.intranet.mysql.r18.ru
# Root: /usr/home/bar/mysql-5.1.20854
--- 1.12/strings/xml.c 2006-09-14 11:47:28 +05:00
+++ 1.13/strings/xml.c 2006-09-14 11:47:28 +05:00
@@ -19,6 +19,7 @@
#include "my_xml.h"
+#define MY_XML_UNKNOWN 'U'
#define MY_XML_EOF 'E'
#define MY_XML_STRING 'S'
#define MY_XML_IDENT 'I'
@@ -39,6 +40,46 @@
} MY_XML_ATTR;
+/*
+ XML ctype:
+*/
+#define MY_XML_ID0 0x01 /* Identifier initial character */
+#define MY_XML_ID1 0x02 /* Identifier medial character */
+#define MY_XML_SPC 0x08 /* Spacing character */
+
+
+/*
+ http://www.w3.org/TR/REC-xml/
+ [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
+ CombiningChar | Extender
+ [5] Name ::= (Letter | '_' | ':') (NameChar)*
+*/
+
+static char my_xml_ctype[256]=
+{
+/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
+/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */
+/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
+/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
+/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
+/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
+/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */
+/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
+};
+
+#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
+#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
+#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
+
+
static const char *lex2str(int lex)
{
switch(lex)
@@ -56,13 +97,13 @@
case MY_XML_QUESTION: return "'?'";
case MY_XML_EXCLAM: return "'!'";
}
- return "UNKNOWN";
+ return "unknown token";
}
static void my_xml_norm_text(MY_XML_ATTR *a)
{
- for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ );
- for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- );
+ for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
+ for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
}
@@ -70,7 +111,7 @@
{
int lex;
- for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++);
+ for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
if (p->cur >= p->end)
{
@@ -124,16 +165,17 @@
my_xml_norm_text(a);
lex=MY_XML_STRING;
}
- else
+ else if (my_xml_is_id0(p->cur[0]))
{
- for(;
- (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]);
- p->cur++)
- {}
+ p->cur++;
+ while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
+ p->cur++;
a->end=p->cur;
my_xml_norm_text(a);
lex=MY_XML_IDENT;
}
+ else
+ lex= MY_XML_UNKNOWN;
#if 0
printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);
--- 1.17/mysql-test/r/xml.result 2006-09-14 11:47:28 +05:00
+++ 1.18/mysql-test/r/xml.result 2006-09-14 11:47:28 +05:00
@@ -570,7 +570,7 @@
extractvalue('<a>a<b>B</b></a>','a|/b')
a
select extractvalue('<a>A</a>','/<a>');
-ERROR HY000: XPATH syntax error: '<a>'
+ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>'
select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!');
ERROR HY000: XPATH syntax error: '!'
select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*');
@@ -710,3 +710,29 @@
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something')
Otherdata
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
+ERROR HY000: XPATH syntax error: '02'
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
+extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*')
+NULL
+Warnings:
+Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)'
+select extractValue('<.>test</.>','//*');
+extractValue('<.>test</.>','//*')
+NULL
+Warnings:
+Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
+select extractValue('<->test</->','//*');
+extractValue('<->test</->','//*')
+NULL
+Warnings:
+Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
+select extractValue('<:>test</:>','//*');
+extractValue('<:>test</:>','//*')
+test
+select extractValue('<_>test</_>','//*');
+extractValue('<_>test</_>','//*')
+test
+select extractValue('<x.-_:>test</x.-_:>','//*');
+extractValue('<x.-_:>test</x.-_:>','//*')
+test
--- 1.16/mysql-test/t/xml.test 2006-09-14 11:47:28 +05:00
+++ 1.17/mysql-test/t/xml.test 2006-09-14 11:47:28 +05:00
@@ -360,3 +360,19 @@
#
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar');
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
+
+#
+# Bug#20854 XML functions: wrong result in ExtractValue
+#
+--error 1105
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
+select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
+# dot and dash are bad identifier start characters
+select extractValue('<.>test</.>','//*');
+select extractValue('<->test</->','//*');
+# colon is a good identifier start character
+select extractValue('<:>test</:>','//*');
+# underscore is good identifier start character
+select extractValue('<_>test</_>','//*');
+# dot, dash, underscore and colon are good identifier middle characters
+select extractValue('<x.-_:>test</x.-_:>','//*');
--- 1.19/sql/item_xmlfunc.cc 2006-09-14 11:47:28 +05:00
+++ 1.20/sql/item_xmlfunc.cc 2006-09-14 11:47:28 +05:00
@@ -105,6 +105,7 @@
String *context_cache; /* last context provider */
String *pxml; /* Parsed XML, an array of MY_XML_NODE */
CHARSET_INFO *cs; /* character set/collation string comparison */
+ int error;
} MY_XPATH;
@@ -913,7 +914,9 @@
RETURN
The newly created item.
*/
-static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
+static Item *create_comparator(MY_XPATH *xpath,
+ int oper, MY_XPATH_LEX *context,
+ Item *a, Item *b)
{
if (a->type() != Item::XPATH_NODESET &&
b->type() != Item::XPATH_NODESET)
@@ -923,6 +926,13 @@
else if (a->type() == Item::XPATH_NODESET &&
b->type() == Item::XPATH_NODESET)
{
+ uint len= context->end - context->beg;
+ set_if_bigger(len, 32);
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "XPATH error: "
+ "comparison of two nodesets is not supported: '%.*s'",
+ MYF(0), len, context->beg);
+
return 0; // TODO: Comparison of two nodesets
}
else
@@ -1430,7 +1440,7 @@
static int
my_xpath_parse_term(MY_XPATH *xpath, int term)
{
- if (xpath->lasttok.term == term)
+ if (xpath->lasttok.term == term && !xpath->error)
{
xpath->prevtok= xpath->lasttok;
my_xpath_lex_scan(xpath, &xpath->lasttok,
@@ -1558,8 +1568,9 @@
return my_xpath_parse_RelativeLocationPath(xpath);
}
- return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) ||
- my_xpath_parse_RelativeLocationPath(xpath);
+ my_xpath_parse_RelativeLocationPath(xpath);
+
+ return (xpath->error == 0);
}
@@ -1596,7 +1607,10 @@
"*", 1,
xpath->pxml, 1);
if (!my_xpath_parse_Step(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
}
return 1;
}
@@ -1633,10 +1647,16 @@
xpath->context_cache= context_cache;
if(!my_xpath_parse_PredicateExpr(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB))
+ {
+ xpath->error= 1;
return 0;
+ }
xpath->item= nodeset2bool(xpath, xpath->item);
@@ -1893,7 +1913,10 @@
if (!my_xpath_parse_PathExpr(xpath)
|| xpath->item->type() != Item::XPATH_NODESET)
+ {
+ xpath->error= 1;
return 0;
+ }
xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml);
}
return 1;
@@ -1929,6 +1952,7 @@
{
return my_xpath_parse_LocationPath(xpath) ||
my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath);
+
}
@@ -1975,7 +1999,10 @@
{
Item *prev= xpath->item;
if (!my_xpath_parse_AndExpr(xpath))
+ {
return 0;
+ xpath->error= 1;
+ }
xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
nodeset2bool(xpath, xpath->item));
}
@@ -2003,7 +2030,10 @@
{
Item *prev= xpath->item;
if (!my_xpath_parse_EqualityExpr(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
xpath->item= new Item_cond_and(nodeset2bool(xpath,prev),
nodeset2bool(xpath,xpath->item));
@@ -2057,17 +2087,26 @@
}
static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath)
{
+ MY_XPATH_LEX operator_context;
if (!my_xpath_parse_RelationalExpr(xpath))
return 0;
+
+ operator_context= xpath->lasttok;
while (my_xpath_parse_EqualityOperator(xpath))
{
Item *prev= xpath->item;
int oper= xpath->extra;
if (!my_xpath_parse_RelationalExpr(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
- if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
+ if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
+ prev, xpath->item)))
return 0;
+
+ operator_context= xpath->lasttok;
}
return 1;
}
@@ -2109,18 +2148,25 @@
}
static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath)
{
+ MY_XPATH_LEX operator_context;
if (!my_xpath_parse_AdditiveExpr(xpath))
return 0;
+ operator_context= xpath->lasttok;
while (my_xpath_parse_RelationalOperator(xpath))
{
Item *prev= xpath->item;
int oper= xpath->extra;
if (!my_xpath_parse_AdditiveExpr(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
- if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
+ if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
+ prev, xpath->item)))
return 0;
+ operator_context= xpath->lasttok;
}
return 1;
}
@@ -2153,7 +2199,10 @@
int oper= xpath->prevtok.term;
Item *prev= xpath->item;
if (!my_xpath_parse_MultiplicativeExpr(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
if (oper == MY_XPATH_LEX_PLUS)
xpath->item= new Item_func_plus(prev, xpath->item);
@@ -2198,7 +2247,10 @@
int oper= xpath->prevtok.term;
Item *prev= xpath->item;
if (!my_xpath_parse_UnaryExpr(xpath))
+ {
+ xpath->error= 1;
return 0;
+ }
switch (oper)
{
case MY_XPATH_LEX_ASTERISK:
| Thread |
|---|
| • bk commit into 5.1 tree (bar:1.2313) BUG#20854 | bar | 14 Sep |