Hi Bar!
Patch looks good. OK to push.
Just my few cents,
Mats Kindahl
bar@stripped wrote:
> Below is the list of changes that have just been committed into a local
> 5.1 repository of bar. When bar does a push these changes will
> be propagated to the main repository and, within 24 hours after the
> push, to the public repository.
> For information on how to access the public repository
> see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
>
> ChangeSet@stripped, 2007-10-09 13:53:39+05:00, bar@stripped +1 -0
> Bug#27287 extractvalue() (and updatexml()) extremely slow for large XML
> Performance improvements made.
> ExtractValue for large XML values is now much faster
> (about 2000 times faster of 1Mb-long XML values).
>
> sql/item_xmlfunc.cc@stripped, 2007-10-09 13:53:38+05:00, bar@stripped +24 -36
> Performance improvement for huge XML values:
> 1. Avoid reallocs - reserve extra memory:
> using String::reserve() + String::q_append()
> instead of String::append()
> 2. Parent is now not searched through the all previous
> nodes in backward direction. Instead, we do the following:
> - we introduce data->parent - a new member in MY_XML_USER_DATA
> - when entering a new tag/attribute node, we remember offset
> of the current node in data->parent
> - when leaving a tag/attribute node, we change data->parent
> to offset of the parent of the current node.
>
> diff -Nrup a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
> --- a/sql/item_xmlfunc.cc 2007-08-13 18:11:11 +05:00
> +++ b/sql/item_xmlfunc.cc 2007-10-09 13:53:38 +05:00
> @@ -2611,35 +2611,27 @@ typedef struct
> uint level;
> String *pxml; // parsed XML
> uint pos[MAX_LEVEL]; // Tag position stack
> + uint parent; // Offset of the parent of the current node
>
> } MY_XML_USER_DATA;
>
>
> -/*
> - Find the parent node
> -
> - SYNOPSYS
> - Find the parent node, i.e. a tag or attrubute node on the given level.
> -
> - RETURN
> - 1 - success
> - 0 - failure
> -*/
> -static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level)
> +static bool
> +append_node(String *str, MY_XML_NODE *node)
> {
> - if (!nitems)
> - return 0;
> -
> - MY_XML_NODE *p, *last= &items[nitems-1];
> - for (p= last; p >= items; p--)
> - {
> - if (p->level == level &&
> - (p->type == MY_XML_NODE_TAG ||
> - p->type == MY_XML_NODE_ATTR))
> - {
> - return p - items;
> - }
> - }
> - return 0;
> + /*
> + If "str" doesn't have space for a new node,
> + it will allocate two times more space that it has had so far.
> + (2*len+512) is a heuristic value,
> + which gave the best performance during tests.
> + The ideas behind this formula are:
> + - It allows to have a very small number of reallocs:
> + about 10 reallocs on a 1Mb-long XML value.
> + - At the same time, it avoids excessive memory use.
> + */
> + if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512))
> + return TRUE;
> + str->q_append((const char*) node, sizeof(MY_XML_NODE));
> + return FALSE;
> }
>
>
> @@ -2661,19 +2653,17 @@ extern "C" int xml_enter(MY_XML_PARSER *
> int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len)
> {
> MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
> - MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
> uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
> - uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
> MY_XML_NODE node;
>
> + node.parent= data->parent; // Set parent for the new node to old parent
> + data->parent= numnodes; // Remember current node as new parent
> data->pos[data->level]= numnodes;
> node.level= data->level++;
> node.type= st->current_node_type; // TAG or ATTR
> node.beg= attr;
> node.end= attr + len;
> - node.parent= parent;
> - data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
> - return MY_XML_OK;
> + return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
> }
>
>
> @@ -2694,18 +2684,14 @@ extern "C" int xml_value(MY_XML_PARSER *
> int xml_value(MY_XML_PARSER *st,const char *attr, size_t len)
> {
> MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
> - MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
> - uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
> - uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
> MY_XML_NODE node;
>
> + node.parent= data->parent; // Set parent for the new text node to old parent
> node.level= data->level;
> node.type= MY_XML_NODE_TEXT;
> node.beg= attr;
> node.end= attr + len;
> - node.parent= parent;
> - data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
> - return MY_XML_OK;
> + return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
> }
>
>
> @@ -2730,6 +2716,7 @@ int xml_leave(MY_XML_PARSER *st,const ch
> data->level--;
>
> MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
> + data->parent= nodes[data->parent].parent;
> nodes+= data->pos[data->level];
> nodes->tagend= st->cur;
>
> @@ -2760,6 +2747,7 @@ String *Item_xml_str_func::parse_xml(Str
> p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
> user_data.level= 0;
> user_data.pxml= parsed_xml_buf;
> + user_data.parent= 0;
> my_xml_set_enter_handler(&p, xml_enter);
> my_xml_set_value_handler(&p, xml_value);
> my_xml_set_leave_handler(&p, xml_leave);
>
>
--
Mats Kindahl
Lead Software Developer
Replication Team
MySQL AB, www.mysql.com