MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:bar Date:October 9 2007 8:53am
Subject:bk commit into 5.1 tree (bar:1.2571) BUG#27287
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-10-09 13:53:39+05:00, bar@stripped +1 -0
  Bug#27287 extractvalue() (and updatexml()) extremely slow for large XML
  Performance improvements made.
  ExtractValue for large XML values is now much faster
  (about 2000 times faster of 1Mb-long XML values).

  sql/item_xmlfunc.cc@stripped, 2007-10-09 13:53:38+05:00, bar@stripped +24 -36
    Performance improvement for huge XML values:
    1. Avoid reallocs - reserve extra memory:
    using String::reserve() + String::q_append()
    instead of String::append()
    2. Parent is now not searched through the all previous
    nodes in backward direction. Instead, we do the following:
    - we introduce data->parent - a new member in MY_XML_USER_DATA
    - when entering a new tag/attribute node, we remember offset
      of the current node in data->parent
    - when leaving a tag/attribute node, we change data->parent
      to offset of the parent of the current node.

diff -Nrup a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
--- a/sql/item_xmlfunc.cc	2007-08-13 18:11:11 +05:00
+++ b/sql/item_xmlfunc.cc	2007-10-09 13:53:38 +05:00
@@ -2611,35 +2611,27 @@ typedef struct 
   uint level;
   String *pxml;         // parsed XML
   uint pos[MAX_LEVEL];  // Tag position stack
+  uint parent;          // Offset of the parent of the current node
 } MY_XML_USER_DATA;
 
 
-/*
-  Find the parent node
-
-  SYNOPSYS
-    Find the parent node, i.e. a tag or attrubute node on the given level.
-
-  RETURN
-    1 - success
-    0 - failure
-*/
-static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level)
+static bool
+append_node(String *str, MY_XML_NODE *node)
 {
-  if (!nitems)
-    return 0;
-
-  MY_XML_NODE *p, *last= &items[nitems-1];
-  for (p= last; p >= items; p--)
-  {
-    if (p->level == level &&
-        (p->type == MY_XML_NODE_TAG ||
-         p->type == MY_XML_NODE_ATTR))
-    {
-      return p - items;
-    }
-  } 
-  return 0;
+  /*
+   If "str" doesn't have space for a new node,
+   it will allocate two times more space that it has had so far.
+   (2*len+512) is a heuristic value,
+   which gave the best performance during tests.
+   The ideas behind this formula are:
+   - It allows to have a very small number of reallocs:
+     about 10 reallocs on a 1Mb-long XML value.
+   - At the same time, it avoids excessive memory use.
+  */
+  if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512))
+    return TRUE;
+  str->q_append((const char*) node, sizeof(MY_XML_NODE));
+  return FALSE;
 }
 
 
@@ -2661,19 +2653,17 @@ extern "C" int xml_enter(MY_XML_PARSER *
 int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len)
 {
   MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
-  MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
   uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
-  uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
   MY_XML_NODE node;
 
+  node.parent= data->parent; // Set parent for the new node to old parent
+  data->parent= numnodes;    // Remember current node as new parent
   data->pos[data->level]= numnodes;
   node.level= data->level++;
   node.type= st->current_node_type; // TAG or ATTR
   node.beg= attr;
   node.end= attr + len;
-  node.parent= parent;
-  data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
-  return MY_XML_OK;
+  return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
 }
 
 
@@ -2694,18 +2684,14 @@ extern "C" int xml_value(MY_XML_PARSER *
 int xml_value(MY_XML_PARSER *st,const char *attr, size_t len)
 {
   MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
-  MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
-  uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
-  uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
   MY_XML_NODE node;
   
+  node.parent= data->parent; // Set parent for the new text node to old parent
   node.level= data->level;
   node.type= MY_XML_NODE_TEXT;
   node.beg= attr;
   node.end= attr + len;
-  node.parent= parent;
-  data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
-  return MY_XML_OK;
+  return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
 }
 
 
@@ -2730,6 +2716,7 @@ int xml_leave(MY_XML_PARSER *st,const ch
   data->level--;
 
   MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
+  data->parent= nodes[data->parent].parent;
   nodes+= data->pos[data->level];
   nodes->tagend= st->cur;
 
@@ -2760,6 +2747,7 @@ String *Item_xml_str_func::parse_xml(Str
   p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
   user_data.level= 0;
   user_data.pxml= parsed_xml_buf;
+  user_data.parent= 0;
   my_xml_set_enter_handler(&p, xml_enter);
   my_xml_set_value_handler(&p, xml_value);
   my_xml_set_leave_handler(&p, xml_leave);
Thread
bk commit into 5.1 tree (bar:1.2571) BUG#27287bar9 Oct
  • Re: bk commit into 5.1 tree (bar:1.2571) BUG#27287Mats Kindahl11 Oct