List:Commits« Previous MessageNext Message »
From:Konstantin Osipov Date:June 3 2010 3:06pm
Subject:bzr push into mysql-trunk-runtime branch (kostja:3031 to 3033)
View as plain text  
 3033 Konstantin Osipov	2010-06-03 [merge]
      Merge trunk-runtime -> trunk-runtime-merge.

 3032 Konstantin Osipov	2010-06-03 [merge]
      Merge trunk-runtime -> trunk-runtime-stage.

    modified:
      sql/mdl.cc
      sql/mdl.h
 3031 Dmitry Lenev	2010-06-02
      Follow-up for draft the patch that changes approach to
      how we acquire metadata locks for DML statements and
      changes the way MDL locks are acquired/granted in
      contended case.
      
      Review fixes in progress.

    modified:
      sql/sql_base.cc
=== modified file 'sql/mdl.cc'
--- a/sql/mdl.cc	2010-06-02 12:45:20 +0000
+++ b/sql/mdl.cc	2010-06-03 15:06:24 +0000
@@ -105,23 +105,46 @@ enum enum_deadlock_weight
 };
 
 
-
 /**
   A context of the recursive traversal through all contexts
   in all sessions in search for deadlock.
 */
 
-class Deadlock_detection_context
+class Deadlock_detection_visitor
 {
 public:
-  Deadlock_detection_context(MDL_context *start_arg)
-    : start(start_arg),
-      victim(NULL),
-      current_search_depth(0)
-  { }
-  MDL_context *start;
-  MDL_context *victim;
-  uint current_search_depth;
+  Deadlock_detection_visitor(MDL_context *start_node_arg)
+    : m_start_node(start_node_arg),
+      m_victim(NULL),
+      m_current_search_depth(0)
+  {}
+  bool enter_node(MDL_context * /* unused */);
+  void leave_node(MDL_context * /* unused */);
+
+  bool inspect_edge(MDL_context *dest);
+
+  MDL_context *get_victim() const { return m_victim; }
+
+  /**
+    Change the deadlock victim to a new one if it has lower deadlock
+    weight.
+  */
+  MDL_context *opt_change_victim_to(MDL_context *new_victim);
+private:
+  /**
+    The context which has initiated the search. There
+    can be multiple searches happening in parallel at the same time.
+  */
+  MDL_context *m_start_node;
+  /** If a deadlock is found, the context that identifies the victim. */
+  MDL_context *m_victim;
+  /** Set to the 0 at start. Increased whenever
+    we descend into another MDL context (aka traverse to the next
+    wait-for graph node). When MAX_SEARCH_DEPTH is reached, we
+    assume that a deadlock is found, even if we have not found a
+    loop.
+  */
+  uint m_current_search_depth;
   /**
     Maximum depth for deadlock searches. After this depth is
     achieved we will unconditionally declare that there is a
@@ -140,6 +163,74 @@ public:
 
 
 /**
+  Enter a node of a wait-for graph. After
+  a node is entered, inspect_edge() will be called
+  for all wait-for destinations of this node. Then
+  leave_node() will be called.
+  We call "enter_node()" for all nodes we inspect, 
+  including the starting node.
+
+  @retval  TRUE  Maximum search depth exceeded.
+  @retval  FALSE OK.
+*/
+
+bool Deadlock_detection_visitor::enter_node(MDL_context * /* unused */)
+{
+  if (++m_current_search_depth >= MAX_SEARCH_DEPTH)
+    return TRUE;
+  return FALSE;
+}
+
+
+/**
+  Done inspecting this node. Decrease the search
+  depth. Clear the node for debug safety.
+*/
+
+void Deadlock_detection_visitor::leave_node(MDL_context * /* unused */)
+{
+  --m_current_search_depth;
+}
+
+
+/**
+  Inspect a wait-for graph edge from one MDL context to another.
+
+  @retval TRUE   A loop is found.
+  @retval FALSE  No loop is found.
+*/
+
+bool Deadlock_detection_visitor::inspect_edge(MDL_context *node)
+{
+  return node == m_start_node;
+}
+
+
+/**
+  Change the deadlock victim to a new one if it has lower deadlock
+  weight.
+
+  @retval new_victim  Victim is not changed.
+  @retval !new_victim New victim became the current.
+*/
+
+MDL_context *
+Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim)
+{
+  if (m_victim == NULL ||
+      m_victim->get_deadlock_weight() >= new_victim->get_deadlock_weight())
+  {
+    /* Swap victims, unlock the old one. */
+    MDL_context *tmp= m_victim;
+    m_victim= new_victim;
+    return tmp;
+  }
+  /* No change, unlock the current context. */
+  return new_victim;
+}
+
+
+/**
   Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps
   and compatibility matrices.
 */
@@ -261,7 +352,7 @@ public:
   void remove_ticket(Ticket_list MDL_lock::*queue, MDL_ticket *ticket);
 
   bool find_deadlock(MDL_ticket *waiting_ticket,
-                     Deadlock_detection_context *deadlock_ctx);
+                     Deadlock_detection_visitor *dvisitor);
 
   /** List of granted tickets for this lock. */
   Ticket_list m_granted;
@@ -397,7 +488,7 @@ mdl_locks_key(const uchar *record, size_
   statement, the design capitalizes on that to later save on
   look ups in the table definition cache. This leads to reduced
   contention overall and on LOCK_open in particular.
-  Please see the description of MDL_context::acquire_shared_lock()
+  Please see the description of MDL_context::acquire_lock_impl()
   for details.
 */
 
@@ -739,13 +830,6 @@ MDL_request::create(MDL_key::enum_mdl_na
 }
 
 
-uint MDL_request::get_deadlock_weight() const
-{
-  return key.mdl_namespace() == MDL_key::GLOBAL ||
-         type > MDL_SHARED_NO_WRITE ?
-    MDL_DEADLOCK_WEIGHT_DDL : MDL_DEADLOCK_WEIGHT_DML;
-}
-
 /**
   Auxiliary functions needed for creation/destruction of MDL_lock objects.
 
@@ -794,6 +878,21 @@ void MDL_ticket::destroy(MDL_ticket *tic
 
 
 /**
+  Return the 'weight' of this ticket for the
+  victim selection algorithm. Requests with 
+  lower weight are preferred to requests
+  with higher weight when choosing a victim.
+*/
+
+uint MDL_ticket::get_deadlock_weight() const
+{
+  return (m_lock->key.mdl_namespace() == MDL_key::GLOBAL ||
+          m_type > MDL_SHARED_NO_WRITE ?
+          MDL_DEADLOCK_WEIGHT_DDL : MDL_DEADLOCK_WEIGHT_DML);
+}
+
+
+/**
   Helper functions and macros to be used for killable waiting in metadata
   locking subsystem.
 
@@ -1670,7 +1769,6 @@ bool MDL_context::acquire_lock_impl(MDL_
 
     mysql_prlock_unlock(&lock->m_rwlock);
 
-    set_deadlock_weight(mdl_request->get_deadlock_weight());
     will_wait_for(ticket);
 
     /* There is a shared or exclusive lock on the object. */
@@ -1904,17 +2002,22 @@ MDL_context::upgrade_shared_lock_to_excl
 
 
 bool MDL_lock::find_deadlock(MDL_ticket *waiting_ticket,
-                             Deadlock_detection_context *deadlock_ctx)
+                             Deadlock_detection_visitor *dvisitor)
 {
   MDL_ticket *ticket;
-  bool result= FALSE;
+  MDL_context *src_ctx= waiting_ticket->get_ctx();
+  bool result= TRUE;
+
+  if (dvisitor->enter_node(src_ctx))
+    return TRUE;
 
   mysql_prlock_rdlock(&m_rwlock);
 
+  /* Must be initialized after taking a read lock. */
   Ticket_iterator granted_it(m_granted);
   Ticket_iterator waiting_it(m_waiting);
 
-  if (waiting_ticket->get_ctx()->peek_signal() != MDL_context::NO_WAKE_UP)
+  if (src_ctx->peek_signal() != MDL_context::NO_WAKE_UP)
   {
     /*
       State of MDL_lock object and MDL_context::m_waiting_for member are
@@ -1953,39 +2056,46 @@ bool MDL_lock::find_deadlock(MDL_ticket 
       peeking signal to the context waiting for it. To avoid races this has to
       be done under protection of MDL_lock::m_rwlock lock.
     */
+    result= FALSE;
     goto end;
   }
 
+  /*
+    We do a breadth-first search first -- that is, inspect all
+    edges of the current node, and only then follow up to the next
+    node. In workloads that involve wait-for graph loops this
+    has proven to be a more efficient strategy [citation missing].
+  */
   while ((ticket= granted_it++))
   {
-    if (ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
-        ticket->get_ctx() != waiting_ticket->get_ctx() &&
-        ticket->get_ctx() == deadlock_ctx->start)
+    /* Filter out edges that point to the same node. */
+    if (ticket->get_ctx() != src_ctx &&
+        ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
+        dvisitor->inspect_edge(ticket->get_ctx()))
     {
-      result= TRUE;
       goto end;
     }
   }
 
   while ((ticket= waiting_it++))
   {
-    if (ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
-        ticket->get_ctx() != waiting_ticket->get_ctx() &&
-        ticket->get_ctx() == deadlock_ctx->start)
+    /* Filter out edges that point to the same node. */
+    if (ticket->get_ctx() != src_ctx &&
+        ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
+        dvisitor->inspect_edge(ticket->get_ctx()))
     {
-      result= TRUE;
       goto end;
     }
   }
 
+  /* Recurse and inspect all adjacent nodes. */
   granted_it.rewind();
   while ((ticket= granted_it++))
   {
-    if (ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
-        ticket->get_ctx() != waiting_ticket->get_ctx() &&
-        ticket->get_ctx()->find_deadlock(deadlock_ctx))
+    if (ticket->get_ctx() != src_ctx &&
+        ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
+        ticket->get_ctx()->find_deadlock(dvisitor))
     {
-      result= TRUE;
       goto end;
     }
   }
@@ -1993,23 +2103,34 @@ bool MDL_lock::find_deadlock(MDL_ticket 
   waiting_it.rewind();
   while ((ticket= waiting_it++))
   {
-    if (ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
-        ticket->get_ctx() != waiting_ticket->get_ctx() &&
-        ticket->get_ctx()->find_deadlock(deadlock_ctx))
+    if (ticket->get_ctx() != src_ctx &&
+        ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
+        ticket->get_ctx()->find_deadlock(dvisitor))
     {
-      result= TRUE;
       goto end;
     }
   }
 
+  result= FALSE;
 end:
   mysql_prlock_unlock(&m_rwlock);
+  dvisitor->leave_node(src_ctx);
   return result;
 }
 
 
-bool MDL_context::find_deadlock(Deadlock_detection_context *deadlock_ctx)
+/**
+  Recursively traverse the wait-for graph of MDL contexts
+  in search for deadlocks.
+
+  @retval TRUE  A deadlock is found. A victim is remembered
+                by the visitor.
+  @retval FALSE
+*/
+
+bool MDL_context::find_deadlock(Deadlock_detection_visitor *dvisitor)
 {
+  MDL_context *m_unlock_ctx= this;
   bool result= FALSE;
 
   mysql_prlock_rdlock(&m_waiting_for_lock);
@@ -2027,70 +2148,64 @@ bool MDL_context::find_deadlock(Deadlock
     */
     if (peek_signal() != VICTIM_WAKE_UP)
     {
-
-      if (++deadlock_ctx->current_search_depth >
-          deadlock_ctx->MAX_SEARCH_DEPTH)
-        result= TRUE;
-      else
-        result= m_waiting_for->m_lock->find_deadlock(m_waiting_for,
-                                                     deadlock_ctx);
-      --deadlock_ctx->current_search_depth;
+      result= m_waiting_for->m_lock->find_deadlock(m_waiting_for, dvisitor);
+      if (result)
+        m_unlock_ctx= dvisitor->opt_change_victim_to(this);
     }
   }
-
-  if (result)
-  {
-    if (! deadlock_ctx->victim)
-      deadlock_ctx->victim= this;
-    else if (deadlock_ctx->victim->m_deadlock_weight >= m_deadlock_weight)
-    {
-      mysql_prlock_unlock(&deadlock_ctx->victim->m_waiting_for_lock);
-      deadlock_ctx->victim= this;
-    }
-    else
-      mysql_prlock_unlock(&m_waiting_for_lock);
-  }
-  else
-    mysql_prlock_unlock(&m_waiting_for_lock);
+  /*
+    We may recurse into the same MDL_context more than once
+    in case this is not the starting node. Make sure we release the
+    read lock as it's been taken, except for 1 read lock for
+    the deadlock victim.
+  */
+  if (m_unlock_ctx)
+    mysql_prlock_unlock(&m_unlock_ctx->m_waiting_for_lock);
 
   return result;
 }
 
 
 /**
-  Check if this context participates in a deadlock and if yes resolve it.
+  Try to find a deadlock. This function produces no errors.
 
   @note If during deadlock resolution context which performs deadlock
         detection is chosen as a victim it will be informed about the
         fact by posting VICTIM_WAKE_UP signal to its signal slot.
+
+  @retval TRUE  A deadlock is found.
+  @retval FALSE No deadlock found.
 */
 
 void MDL_context::find_deadlock()
 {
   while (1)
   {
+    MDL_context *victim;
     /*
-      The fact that we use fresh instance of deadlock_ctx for each
+      The fact that we use fresh instance of dvisitor for each
       search performed by find_deadlock() below is important, code
       responsible for victim selection relies on this.
     */
-    Deadlock_detection_context deadlock_ctx(this);
+    Deadlock_detection_visitor dvisitor(this);
 
-    if (! find_deadlock(&deadlock_ctx))
+    if (! find_deadlock(&dvisitor))
     {
       /* No deadlocks are found! */
       break;
     }
 
-    if (deadlock_ctx.victim != this)
+    victim= dvisitor.get_victim();
+
+    if (victim != this)
     {
       /*
         Failure to post signal to the victim is OK as it means
         that the victim has received some other signal and is
         about to stop its waiting/to break deadlock loop.
       */
-      (void) deadlock_ctx.victim->post_signal(VICTIM_WAKE_UP);
-      mysql_prlock_unlock(&deadlock_ctx.victim->m_waiting_for_lock);
+      (void) victim->post_signal(VICTIM_WAKE_UP);
+      mysql_prlock_unlock(&victim->m_waiting_for_lock);
       /*
         After adding new arc to waiting graph we found that it participates
         in some loop (i.e. there is a deadlock). We decided to destroy this
@@ -2111,9 +2226,9 @@ void MDL_context::find_deadlock()
         Failure to post signal to ourselves is OK as this means that our
         lock request was satisfied and deadlock went away.
       */
-      (void) deadlock_ctx.victim->post_signal(VICTIM_WAKE_UP);
-      DBUG_ASSERT(&deadlock_ctx.victim->m_waiting_for_lock == &m_waiting_for_lock);
-      mysql_prlock_unlock(&deadlock_ctx.victim->m_waiting_for_lock);
+      (void) victim->post_signal(VICTIM_WAKE_UP);
+      DBUG_ASSERT(&victim->m_waiting_for_lock == &m_waiting_for_lock);
+      mysql_prlock_unlock(&victim->m_waiting_for_lock);
       return;
     }
   }

=== modified file 'sql/mdl.h'
--- a/sql/mdl.h	2010-06-02 10:07:23 +0000
+++ b/sql/mdl.h	2010-06-03 14:42:02 +0000
@@ -34,7 +34,7 @@ class THD;
 class MDL_context;
 class MDL_lock;
 class MDL_ticket;
-class Deadlock_detection_context;
+class Deadlock_detection_visitor;
 
 /**
   Type of metadata lock request.
@@ -322,8 +322,6 @@ public:
     DBUG_ASSERT(ticket == NULL);
     type= type_arg;
   }
-  uint get_deadlock_weight() const;
-
   static MDL_request *create(MDL_key::enum_mdl_namespace mdl_namespace,
                              const char *db, const char *name,
                              enum_mdl_type mdl_type, MEM_ROOT *root);
@@ -417,6 +415,8 @@ public:
   bool is_incompatible_when_granted(enum_mdl_type type) const;
   bool is_incompatible_when_waiting(enum_mdl_type type) const;
 
+  /* A helper used to determine which lock request should be aborted. */
+  uint get_deadlock_weight() const;
 private:
   friend class MDL_context;
 
@@ -529,6 +529,9 @@ public:
 
   inline THD *get_thd() const { return m_thd; }
 
+  /** @pre Only valid if we started waiting for lock. */
+  inline uint get_deadlock_weight() const
+  { return m_waiting_for->get_deadlock_weight(); }
   /**
     Post signal to the context (and wake it up if necessary).
 
@@ -580,7 +583,7 @@ public:
     return m_needs_thr_lock_abort;
   }
 
-  bool find_deadlock(Deadlock_detection_context *deadlock_ctx);
+  bool find_deadlock(Deadlock_detection_visitor *dvisitor);
 private:
   /**
     All MDL tickets acquired by this connection.
@@ -692,17 +695,6 @@ private:
     mysql_prlock_unlock(&m_waiting_for_lock);
   }
 
-  void set_deadlock_weight(uint weight)
-  {
-    /*
-      m_deadlock_weight should not be modified while m_waiting_for is
-      non-NULL as in this case this context might participate in deadlock
-      and so m_deadlock_weight can be accessed from other threads.
-    */
-    DBUG_ASSERT(m_waiting_for == NULL);
-    m_deadlock_weight= weight;
-  }
-
   void stop_waiting()
   {
     mysql_prlock_wrlock(&m_waiting_for_lock);


Attachment: [text/bzr-bundle] bzr/kostja@sun.com-20100603150624-ycc2h7xhot3auvym.bundle
Thread
bzr push into mysql-trunk-runtime branch (kostja:3031 to 3033)Konstantin Osipov3 Jun