List:Commits« Previous MessageNext Message »
From:Dmitry Lenev Date:January 28 2010 12:08pm
Subject:bzr commit into mysql-5.6-next-mr branch (dlenev:3069) Bug#46272
View as plain text  
#At file:///home/dlenev/src/bzr/mysql-next-4284-nl-adv-deadlock/ based on revid:kostja@stripped

 3069 Dmitry Lenev	2010-01-28
      Tentative patch implementing new type-of-operation-aware metadata locks.
      Fixes bug #46272 "MySQL 5.4.4, new MDL: unnecessary deadlock" and bug
      #37346 "innodb does not detect deadlock between update and alter table".
      
      Work in progress. Implemented advanced deadlock detector inspired
      by mysys's waiting_threads.c. This step should decrease probability
      of unwarranted ER_LOCK_DEADLOCK errors which has increased in some
      situations after introduction of type-of-operation-aware metadata
      locks.

    modified:
      mysql-test/r/mdl_sync.result
      mysql-test/t/mdl_sync.test
      sql/mdl.cc
      sql/mdl.h
      sql/sql_base.cc
=== modified file 'mysql-test/r/mdl_sync.result'
--- a/mysql-test/r/mdl_sync.result	2010-01-27 16:44:35 +0000
+++ b/mysql-test/r/mdl_sync.result	2010-01-28 12:08:24 +0000
@@ -1790,53 +1790,46 @@ commit;
 #
 # Switching to connection 'deadlock_con1'.
 begin;
-insert into t1 values (1);
-#
-# Switching to connection 'deadlock_con2'.
-begin;
-insert into t3 values (1);
+insert into t2 values (1);
 #
 # Switching to connection 'default'.
-# Send:
-rename table t2 to t0, t3 to t2, t0 to t3;;
+lock table t1 write;
 #
 # Switching to connection 'deadlock_con1'.
-# Wait until the above RENAME TABLE is blocked because it has to wait
-# for 'deadlock_con2' which holds shared metadata lock on 't3'.
 # The below SELECT statement should wait for metadata lock
-# on table 't2' and should not produce ER_LOCK_DEADLOCK
+# on table 't1' and should not produce ER_LOCK_DEADLOCK
 # immediately as no deadlock is possible at the moment.
-select * from t2;;
+select * from t1;;
 #
-# Switching to connection 'deadlock_con3'.
-# Wait until the above SELECT * FROM t2 is starts waiting
-# for an exclusive metadata lock to go away.
+# Switching to connection 'deadlock_con2'.
+# Wait until the above SELECT * FROM t1 is starts waiting
+# for an UNRW metadata lock to go away.
 # Send RENAME TABLE statement that will deadlock with the
 # SELECT statement and thus should abort the latter.
-rename table t1 to t5, t2 to t1, t5 to t2;;
+rename table t1 to t0, t2 to t1, t0 to t2;;
+#
+# Switching to connection 'default'.
+# Wait till above RENAME TABLE is blocked while holding
+# pending X lock on t1.
+# Allow the above RENAME TABLE to acquire lock on t1 and
+# create pending lock on t2 thus creating deadlock.
+unlock tables;
 #
 # Switching to connection 'deadlock_con1'.
 # Since the latest RENAME TABLE entered in deadlock with SELECT
 # statement the latter should be aborted and emit ER_LOCK_DEADLOCK
 # error.
-# Reap SELECT * FROM t2.
+# Reap SELECT * FROM t1.
 ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
 #
 # Again let us check that failure of the SELECT statement has not
-# released metadata lock on table 't1', i.e. that the latest RENAME
+# released metadata lock on table 't2', i.e. that the latest RENAME
 # is blocked.
 # Commit transaction to unblock this RENAME TABLE.
 commit;
 #
 # Switching to connection 'deadlock_con2'.
-# Commit transaction to unblock the first RENAME TABLE.
-commit;
-#
-# Switching to connection 'default'.
-# Reap RENAME TABLE t2 TO t0 ... .
-#
-# Switching to connection 'deadlock_con3'.
-# Reap RENAME TABLE t1 TO t5 ... .
+# Reap RENAME TABLE ... .
 #
 # Switching to connection 'default'.
 drop tables t1, t2, t3, t4;

=== modified file 'mysql-test/t/mdl_sync.test'
--- a/mysql-test/t/mdl_sync.test	2010-01-27 16:44:35 +0000
+++ b/mysql-test/t/mdl_sync.test	2010-01-28 12:08:24 +0000
@@ -2457,47 +2457,47 @@ connection default;
 --echo # Switching to connection 'deadlock_con1'.
 connection deadlock_con1;
 begin;
-insert into t1 values (1);
-
---echo #
---echo # Switching to connection 'deadlock_con2'.
-connection deadlock_con2;
-begin;
-insert into t3 values (1);
+insert into t2 values (1);
 
 --echo #
 --echo # Switching to connection 'default'.
 connection default;
---echo # Send:
---send rename table t2 to t0, t3 to t2, t0 to t3;
+lock table t1 write;
 
 --echo #
 --echo # Switching to connection 'deadlock_con1'.
 connection deadlock_con1;
---echo # Wait until the above RENAME TABLE is blocked because it has to wait
---echo # for 'deadlock_con2' which holds shared metadata lock on 't3'.
-let $wait_condition=
-  select count(*) = 1 from information_schema.processlist
-  where state = "Waiting for table" and info = "rename table t2 to t0, t3 to t2, t0 to t3";
---source include/wait_condition.inc
 --echo # The below SELECT statement should wait for metadata lock
---echo # on table 't2' and should not produce ER_LOCK_DEADLOCK
+--echo # on table 't1' and should not produce ER_LOCK_DEADLOCK
 --echo # immediately as no deadlock is possible at the moment.
---send select * from t2;
+--send select * from t1;
 
 --echo #
---echo # Switching to connection 'deadlock_con3'.
-connection deadlock_con3;
---echo # Wait until the above SELECT * FROM t2 is starts waiting
---echo # for an exclusive metadata lock to go away.
+--echo # Switching to connection 'deadlock_con2'.
+connection deadlock_con2;
+--echo # Wait until the above SELECT * FROM t1 is starts waiting
+--echo # for an UNRW metadata lock to go away.
 let $wait_condition=
   select count(*) = 1 from information_schema.processlist
-  where state = "Waiting for table" and info = "select * from t2";
+  where state = "Waiting for table" and info = "select * from t1";
 --source include/wait_condition.inc
 
 --echo # Send RENAME TABLE statement that will deadlock with the
 --echo # SELECT statement and thus should abort the latter.
---send rename table t1 to t5, t2 to t1, t5 to t2;
+--send rename table t1 to t0, t2 to t1, t0 to t2;
+
+--echo #
+--echo # Switching to connection 'default'.
+connection default;
+--echo # Wait till above RENAME TABLE is blocked while holding
+--echo # pending X lock on t1.
+let $wait_condition=
+  select count(*) = 1 from information_schema.processlist
+  where state = "Waiting for table" and info = "rename table t1 to t0, t2 to t1, t0 to t2";
+--source include/wait_condition.inc
+--echo # Allow the above RENAME TABLE to acquire lock on t1 and
+--echo # create pending lock on t2 thus creating deadlock.
+unlock tables;
 
 --echo #
 --echo # Switching to connection 'deadlock_con1'.
@@ -2505,17 +2505,17 @@ connection deadlock_con1;
 --echo # Since the latest RENAME TABLE entered in deadlock with SELECT
 --echo # statement the latter should be aborted and emit ER_LOCK_DEADLOCK
 --echo # error.
---echo # Reap SELECT * FROM t2.
+--echo # Reap SELECT * FROM t1.
 --error ER_LOCK_DEADLOCK
 --reap
 
 --echo #
 --echo # Again let us check that failure of the SELECT statement has not
---echo # released metadata lock on table 't1', i.e. that the latest RENAME
+--echo # released metadata lock on table 't2', i.e. that the latest RENAME
 --echo # is blocked.
 let $wait_condition=
   select count(*) = 1 from information_schema.processlist
-  where state = "Waiting for table" and info = "rename table t1 to t5, t2 to t1, t5 to t2";
+  where state = "Waiting for table" and info = "rename table t1 to t0, t2 to t1, t0 to t2";
 --source include/wait_condition.inc
 --echo # Commit transaction to unblock this RENAME TABLE.
 commit;
@@ -2523,19 +2523,7 @@ commit;
 --echo #
 --echo # Switching to connection 'deadlock_con2'.
 connection deadlock_con2;
---echo # Commit transaction to unblock the first RENAME TABLE.
-commit;
-
---echo #
---echo # Switching to connection 'default'.
-connection default;
---echo # Reap RENAME TABLE t2 TO t0 ... .
---reap
-
---echo #
---echo # Switching to connection 'deadlock_con3'.
-connection deadlock_con3;
---echo # Reap RENAME TABLE t1 TO t5 ... .
+--echo # Reap RENAME TABLE ... .
 --reap;
 
 --echo #

=== modified file 'sql/mdl.cc'
--- a/sql/mdl.cc	2010-01-28 09:35:54 +0000
+++ b/sql/mdl.cc	2010-01-28 12:08:24 +0000
@@ -50,6 +50,24 @@ private:
 };
 
 
+const static uint MDL_DEADLOCK_WEIGHT_DML= 0;
+const static uint MDL_DEADLOCK_WEIGHT_DDL= 100;
+const static uint MDL_DEADLOCK_MAX_SEARCH_DEPTH = 1000;
+
+
+class Deadlock_ctx
+{
+public:
+  Deadlock_ctx(MDL_context *start_arg)
+    : start(start_arg), max_search_depth(MDL_DEADLOCK_MAX_SEARCH_DEPTH),
+      victim(0)
+  { }
+  MDL_context *start;
+  uint max_search_depth;
+  MDL_context *victim;
+};
+
+
 /**
   Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps
   and compatibility matrices.
@@ -85,8 +103,13 @@ public:
   MDL_key key;
   void   *cached_object;
   mdl_cached_object_release_hook cached_object_release_hook;
-  /** Mutex protecting this lock context. */
-  pthread_mutex_t m_mutex;
+  /**
+    Read-write lock protecting this lock context.
+
+    TODO/FIXME: Replace with RW-lock which will prefer readers
+                on all platforms and not only on Linux.
+  */
+  rw_lock_t m_rwlock;
 
   bool is_empty() const
   {
@@ -112,8 +135,6 @@ public:
     Ticket_iterator it(m_granted);
     MDL_ticket *conflicting_ticket;
 
-    safe_mutex_assert_owner(&m_mutex);
-
     while ((conflicting_ticket= it++))
     {
       if (conflicting_ticket->get_ctx() != ctx)
@@ -133,12 +154,12 @@ public:
     MDL_lock::Ticket_iterator it(m_waiting);
     MDL_ticket *awake_ticket;
 
-    safe_mutex_assert_owner(&m_mutex);
-
     while ((awake_ticket= it++))
-      awake_ticket->get_ctx()->awake();
+      awake_ticket->get_ctx()->awake(MDL_context::NORMAL_WAKE_UP);
   }
 
+  bool find_deadlock(MDL_ticket *waiting_ticket, Deadlock_ctx *deadlock_ctx);
+
 private:
   /** List of granted tickets for this lock. */
   Ticket_list m_granted;
@@ -161,12 +182,12 @@ public:
     m_ref_release(0),
     m_is_destroyed(FALSE)
   {
-    pthread_mutex_init(&m_mutex, NULL);
+    my_rwlock_init(&m_rwlock, NULL);
   }
 
   virtual ~MDL_lock()
   {
-    pthread_mutex_destroy(&m_mutex);
+    rwlock_destroy(&m_rwlock);
   }
   inline static void destroy(MDL_lock *lock);
 public:
@@ -420,7 +441,7 @@ bool MDL_map::move_from_hash_to_lock_mut
   lock->m_ref_usage++;
   pthread_mutex_unlock(&m_mutex);
 
-  pthread_mutex_lock(&lock->m_mutex);
+  rw_wrlock(&lock->m_rwlock);
   lock->m_ref_release++;
   if (unlikely(lock->m_is_destroyed))
   {
@@ -435,7 +456,7 @@ bool MDL_map::move_from_hash_to_lock_mut
     */
     uint ref_usage= lock->m_ref_usage;
     uint ref_release= lock->m_ref_release;
-    pthread_mutex_unlock(&lock->m_mutex);
+    rw_unlock(&lock->m_rwlock);
     if (ref_usage == ref_release)
       MDL_lock::destroy(lock);
     return TRUE;
@@ -454,8 +475,6 @@ void MDL_map::remove(MDL_lock *lock)
 {
   uint ref_usage, ref_release;
 
-  safe_mutex_assert_owner(&lock->m_mutex);
-
   if (lock->cached_object)
     (*lock->cached_object_release_hook)(lock->cached_object);
 
@@ -480,7 +499,7 @@ void MDL_map::remove(MDL_lock *lock)
   lock->m_is_destroyed= TRUE;
   ref_usage= lock->m_ref_usage;
   ref_release= lock->m_ref_release;
-  pthread_mutex_unlock(&lock->m_mutex);
+  rw_unlock(&lock->m_rwlock);
   pthread_mutex_unlock(&m_mutex);
   if (ref_usage == ref_release)
     MDL_lock::destroy(lock);
@@ -495,9 +514,14 @@ void MDL_map::remove(MDL_lock *lock)
 
 MDL_context::MDL_context()
   :m_trans_sentinel(NULL),
-  m_thd(NULL)
-{
-  pthread_cond_init(&m_ctx_wakeup_cond, NULL);
+  m_thd(NULL),
+  m_waiting_for(NULL),
+  m_deadlock_weight(0),
+  m_signal(NO_WAKE_UP)
+{
+  my_rwlock_init(&m_waiting_for_lock, NULL);
+  pthread_mutex_init(&m_signal_lock, NULL);
+  pthread_cond_init(&m_signal_cond, NULL);
 }
 
 
@@ -516,7 +540,10 @@ MDL_context::MDL_context()
 void MDL_context::destroy()
 {
   DBUG_ASSERT(m_tickets.is_empty());
-  pthread_cond_destroy(&m_ctx_wakeup_cond);
+
+  rwlock_destroy(&m_waiting_for_lock);
+  pthread_mutex_destroy(&m_signal_lock);
+  pthread_cond_destroy(&m_signal_cond);
 }
 
 
@@ -711,6 +738,52 @@ static inline void mdl_exit_cond(THD *th
 }
 
 
+MDL_context::mdl_signal_type MDL_context::wait()
+{
+  const char *old_msg;
+  st_my_thread_var *mysys_var= my_thread_var;
+  mdl_signal_type result;
+
+  pthread_mutex_lock(&m_signal_lock);
+
+  old_msg= MDL_ENTER_COND(m_thd, mysys_var, &m_signal_cond, &m_signal_lock);
+
+  while (! m_signal && !mysys_var->abort)
+    pthread_cond_wait(&m_signal_cond, &m_signal_lock);
+
+  result= m_signal;
+
+  MDL_EXIT_COND(m_thd, mysys_var, &m_signal_lock, old_msg);
+
+  return result;
+}
+
+
+MDL_context::mdl_signal_type MDL_context::timed_wait(ulong timeout)
+{
+  struct timespec abstime;
+  const char *old_msg;
+  mdl_signal_type result;
+  st_my_thread_var *mysys_var= my_thread_var;
+
+  pthread_mutex_lock(&m_signal_lock);
+
+  old_msg= MDL_ENTER_COND(m_thd, mysys_var, &m_signal_cond, &m_signal_lock);
+
+  if (! m_signal)
+  {
+    set_timespec(abstime, timeout);
+    pthread_cond_timedwait(&m_signal_cond, &m_signal_lock, &abstime);
+  }
+
+  result= (m_signal != NO_WAKE_UP) ? m_signal : TIMEOUT_WAKE_UP;
+
+  MDL_EXIT_COND(m_thd, mysys_var, &m_signal_lock, old_msg);
+
+  return result;
+}
+
+
 /**
   Clear bit corresponding to the type of metadata lock in bitmap representing
   set of such types if list of tickets does not contain ticket with such type.
@@ -741,8 +814,6 @@ static void clear_bit_if_not_in_list(uch
 
 void MDL_lock::add_pending(MDL_ticket *ticket)
 {
-  safe_mutex_assert_owner(&m_mutex);
-
   m_waiting.push_front(ticket);
   m_waiting_bitmap|= (uchar) MDL_BIT(ticket->get_type());
 }
@@ -755,8 +826,6 @@ void MDL_lock::add_pending(MDL_ticket *t
 
 void MDL_lock::remove_pending(MDL_ticket *ticket)
 {
-  safe_mutex_assert_owner(&m_mutex);
-
   m_waiting.remove(ticket);
   /*
     Check if waiting queue has another ticket with the same type as
@@ -779,8 +848,6 @@ void MDL_lock::remove_pending(MDL_ticket
 
 void MDL_lock::add_granted(MDL_ticket *ticket)
 {
-  safe_mutex_assert_owner(&m_mutex);
-
   m_granted.push_front(ticket);
   m_granted_bitmap|= (uchar) MDL_BIT(ticket->get_type());
 }
@@ -793,8 +860,6 @@ void MDL_lock::add_granted(MDL_ticket *t
 
 void MDL_lock::remove_granted(MDL_ticket *ticket)
 {
-  safe_mutex_assert_owner(&m_mutex);
-
   m_granted.remove(ticket);
 
   /*
@@ -1023,13 +1088,27 @@ bool MDL_lock::has_pending_conflicting_l
 
   safe_mutex_assert_not_owner(&LOCK_open);
 
-  pthread_mutex_lock(&m_mutex);
+  rw_rdlock(&m_rwlock);
   result= (m_waiting_bitmap & get_incompatible_granted_types_bitmap()[type]);
-  pthread_mutex_unlock(&m_mutex);
+  rw_unlock(&m_rwlock);
   return result;
 }
 
 
+bool MDL_ticket::is_incompatible_when_granted(enum_mdl_type type) const
+{
+  return (MDL_BIT(m_type) &
+          m_lock->get_incompatible_granted_types_bitmap()[type]);
+}
+
+
+bool MDL_ticket::is_incompatible_when_waiting(enum_mdl_type type) const
+{
+  return (MDL_BIT(m_type) &
+          m_lock->get_incompatible_waiting_types_bitmap()[type]);
+}
+
+
 /**
   Acquire global intention exclusive lock.
 
@@ -1127,7 +1206,6 @@ MDL_context::acquire_lock(MDL_request *m
   MDL_ticket *ticket;
   MDL_key *key= &mdl_request->key;
   MDL_lock *lock;
-  const char *old_msg;
   st_my_thread_var *mysys_var= my_thread_var;
 
   DBUG_ASSERT(mdl_request->ticket == NULL);
@@ -1155,30 +1233,32 @@ MDL_context::acquire_lock(MDL_request *m
     return TRUE;
   }
 
-  old_msg= MDL_ENTER_COND(m_thd, mysys_var, &m_ctx_wakeup_cond,
-                          &lock->m_mutex);
+  ticket->m_lock= lock;
 
-  if (! lock->can_grant_lock(mdl_request->type, FALSE))
+  while (! lock->can_grant_lock(mdl_request->type, FALSE))
   {
     lock->add_pending(ticket);
 
-    do
-    {
-      pthread_cond_wait(&m_ctx_wakeup_cond, &lock->m_mutex);
-    }
-    while (! lock->can_grant_lock(mdl_request->type, FALSE) &&
-           ! mysys_var->abort);
+    wait_reset();
 
-    if (mysys_var->abort)
-    {
-      /*
-        We have to do MDL_EXIT_COND here and then re-acquire the lock
-        as there is a chance that we will destroy MDL_lock object and
-        won't be able to call MDL_EXIT_COND after it.
-      */
-      MDL_EXIT_COND(m_thd, mysys_var, &lock->m_mutex, old_msg);
+    rw_unlock(&lock->m_rwlock);
 
-      pthread_mutex_lock(&lock->m_mutex);
+    /*
+      Even though deadlocks should not be possible with current usage
+      of this method let us play safe. Check for deadlock presence
+      and react properly if one is detected.
+    */
+    set_deadlock_weight(MDL_DEADLOCK_WEIGHT_DDL);
+    will_wait_for(ticket);
+
+    bool deadlock= (find_deadlock() || wait() == VICTIM_WAKE_UP);
+
+    stop_waiting();
+
+    rw_wrlock(&lock->m_rwlock);
+
+    if (deadlock || mysys_var->abort)
+    {
       /* Get rid of pending ticket. */
       lock->remove_pending(ticket);
       if (lock->is_empty())
@@ -1186,18 +1266,18 @@ MDL_context::acquire_lock(MDL_request *m
       else
       {
         lock->wake_up_waiters();
-        pthread_mutex_unlock(&lock->m_mutex);
+        rw_unlock(&lock->m_rwlock);
       }
       MDL_ticket::destroy(ticket);
+      if (deadlock)
+        my_error(ER_LOCK_DEADLOCK, MYF(0));
       return TRUE;
     }
     lock->remove_pending(ticket);
   }
 
   lock->add_granted(ticket);
-  MDL_EXIT_COND(m_thd, mysys_var, &lock->m_mutex, old_msg);
-
-  ticket->m_lock= lock;
+  rw_unlock(&lock->m_rwlock);
 
   m_tickets.push_front(ticket);
 
@@ -1300,7 +1380,7 @@ MDL_context::try_acquire_lock(MDL_reques
   if (lock->can_grant_lock(mdl_request->type, FALSE))
   {
     lock->add_granted(ticket);
-    pthread_mutex_unlock(&lock->m_mutex);
+    rw_unlock(&lock->m_rwlock);
 
     ticket->m_lock= lock;
 
@@ -1312,7 +1392,7 @@ MDL_context::try_acquire_lock(MDL_reques
   {
     /* We can't get here if we allocated a new lock. */
     DBUG_ASSERT(! lock->is_empty());
-    pthread_mutex_unlock(&lock->m_mutex);
+    rw_unlock(&lock->m_rwlock);
     MDL_ticket::destroy(ticket);
   }
 
@@ -1353,9 +1433,9 @@ MDL_context::clone_ticket(MDL_request *m
   ticket->m_lock= mdl_request->ticket->m_lock;
   mdl_request->ticket= ticket;
 
-  pthread_mutex_lock(&ticket->m_lock->m_mutex);
+  rw_wrlock(&ticket->m_lock->m_rwlock);
   ticket->m_lock->add_granted(ticket);
-  pthread_mutex_unlock(&ticket->m_lock->m_mutex);
+  rw_unlock(&ticket->m_lock->m_rwlock);
 
   m_tickets.push_front(ticket);
 
@@ -1381,13 +1461,9 @@ void notify_shared_lock(THD *thd, MDL_ti
     DBUG_ASSERT(thd != conflicting_thd); /* Self-deadlock */
 
     /*
-      If the thread that holds the conflicting lock is waiting in MDL
-      subsystem it has to be woken up by calling MDL_context::awake().
-    */
-    conflicting_ctx->awake();
-    /*
-      If it is waiting on table-level lock or some other non-MDL resource
-      we delegate its waking up to code outside of MDL.
+      If thread which holds conflicting lock is waiting on table-level
+      lock or some other non-MDL resource we might need to wake it up
+      by calling code outside of MDL.
     */
     mysql_notify_thread_having_shared_lock(thd, conflicting_thd,
                                conflicting_ticket->get_needs_thr_lock_abort());
@@ -1415,7 +1491,6 @@ bool MDL_context::acquire_exclusive_lock
                                               bool is_upgrade)
 {
   MDL_lock *lock;
-  const char *old_msg;
   MDL_ticket *ticket;
   bool not_used;
   st_my_thread_var *mysys_var= my_thread_var;
@@ -1456,10 +1531,9 @@ bool MDL_context::acquire_exclusive_lock
     return TRUE;
   }
 
-  lock->add_pending(ticket);
+  ticket->m_lock= lock;
 
-  old_msg= MDL_ENTER_COND(m_thd, mysys_var, &m_ctx_wakeup_cond,
-                          &lock->m_mutex);
+  lock->add_pending(ticket);
 
   while (!lock->can_grant_lock(mdl_request->type, is_upgrade))
   {
@@ -1469,14 +1543,7 @@ bool MDL_context::acquire_exclusive_lock
         We're about to start waiting. Don't do it if we have
         HANDLER locks (we can't have any other locks here).
         Waiting with locks may lead to a deadlock.
-
-        We have to do MDL_EXIT_COND here and then re-acquire the
-        lock as there is a chance that we will destroy MDL_lock
-        object and won't be able to call MDL_EXIT_COND after it.
       */
-      MDL_EXIT_COND(m_thd, mysys_var, &lock->m_mutex, old_msg);
-
-      pthread_mutex_lock(&lock->m_mutex);
       /* Get rid of pending ticket. */
       lock->remove_pending(ticket);
       if (lock->is_empty())
@@ -1488,46 +1555,32 @@ bool MDL_context::acquire_exclusive_lock
           lock which now might be able to do it. Wake them up!
         */
         lock->wake_up_waiters();
-        pthread_mutex_unlock(&lock->m_mutex);
+        rw_unlock(&lock->m_rwlock);
       }
       MDL_ticket::destroy(ticket);
       my_error(ER_LOCK_DEADLOCK, MYF(0));
       return TRUE;
     }
+    wait_reset();
 
     lock->notify_shared_locks(this);
+    rw_unlock(&lock->m_rwlock);
+
+    set_deadlock_weight(MDL_DEADLOCK_WEIGHT_DDL);
+    will_wait_for(ticket);
 
     /* There is a shared or exclusive lock on the object. */
     DEBUG_SYNC(m_thd, "mdl_acquire_exclusive_locks_wait");
 
-    /*
-      Another thread might have obtained a shared MDL lock on some table
-      but has not yet opened it and/or tried to obtain data lock on it.
-      Also invocation of acquire_exclusive_lock() method and consequently
-      first call to notify_shared_lock() might have happened right after
-      thread holding shared metadata lock in wait_for_lock() method
-      checked that there are no pending conflicting locks but before
-      it has started waiting.
-      In both these cases we need to sleep until these threads will start
-      waiting and try to abort them once again.
+    bool deadlock= (find_deadlock() || timed_wait(1) == VICTIM_WAKE_UP);
 
-      QQ: What is the optimal value for this sleep?
-    */
-    struct timespec abstime;
-    set_timespec(abstime, 1);
-    pthread_cond_timedwait(&m_ctx_wakeup_cond, &lock->m_mutex, &abstime);
+    stop_waiting();
 
-    if (mysys_var->abort)
-    {
-      /*
-        We have to do MDL_EXIT_COND here and then re-acquire the lock
-        as there is a chance that we will destroy MDL_lock object and
-        won't be able to call MDL_EXIT_COND after it.
-      */
-      MDL_EXIT_COND(m_thd, mysys_var, &lock->m_mutex, old_msg);
+    rw_wrlock(&lock->m_rwlock);
 
-      pthread_mutex_lock(&lock->m_mutex);
-      /* Get rid of the pending ticket. */
+    if (deadlock || mysys_var->abort)
+    {
+      /* Get rid of pending ticket. */
       lock->remove_pending(ticket);
       if (lock->is_empty())
         mdl_locks.remove(lock);
@@ -1538,9 +1591,11 @@ bool MDL_context::acquire_exclusive_lock
           lock which now might be able to do it. Wake them up!
         */
         lock->wake_up_waiters();
-        pthread_mutex_unlock(&lock->m_mutex);
+        rw_unlock(&lock->m_rwlock);
       }
       MDL_ticket::destroy(ticket);
+      if (deadlock)
+        my_error(ER_LOCK_DEADLOCK, MYF(0));
       return TRUE;
     }
   }
@@ -1552,9 +1607,7 @@ bool MDL_context::acquire_exclusive_lock
     (*lock->cached_object_release_hook)(lock->cached_object);
   lock->cached_object= NULL;
 
-  MDL_EXIT_COND(m_thd, mysys_var, &lock->m_mutex, old_msg);
-
-  ticket->m_lock= lock;
+  rw_unlock(&lock->m_rwlock);
 
   m_tickets.push_front(ticket);
 
@@ -1709,7 +1762,7 @@ MDL_context::upgrade_shared_lock_to_excl
   is_new_ticket= ! has_lock(mdl_svp, mdl_xlock_request.ticket);
 
   /* Merge the acquired and the original lock. @todo: move to a method. */
-  pthread_mutex_lock(&mdl_ticket->m_lock->m_mutex);
+  rw_wrlock(&mdl_ticket->m_lock->m_rwlock);
   if (is_new_ticket)
     mdl_ticket->m_lock->remove_granted(mdl_xlock_request.ticket);
   /*
@@ -1721,7 +1774,7 @@ MDL_context::upgrade_shared_lock_to_excl
   mdl_ticket->m_type= MDL_EXCLUSIVE;
   mdl_ticket->m_lock->add_granted(mdl_ticket);
 
-  pthread_mutex_unlock(&mdl_ticket->m_lock->m_mutex);
+  rw_unlock(&mdl_ticket->m_lock->m_rwlock);
 
   if (is_new_ticket)
   {
@@ -1733,35 +1786,144 @@ MDL_context::upgrade_shared_lock_to_excl
 }
 
 
-/**
-  Implement a simple deadlock detection heuristic: check if there
-  are any pending exclusive locks which conflict with shared locks
-  held by this thread. In that case waiting can be circular,
-  i.e. lead to a deadlock.
+bool MDL_lock::find_deadlock(MDL_ticket *waiting_ticket,
+                             Deadlock_ctx *deadlock_ctx)
+{
+  MDL_ticket *ticket;
+  bool result= FALSE;
+
+  rw_rdlock(&m_rwlock);
+
+  Ticket_iterator granted_it(m_granted);
+  Ticket_iterator waiting_it(m_waiting);
+
+  while ((ticket= granted_it++))
+  {
+    if (ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
+        ticket->get_ctx() != waiting_ticket->get_ctx() &&
+        ticket->get_ctx() == deadlock_ctx->start)
+    {
+      result= TRUE;
+      goto end;
+    }
+  }
+
+  while ((ticket= waiting_it++))
+  {
+    if (ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
+        ticket->get_ctx() != waiting_ticket->get_ctx() &&
+        ticket->get_ctx() == deadlock_ctx->start)
+    {
+      result= TRUE;
+      goto end;
+    }
+  }
+
+  granted_it.rewind();
+  while ((ticket= granted_it++))
+  {
+    if (ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
+        ticket->get_ctx() != waiting_ticket->get_ctx() &&
+        ticket->get_ctx()->find_deadlock(deadlock_ctx))
+    {
+      result= TRUE;
+      goto end;
+    }
+  }
+
+  waiting_it.rewind();
+  while ((ticket= waiting_it++))
+  {
+    if (ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
+        ticket->get_ctx() != waiting_ticket->get_ctx() &&
+        ticket->get_ctx()->find_deadlock(deadlock_ctx))
+    {
+      result= TRUE;
+      goto end;
+    }
+  }
+
+end:
+  rw_unlock(&m_rwlock);
+  return result;
+}
 
-  @return TRUE   If there are any pending conflicting locks.
-          FALSE  Otherwise.
-*/
 
-bool MDL_context::can_wait_lead_to_deadlock() const
+bool MDL_context::find_deadlock(Deadlock_ctx *deadlock_ctx)
 {
-  Ticket_iterator ticket_it(m_tickets);
-  MDL_ticket *ticket;
+  bool result= FALSE;
 
-  while ((ticket= ticket_it++))
+  rw_rdlock(&m_waiting_for_lock);
+
+  if (m_waiting_for)
   {
     /*
-      In MySQL we never call this method while holding exclusive or
-      upgradeable shared metadata locks.
-      Otherwise we would also have to check for the presence of pending
-      requests for conflicting types of global lock.
-      In addition MDL_ticket::has_pending_conflicting_lock()
-      won't work properly for exclusive type of lock.
+      QQ: should we rather be checking for NO_WAKE_UP ?
+
+      We want to do check signal only when m_waiting_for is set
+      to avoid reading left-overs from previous kills.
     */
-    DBUG_ASSERT(! ticket->is_upgradable_or_exclusive());
+    if (peek_signal() != VICTIM_WAKE_UP)
+    {
+
+      if (! --deadlock_ctx->max_search_depth)
+        result= TRUE;
+      else
+        result= m_waiting_for->m_lock->find_deadlock(m_waiting_for, deadlock_ctx);
+      ++deadlock_ctx->max_search_depth;
+    }
+  }
+
+  if (result)
+  {
+    if (! deadlock_ctx->victim)
+      deadlock_ctx->victim= this;
+    else if (deadlock_ctx->victim->m_deadlock_weight > m_deadlock_weight)
+    {
+      rw_unlock(&deadlock_ctx->victim->m_waiting_for_lock);
+      deadlock_ctx->victim= this;
+    }
+    else
+      rw_unlock(&m_waiting_for_lock);
+  }
+  else
+    rw_unlock(&m_waiting_for_lock);
+
+  return result;
+}
+
+
+bool MDL_context::find_deadlock()
+{
+  Deadlock_ctx deadlock_ctx(this);
 
-    if (ticket->has_pending_conflicting_lock())
+  while (1)
+  {
+    if (! find_deadlock(&deadlock_ctx))
+    {
+      /* No deadlocks are found! */
+      break;
+    }
+
+    if (deadlock_ctx.victim != this)
+    {
+      deadlock_ctx.victim->awake(VICTIM_WAKE_UP);
+      rw_unlock(&deadlock_ctx.victim->m_waiting_for_lock);
+      /*
+        After adding new arc to waiting graph we found that it participates
+        in some loop (i.e. there is a deadlock). We decided to destroy this
+        loop by removing some arc other than newly added. Since this doesn't
+        guarantee that all loops created by addition of this arc are
+        destroyed we have to repeat search.
+      */
+      continue;
+    }
+    else
+    {
+      DBUG_ASSERT(&deadlock_ctx.victim->m_waiting_for_lock == &m_waiting_for_lock);
+      rw_unlock(&deadlock_ctx.victim->m_waiting_for_lock);
       return TRUE;
+    }
   }
   return FALSE;
 }
@@ -1784,7 +1946,6 @@ bool
 MDL_context::wait_for_lock(MDL_request *mdl_request)
 {
   MDL_lock *lock;
-  const char *old_msg;
   st_my_thread_var *mysys_var= my_thread_var;
 
   safe_mutex_assert_not_owner(&LOCK_open);
@@ -1805,24 +1966,6 @@ MDL_context::wait_for_lock(MDL_request *
     */
     mysql_ha_flush(m_thd);
 
-    /*
-      In cases when we wait while still holding some metadata
-      locks deadlocks are possible.
-      To avoid them we use the following simple empiric - don't
-      wait for new lock request to be satisfied if for one of the
-      locks which are already held by this connection there is
-      a conflicting request (i.e. this connection should not wait
-      if someone waits for it).
-      This empiric should work well (e.g. give low number of false
-      negatives) in situations when conflicts are rare (in our
-      case this is true since DDL statements should be rare).
-    */
-    if (can_wait_lead_to_deadlock())
-    {
-      my_error(ER_LOCK_DEADLOCK, MYF(0));
-      return TRUE;
-    }
-
     MDL_key *key= &mdl_request->key;
 
     /* The below call implicitly locks MDL_lock::m_mutex on success. */
@@ -1831,37 +1974,43 @@ MDL_context::wait_for_lock(MDL_request *
 
     if (lock->can_grant_lock(mdl_request->type, FALSE))
     {
-      pthread_mutex_unlock(&lock->m_mutex);
+      rw_unlock(&lock->m_rwlock);
       return FALSE;
     }
 
     MDL_ticket *pending_ticket;
     if (! (pending_ticket= MDL_ticket::create(this, mdl_request->type)))
     {
-      pthread_mutex_unlock(&lock->m_mutex);
+      rw_unlock(&lock->m_rwlock);
       return TRUE;
     }
+
+    pending_ticket->m_lock= lock;
+
     lock->add_pending(pending_ticket);
 
-    old_msg= MDL_ENTER_COND(m_thd, mysys_var, &m_ctx_wakeup_cond,
-                            &lock->m_mutex);
+    wait_reset();
+    rw_unlock(&lock->m_rwlock);
 
-    pthread_cond_wait(&m_ctx_wakeup_cond, &lock->m_mutex);
+    set_deadlock_weight(MDL_DEADLOCK_WEIGHT_DML);
+    will_wait_for(pending_ticket);
 
-    /*
-      We have to do MDL_EXIT_COND here and then re-acquire the lock
-      as there is a chance that we will destroy MDL_lock object and
-      won't be able to call MDL_EXIT_COND after it.
-    */
-    MDL_EXIT_COND(m_thd, mysys_var, &lock->m_mutex, old_msg);
+    bool deadlock= (find_deadlock() || wait() == VICTIM_WAKE_UP);
 
-    pthread_mutex_lock(&lock->m_mutex);
+    stop_waiting();
+
+    rw_wrlock(&lock->m_rwlock);
     lock->remove_pending(pending_ticket);
     if (lock->is_empty())
       mdl_locks.remove(lock);
     else
-      pthread_mutex_unlock(&lock->m_mutex);
+      rw_unlock(&lock->m_rwlock);
     MDL_ticket::destroy(pending_ticket);
+    if (deadlock)
+    {
+      my_error(ER_LOCK_DEADLOCK, MYF(0));
+      return TRUE;
+    }
   }
   return mysys_var->abort;
 }
@@ -1886,7 +2035,7 @@ void MDL_context::release_lock(MDL_ticke
   if (ticket == m_trans_sentinel)
     m_trans_sentinel= ++Ticket_list::Iterator(m_tickets, ticket);
 
-  pthread_mutex_lock(&lock->m_mutex);
+  rw_wrlock(&lock->m_rwlock);
 
   lock->remove_granted(ticket);
 
@@ -1895,7 +2044,7 @@ void MDL_context::release_lock(MDL_ticke
   else
   {
     lock->wake_up_waiters();
-    pthread_mutex_unlock(&lock->m_mutex);
+    rw_unlock(&lock->m_rwlock);
   }
 
   m_tickets.remove(ticket);
@@ -1992,7 +2141,7 @@ void MDL_ticket::downgrade_exclusive_loc
   if (m_type != MDL_EXCLUSIVE)
     return;
 
-  pthread_mutex_lock(&m_lock->m_mutex);
+  rw_wrlock(&m_lock->m_rwlock);
   /*
     SNRW type of lock is used here since downgrade of metadata locks
     happens in most cases (QQ) under LOCK TABLES.
@@ -2006,7 +2155,7 @@ void MDL_ticket::downgrade_exclusive_loc
   m_type= MDL_SHARED_NO_READ_WRITE;
   m_lock->add_granted(this);
   m_lock->wake_up_waiters();
-  pthread_mutex_unlock(&m_lock->m_mutex);
+  rw_unlock(&m_lock->m_rwlock);
 }
 
 

=== modified file 'sql/mdl.h'
--- a/sql/mdl.h	2010-01-28 09:35:54 +0000
+++ b/sql/mdl.h	2010-01-28 12:08:24 +0000
@@ -26,6 +26,7 @@ class THD;
 class MDL_context;
 class MDL_lock;
 class MDL_ticket;
+class Deadlock_ctx;
 
 /**
   Type of metadata lock request.
@@ -413,6 +414,10 @@ public:
   {
     return m_needs_thr_lock_abort;
   }
+
+  bool is_incompatible_when_granted(enum_mdl_type type) const;
+  bool is_incompatible_when_waiting(enum_mdl_type type) const;
+
 private:
   friend class MDL_context;
 
@@ -473,6 +478,11 @@ public:
 
   typedef Ticket_list::Iterator Ticket_iterator;
 
+  enum mdl_signal_type { NO_WAKE_UP = 0,
+                         NORMAL_WAKE_UP,
+                         VICTIM_WAKE_UP,
+                         TIMEOUT_WAKE_UP };
+
   MDL_context();
   void destroy();
 
@@ -525,8 +535,6 @@ public:
   void release_transactional_locks();
   void rollback_to_savepoint(MDL_ticket *mdl_savepoint);
 
-  bool can_wait_lead_to_deadlock() const;
-
   inline THD *get_thd() const { return m_thd; }
 
   bool acquire_global_intention_exclusive_lock(MDL_request *mdl_request);
@@ -534,12 +542,19 @@ public:
   /**
     Wake up context which is waiting for a change of MDL_lock state.
   */
-  void awake()
+  void awake(mdl_signal_type signal)
   {
-    pthread_cond_signal(&m_ctx_wakeup_cond);
+    pthread_mutex_lock(&m_signal_lock);
+    m_signal= signal;
+    pthread_cond_signal(&m_signal_cond);
+    pthread_mutex_unlock(&m_signal_lock);
   }
 
   void init(THD *thd_arg) { m_thd= thd_arg; }
+
+  bool find_deadlock();
+  bool find_deadlock(Deadlock_ctx *deadlock_ctx);
+
 private:
   /**
     All MDL tickets acquired by this connection.
@@ -603,6 +618,16 @@ private:
   */
   MDL_ticket *m_trans_sentinel;
   THD *m_thd;
+
+  /**
+    Read-write lock protecting m_waiting_for member.
+
+    TODO/FIXME: Replace with RW-lock which will prefer readers
+                on all platforms and not only on Linux.
+  */
+  rw_lock_t m_waiting_for_lock;
+  MDL_ticket *m_waiting_for;
+  uint m_deadlock_weight;
   /**
     Condvar which is used for waiting until this context's pending
     request can be satisfied or this thread has to perform actions
@@ -610,13 +635,60 @@ private:
     notification by adding a ticket corresponding to the request
     to an appropriate queue of waiters).
   */
-  pthread_cond_t m_ctx_wakeup_cond;
+  pthread_mutex_t m_signal_lock;
+  pthread_cond_t m_signal_cond;
+  mdl_signal_type m_signal;
 
 private:
   MDL_ticket *find_ticket(MDL_request *mdl_req,
                           bool *is_transactional);
   void release_locks_stored_before(MDL_ticket *sentinel);
   bool acquire_exclusive_lock_impl(MDL_request *mdl_request, bool is_upgrade);
+
+  void will_wait_for(MDL_ticket *pending_ticket)
+  {
+    rw_wrlock(&m_waiting_for_lock);
+    m_waiting_for= pending_ticket;
+    rw_unlock(&m_waiting_for_lock);
+  }
+
+  void set_deadlock_weight(uint weight)
+  {
+    /*
+      m_deadlock_weight should not be modified while m_waiting_for is
+      non-NULL as in this case this context might participate in deadlock
+      and so m_deadlock_weight can be accessed from other threads.
+    */
+    DBUG_ASSERT(m_waiting_for == NULL);
+    m_deadlock_weight= weight;
+  }
+
+  void stop_waiting()
+  {
+    rw_wrlock(&m_waiting_for_lock);
+    m_waiting_for= NULL;
+    rw_unlock(&m_waiting_for_lock);
+  }
+
+  void wait_reset()
+  {
+    pthread_mutex_lock(&m_signal_lock);
+    m_signal= NO_WAKE_UP;
+    pthread_mutex_unlock(&m_signal_lock);
+  }
+
+  mdl_signal_type wait();
+  mdl_signal_type timed_wait(ulong timeout);
+
+  mdl_signal_type peek_signal()
+  {
+    mdl_signal_type result;
+    pthread_mutex_lock(&m_signal_lock);
+    result= m_signal;
+    pthread_mutex_unlock(&m_signal_lock);
+    return result;
+  }
+
 private:
   MDL_context(const MDL_context &rhs);          /* not implemented */
   MDL_context &operator=(MDL_context &rhs);     /* not implemented */

=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc	2010-01-27 21:44:28 +0000
+++ b/sql/sql_base.cc	2010-01-28 12:08:24 +0000
@@ -8682,24 +8682,6 @@ tdc_wait_for_old_versions(THD *thd, MDL_
     */
     mysql_ha_flush(thd);
 
-    /*
-      Check if there is someone waiting for one of metadata locks
-      held by this connection and return an error if that's the
-      case, since this situation may lead to a deadlock.
-      This can happen, when, for example, this connection is
-      waiting for an old version of some table to go away and
-      another connection is trying to upgrade its shared
-      metadata lock to exclusive, and thus is waiting
-      for this to release its lock. We must check for
-      the condition on each iteration of the loop to remove
-      any window for a race.
-    */
-    if (thd->mdl_context.can_wait_lead_to_deadlock())
-    {
-      my_error(ER_LOCK_DEADLOCK, MYF(0));
-      return TRUE;
-    }
-
     pthread_mutex_lock(&LOCK_open);
 
     MDL_request_list::Iterator it(*mdl_requests);


Attachment: [text/bzr-bundle] bzr/dlenev@mysql.com-20100128120824-vlwx6l1vvyorxbeo.bundle
Thread
bzr commit into mysql-5.6-next-mr branch (dlenev:3069) Bug#46272Dmitry Lenev28 Jan