MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Dmitry Lenev Date:February 10 2010 3:46pm
Subject:bzr commit into mysql-5.5-next-mr branch (dlenev:3088) Bug#50998
View as plain text  
#At file:///home/dlenev/src/bzr/mysql-next-4284-bg50998/ based on revid:alik@stripped

 3088 Dmitry Lenev	2010-02-10
      Fix for bug #50998 "Deadlock in MDL code during test 
      rqg_mdl_stability".
      
      When a statement that started waiting on a metadata lock
      created more than one loop in the waiters graph, the server
      might have entered a deadlock condition.
      
      The problem was that in the case described above the MDL deadlock
      detector had to perform several searches for deadlock but
      forgot to reset Deadlock_detection_context before performing
      a new search.
      Failure to do so broke the assumption in the code responsible for
      choosing a victim that if Deadlock_detection_context::victim
      is set we also have a read lock on m_waiting_for_lock for this
      context. As a result this lock could have been unlocked more
      times than it was acquired, which corrupted the rwlock's state
      and led to a server deadlock.
      
      This fix ensures that such a reset is done before each attempt
      to find a deadlock.
     @ mysql-test/r/mdl_sync.result
        Added test for bug #50998 "Deadlock in MDL code during test
        rqg_mdl_stability" as well as coverage for the case when
        addition of statement waiting for metadata lock adds several
        loops in the waiters graph and therefore several searches
        for deadlock should be performed by MDL deadlock detector.
     @ mysql-test/t/mdl_sync.test
        Added test for bug #50998 "Deadlock in MDL code during test
        rqg_mdl_stability" as well as coverage for the case when
        addition of statement waiting for metadata lock adds several
        loops in the waiters graph and therefore several searches
        for deadlock should be performed by MDL deadlock detector.
     @ sql/mdl.cc
        Ensure that in cases when the MDL deadlock detector has to
        perform several searches for deadlock, because several loops
        in the waiters graph are possible, we reset
        Deadlock_detection_context before performing each search.
        Failure to do so broke the assumption in the code responsible
        for choosing a victim that if Deadlock_detection_context::victim
        is set we also have a read lock on m_waiting_for_lock for this
        context. As a result this lock could have been unlocked more
        times than it was acquired, which corrupted the rwlock's state
        (no one was able to acquire a write lock on it anymore).

    modified:
      mysql-test/r/mdl_sync.result
      mysql-test/t/mdl_sync.test
      sql/mdl.cc
=== modified file 'mysql-test/r/mdl_sync.result'
--- a/mysql-test/r/mdl_sync.result	2010-02-08 20:19:55 +0000
+++ b/mysql-test/r/mdl_sync.result	2010-02-10 15:46:03 +0000
@@ -2322,3 +2322,56 @@ set debug_sync= 'now SIGNAL go';
 # Reaping TRUNCATE TABLE.
 set debug_sync= 'RESET';
 drop table t1;
+#
+# Test for bug #50998 "Deadlock in MDL code during test
+#                      rqg_mdl_stability".
+# Also provides coverage for the case when addition of
+# waiting statement adds several loops in the waiters
+# graph and therefore several searches for deadlock
+# should be performed.
+drop table if exists t1;
+set debug_sync= 'RESET';
+create table t1 (i int);
+# Switching to connection 'con1'.
+begin;
+select * from t1;
+i
+# Switching to connection 'con2'.
+begin;
+select * from t1;
+i
+# Switching to connection 'default'.
+# Start ALTER TABLE which will acquire SNW lock and
+# table lock and get blocked on sync point.
+set debug_sync= 'thr_multi_lock_after_thr_lock SIGNAL parked WAIT_FOR go';
+# Sending:
+alter table t1 add column j int;
+# Switching to connection 'con1'.
+# Wait until ALTER TABLE gets blocked on a sync point.
+set debug_sync= 'now WAIT_FOR parked';
+# Sending:
+insert into t1 values (1);
+# Switching to connection 'con2'.
+# Sending:
+insert into t1 values (1);
+# Switching to connection 'con3'.
+# Wait until both 'con1' and 'con2' are blocked trying to acquire
+# SW lock on the table.
+# Unblock ALTER TABLE. Since it will try to upgrade SNW to X lock
+# deadlock with two loops in waiting graph will occur. Both loops
+# should be found and DML statements in both 'con1' and 'con2'
+# should be aborted with ER_LOCK_DEADLOCK errors.
+set debug_sync= 'now SIGNAL go';
+# Switching to connection 'con1'.
+# Reaping INSERT. It should end with ER_LOCK_DEADLOCK error and
+# not wait indefinitely (as it happened before the bugfix).
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+commit;
+# Switching to connection 'con2'.
+# Reaping INSERT.
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+commit;
+# Switching to connection 'default'.
+# Reap ALTER TABLE.
+set debug_sync= 'RESET';
+drop table t1;

=== modified file 'mysql-test/t/mdl_sync.test'
--- a/mysql-test/t/mdl_sync.test	2010-02-08 20:19:55 +0000
+++ b/mysql-test/t/mdl_sync.test	2010-02-10 15:46:03 +0000
@@ -3374,6 +3374,95 @@ set debug_sync= 'RESET';
 drop table t1;
 
 
+--echo #
+--echo # Test for bug #50998 "Deadlock in MDL code during test
+--echo #                      rqg_mdl_stability".
+--echo # Also provides coverage for the case when addition of
+--echo # waiting statement adds several loops in the waiters
+--echo # graph and therefore several searches for deadlock
+--echo # should be performed.
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+set debug_sync= 'RESET';
+connect (con1,localhost,root);
+connect (con2,localhost,root);
+connect (con3,localhost,root);
+connection default;
+create table t1 (i int);
+
+--echo # Switching to connection 'con1'.
+connection con1;
+begin;
+select * from t1;
+
+--echo # Switching to connection 'con2'.
+connection con2;
+begin;
+select * from t1;
+
+--echo # Switching to connection 'default'.
+connection default;
+--echo # Start ALTER TABLE which will acquire SNW lock and
+--echo # table lock and get blocked on sync point.
+set debug_sync= 'thr_multi_lock_after_thr_lock SIGNAL parked WAIT_FOR go';
+--echo # Sending:
+--send alter table t1 add column j int
+
+--echo # Switching to connection 'con1'.
+connection con1;
+--echo # Wait until ALTER TABLE gets blocked on a sync point.
+set debug_sync= 'now WAIT_FOR parked';
+--echo # Sending:
+--send insert into t1 values (1)
+
+--echo # Switching to connection 'con2'.
+connection con2;
+--echo # Sending:
+--send insert into t1 values (1)
+
+--echo # Switching to connection 'con3'.
+connection con3;
+--echo # Wait until both 'con1' and 'con2' are blocked trying to acquire
+--echo # SW lock on the table.
+let $wait_condition=
+  select count(*) = 2 from information_schema.processlist
+  where state = "Waiting for table" and info = "insert into t1 values (1)";
+--source include/wait_condition.inc
+--echo # Unblock ALTER TABLE. Since it will try to upgrade SNW to X lock
+--echo # deadlock with two loops in waiting graph will occur. Both loops
+--echo # should be found and DML statements in both 'con1' and 'con2'
+--echo # should be aborted with ER_LOCK_DEADLOCK errors.
+set debug_sync= 'now SIGNAL go';
+
+--echo # Switching to connection 'con1'.
+connection con1;
+--echo # Reaping INSERT. It should end with ER_LOCK_DEADLOCK error and
+--echo # not wait indefinitely (as it happened before the bugfix).
+--error ER_LOCK_DEADLOCK
+--reap
+commit;
+
+--echo # Switching to connection 'con2'.
+connection con2;
+--echo # Reaping INSERT.
+--error ER_LOCK_DEADLOCK
+--reap
+commit;
+
+--echo # Switching to connection 'default'.
+connection default;
+--echo # Reap ALTER TABLE.
+--reap
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+connection default;
+set debug_sync= 'RESET';
+drop table t1;
+
+
 # Check that all connections opened by test cases in this file are really
 # gone so execution of other tests won't be affected by their presence.
 --source include/wait_until_count_sessions.inc

=== modified file 'sql/mdl.cc'
--- a/sql/mdl.cc	2010-02-06 09:44:03 +0000
+++ b/sql/mdl.cc	2010-02-10 15:46:03 +0000
@@ -1755,10 +1755,15 @@ bool MDL_context::find_deadlock(Deadlock
 
 bool MDL_context::find_deadlock()
 {
-  Deadlock_detection_context deadlock_ctx(this);
-
   while (1)
   {
+    /*
+      The fact that we use fresh instance of deadlock_ctx for each
+      search performed by find_deadlock() below is important, code
+      responsible for victim selection relies on this.
+    */
+    Deadlock_detection_context deadlock_ctx(this);
+
     if (! find_deadlock(&deadlock_ctx))
     {
       /* No deadlocks are found! */


Attachment: [text/bzr-bundle] bzr/dlenev@mysql.com-20100210154603-9hux05vnrgxonb9t.bundle
Thread
bzr commit into mysql-5.5-next-mr branch (dlenev:3088) Bug#50998Dmitry Lenev10 Feb