#At file:///export/home/z/mysql-6.0-codebase-bugfixing-bug48541/ based on revid:alik@stripped
3728 Jon Olav Hauglid 2009-11-24
Bug #48541 Server-wide deadlock involving LOCK_thread_count
The bug title is misleading. This is a deadlock between LOCK_open and LOCK_mdl.
The reason for the deadlock was an improper exit from
MDL_context::wait_for_locks() which caused mysys_var->current_mutex to remain
LOCK_mdl even though LOCK_mdl was no longer held by that connection.
This could lead to a deadlock in the following way:
1) INSERT DELAYED tries to open a table but fails, and trying to recover it
calls wait_for_locks().
2) Due to a pending exclusive request, wait_for_locks() fails and exits without
resetting mysys_var->current_mutex for the delayed insert handler thread. So it
continues to point to LOCK_mdl.
3) The handler thread manages to open a table.
4) A different connection takes LOCK_open and tries to take LOCK_mdl.
5) FLUSH TABLES from a third connection notices that the handler thread has a
table open, and tries to kill it. This involves locking mysys_var->current_mutex
while having LOCK_open locked. Since current_mutex mistakenly points to LOCK_mdl,
we have a deadlock.
This patch makes sure MDL_EXIT_COND() is called before exiting wait_for_locks().
This clears mysys->current_mutex which resolves the issue. The patch also removes
the unnecessary locking of mysys_var->current_mutex in kill_delayed_threads() and
kill_delayed_threads_for_table().
An assert is added to recover_from_failed_open_table_attempt() after
wait_for_locks() is called, to check that current_mutex is indeed reset.
With this assert in place, existing tests in (e.g.) mdl_sync.test will fail
without this patch.
modified:
sql/mdl.cc
sql/sql_base.cc
sql/sql_insert.cc
=== modified file 'sql/mdl.cc'
--- a/sql/mdl.cc 2009-10-25 13:41:27 +0000
+++ b/sql/mdl.cc 2009-11-24 12:22:57 +0000
@@ -1255,7 +1255,8 @@ MDL_context::wait_for_locks(MDL_request_
}
if (!mdl_request)
{
- pthread_mutex_unlock(&LOCK_mdl);
+ /* As a side-effect MDL_EXIT_COND() unlocks LOCK_mdl. */
+ MDL_EXIT_COND(m_thd, mysys_var, old_msg);
break;
}
pthread_cond_wait(&COND_mdl, &LOCK_mdl);
=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc 2009-11-23 12:36:34 +0000
+++ b/sql/sql_base.cc 2009-11-24 12:22:57 +0000
@@ -899,11 +899,7 @@ static void kill_delayed_threads_for_tab
in_use->killed= THD::KILL_CONNECTION;
pthread_mutex_lock(&in_use->mysys_var->mutex);
if (in_use->mysys_var->current_cond)
- {
- pthread_mutex_lock(in_use->mysys_var->current_mutex);
pthread_cond_broadcast(in_use->mysys_var->current_cond);
- pthread_mutex_unlock(in_use->mysys_var->current_mutex);
- }
pthread_mutex_unlock(&in_use->mysys_var->mutex);
}
}
@@ -3700,6 +3696,7 @@ recover_from_failed_open_table_attempt(T
case OT_WAIT:
result= (thd->mdl_context.wait_for_locks(&m_mdl_requests) ||
tdc_wait_for_old_versions(thd, &m_mdl_requests));
+ DBUG_ASSERT(thd->mysys_var->current_mutex == 0);
break;
case OT_DISCOVER:
{
=== modified file 'sql/sql_insert.cc'
--- a/sql/sql_insert.cc 2009-11-23 12:36:34 +0000
+++ b/sql/sql_insert.cc 2009-11-24 12:22:57 +0000
@@ -2326,18 +2326,7 @@ void kill_delayed_threads(void)
{
pthread_mutex_lock(&di->thd.mysys_var->mutex);
if (di->thd.mysys_var->current_cond)
- {
- /*
- We need the following test because the main mutex may be locked
- in handle_delayed_insert()
- */
- if (&di->mutex != di->thd.mysys_var->current_mutex)
- my_pthread_mutex_lock(di->thd.mysys_var->current_mutex,
- MYF_NO_DEADLOCK_DETECTION);
- pthread_cond_broadcast(di->thd.mysys_var->current_cond);
- if (&di->mutex != di->thd.mysys_var->current_mutex)
- pthread_mutex_unlock(di->thd.mysys_var->current_mutex);
- }
+ pthread_cond_broadcast(di->thd.mysys_var->current_cond);
pthread_mutex_unlock(&di->thd.mysys_var->mutex);
}
}
Attachment: [text/bzr-bundle] bzr/jon.hauglid@sun.com-20091124122257-pofekiu81bvdd54n.bundle