#At file:///home/dlenev/src/bzr/mysql-trunk-runtime-stage/ based on revid:dlenev@stripped
3048 Dmitry Lenev 2010-07-09
Patch that adds information about waits for FLUSH TABLES
to MDL deadlock detector and makes it possible to detect
cross-subsystem deadlocks.
Work in progress.
modified:
sql/sql_parse.cc
sql/sql_yacc.yy
sql/table.cc
sql/table.h
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc 2010-07-05 11:11:32 +0000
+++ b/sql/sql_parse.cc 2010-07-09 13:15:43 +0000
@@ -1796,12 +1796,15 @@ static bool flush_tables_with_read_lock(
goto error;
}
+ /*
+ Acquire SNW locks on tables to be flushed. We can't use
+ lock_table_names() here as this call will also acquire global IX
+ and database-scope IX locks on the tables, and this will make
+ this statement incompatible with FLUSH TABLES WITH READ LOCK.
+ */
for (table_list= all_tables; table_list;
table_list= table_list->next_global)
- {
- table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE);
mdl_requests.push_front(&table_list->mdl_request);
- }
if (thd->mdl_context.acquire_locks(&mdl_requests,
thd->variables.lock_wait_timeout))
@@ -1813,8 +1816,7 @@ static bool flush_tables_with_read_lock(
TABLE_SHARE *share;
struct timespec abstime;
set_timespec(abstime, thd->variables.lock_wait_timeout);
-
- /* Remove the table from cache. */
+ /* Request removal of table from cache. */
mysql_mutex_lock(&LOCK_open);
tdc_remove_table(thd, TDC_RT_REMOVE_UNUSED,
table_list->db,
@@ -1824,17 +1826,14 @@ static bool flush_tables_with_read_lock(
table_list->table_name)))
{
mysql_mutex_unlock(&LOCK_open);
- continue;
+ }
+ else
+ {
+ /* The below method will unlock LOCK_open and frees share's memory. */
+ if (share->wait_until_flushed(&thd->mdl_context, &abstime))
+ return TRUE;
}
- /* The below method will unlock LOCK_open and frees share's memory. */
- if (share->wait_until_flushed(&thd->mdl_context, &abstime))
- return TRUE;
- }
-
- for (table_list= all_tables; table_list;
- table_list= table_list->next_global)
- {
/* Skip views and temporary tables. */
table_list->required_type= FRMTYPE_TABLE; /* Don't try to flush views. */
table_list->open_type= OT_BASE_ONLY; /* Ignore temporary tables. */
@@ -1849,20 +1848,12 @@ static bool flush_tables_with_read_lock(
goto error;
}
-#if 0
/*
- Downgrade the exclusive locks.
- Use MDL_SHARED_NO_WRITE as the intended
- post effect of this call is identical
- to LOCK TABLES <...> READ, and we didn't use
- thd->in_lock_talbes and thd->sql_command= SQLCOM_LOCK_TABLES
- hacks to enter the LTM.
- @todo: release the global IX lock here!!!
+ We don't downgrade MDL_SHARED_NO_WRITE here as the intended
+ post effect of this call is identical to LOCK TABLES <...> READ,
+ and we didn't use thd->in_lock_talbes and
+ thd->sql_command= SQLCOM_LOCK_TABLES hacks to enter the LTM.
*/
- for (table_list= all_tables; table_list;
- table_list= table_list->next_global)
- table_list->mdl_request.ticket->downgrade_exclusive_lock(MDL_SHARED_NO_WRITE);
-#endif
return FALSE;
=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy 2010-07-01 13:53:46 +0000
+++ b/sql/sql_yacc.yy 2010-07-09 13:15:43 +0000
@@ -11204,9 +11204,8 @@ opt_with_read_lock:
{
TABLE_LIST *tables= Lex->query_tables;
Lex->type|= REFRESH_READ_LOCK;
- /* We acquire an X lock currently and then downgrade. */
for (; tables; tables= tables->next_global)
- tables->mdl_request.set_type(MDL_EXCLUSIVE);
+ tables->mdl_request.set_type(MDL_SHARED_NO_WRITE);
}
;
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-07-05 11:11:32 +0000
+++ b/sql/table.cc 2010-07-09 13:15:43 +0000
@@ -438,19 +438,34 @@ void free_table_share(TABLE_SHARE *share
if (share->m_flush_tickets.is_empty())
{
- /* We must copy mem_root from share because share is allocated through it */
+ /*
+ There are no threads waiting for this share to be flushed. So
+ we can immediately release memory associated with it. We must
+ copy mem_root from share because share is allocated through it.
+ */
memcpy((char*) &mem_root, (char*) &share->mem_root, sizeof(mem_root));
free_root(&mem_root, MYF(0)); // Free's share
}
else
{
- I_P_List_iterator<Flush_ticket,
- I_P_List_adapter<Flush_ticket,
- &Flush_ticket::next_in_share,
- &Flush_ticket::prev_in_share> >
- it(share->m_flush_tickets);
+ /*
+ If there are threads waiting for this share to be flushed we
+ don't free share memory here. Instead we notify waiting threads
+ and delegate freeing share's memory to them.
+ At this point a) all resources except memory associated with share
+ were already released b) share should have been already removed
+ from table definition cache. So it is OK to proceed without waiting
+ for these threads to finish their work.
+ */
+ Flush_ticket_list::Iterator it(share->m_flush_tickets);
Flush_ticket *ticket;
+ /*
+ To avoid problems due to threads being wake up concurrently modifying
+ flush ticket list we must hold LOCK_open here.
+ */
+ mysql_mutex_assert_owner(&LOCK_open);
+
while ((ticket= it++))
(void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);
}
@@ -3036,6 +3051,18 @@ uint Flush_ticket::get_deadlock_weight()
}
+/**
+ Traverse portion of wait-for graph which is reachable through this
+ table share in search for deadlocks.
+
+ @param waiting_ticket Ticket representing wait for this share.
+ @param dvisitor Deadlock detection visitor.
+
+ @retval TRUE A deadlock is found. A victim is remembered
+ by the visitor.
+ @retval FALSE
+*/
+
bool TABLE_SHARE::find_deadlock(Flush_ticket *waiting_ticket,
Deadlock_detection_visitor *dvisitor)
{
@@ -3146,6 +3173,13 @@ bool TABLE_SHARE::wait_until_flushed(MDL
mysql_mutex_assert_owner(&LOCK_open);
+ /*
+ We should enter this method only then share's version is not
+ up to date and the share is referenced. Otherwise there is
+ no guarantee that our thread will be waken-up from wait.
+ */
+ DBUG_ASSERT(version != refresh_version && ref_count != 0);
+
if (! (ticket= new Flush_ticket(mdl_context, this)))
{
mysql_mutex_unlock(&LOCK_open);
@@ -3171,6 +3205,23 @@ bool TABLE_SHARE::wait_until_flushed(MDL
m_flush_tickets.remove(ticket);
+ /*
+ If our thread was the last one waiting for table share to be flushed
+ we can finish destruction of share object by releasing its memory
+ (share object was allocated on share's own MEM_ROOT).
+
+ In cases when our wait was aborted due KILL statement, deadlock or
+ timeout share still might be referenced, so we don't free its memory
+ in this case. Note that we can't rely on checking wait_status to
+ determine this condition as, for example, timeout can happen even
+ when there are no references to table share so memory should be
+ released.
+
+ QQ: Here we assume that table share with ref_count == 0 and
+ flush tickets can't be present in table_def_cache, or
+ referenced by other threads. Is there any better way to
+ solve this problem?
+ */
if (m_flush_tickets.is_empty() && ! ref_count)
{
MEM_ROOT mem_root_copy;
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-07-05 11:11:32 +0000
+++ b/sql/table.h 2010-07-09 13:15:43 +0000
@@ -509,6 +509,12 @@ public:
};
+/**
+ Class representing the fact that some thread waits for table
+ share to be flushed. Is used to represent information about
+ such waits in MDL deadlock detector.
+*/
+
class Flush_ticket : public Wait_for_edge
{
MDL_context *m_ctx;
@@ -520,14 +526,25 @@ public:
MDL_context *get_ctx() const { return m_ctx; }
- virtual bool find_deadlock(Deadlock_detection_visitor *dvisitor);
+ bool find_deadlock(Deadlock_detection_visitor *dvisitor);
- virtual uint get_deadlock_weight() const;
+ uint get_deadlock_weight() const;
+ /**
+ Pointers for participating in the list of waiters for table share.
+ */
Flush_ticket *next_in_share;
Flush_ticket **prev_in_share;
};
+
+typedef I_P_List <Flush_ticket,
+ I_P_List_adapter<Flush_ticket,
+ &Flush_ticket::next_in_share,
+ &Flush_ticket::prev_in_share> >
+ Flush_ticket_list;
+
+
/*
This structure is shared between different table objects. There is one
instance of table share per one table in the database.
@@ -682,10 +699,10 @@ struct TABLE_SHARE
/** Instrumentation for this table share. */
PSI_table_share *m_psi;
- I_P_List <Flush_ticket,
- I_P_List_adapter<Flush_ticket,
- &Flush_ticket::next_in_share,
- &Flush_ticket::prev_in_share> > m_flush_tickets;
+ /**
+ List of tickets representing threads waiting for the share to be flushed.
+ */
+ Flush_ticket_list m_flush_tickets;
/*
Set share's table cache key and update its db and table name appropriately.
Attachment: [text/bzr-bundle] bzr/dlenev@mysql.com-20100709131543-w9mrkq6z3xr6wno2.bundle
| Thread |
|---|
| • bzr commit into mysql-trunk-runtime branch (dlenev:3048) | Dmitry Lenev | 9 Jul |