List:Commits« Previous MessageNext Message »
From:Dmitry Lenev Date:July 9 2010 1:15pm
Subject:bzr commit into mysql-trunk-runtime branch (dlenev:3048)
View as plain text  
#At file:///home/dlenev/src/bzr/mysql-trunk-runtime-stage/ based on revid:dlenev@stripped

 3048 Dmitry Lenev	2010-07-09
      Patch that adds information about waits for FLUSH TABLES
      to MDL deadlock detector and makes it possible to detect
      cross-subsystem deadlocks.
      
      Work in progress.

    modified:
      sql/sql_parse.cc
      sql/sql_yacc.yy
      sql/table.cc
      sql/table.h
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc	2010-07-05 11:11:32 +0000
+++ b/sql/sql_parse.cc	2010-07-09 13:15:43 +0000
@@ -1796,12 +1796,15 @@ static bool flush_tables_with_read_lock(
     goto error;
   }
 
+  /*
+    Acquire SNW locks on tables to be flushed. We can't use
+    lock_table_names() here as this call will also acquire global IX
+    and database-scope IX locks on the tables, and this will make
+    this statement incompatible with FLUSH TABLES WITH READ LOCK.
+  */
   for (table_list= all_tables; table_list;
        table_list= table_list->next_global)
-  {
-    table_list->mdl_request.set_type(MDL_SHARED_NO_WRITE);
     mdl_requests.push_front(&table_list->mdl_request);
-  }
 
   if (thd->mdl_context.acquire_locks(&mdl_requests,
                                      thd->variables.lock_wait_timeout))
@@ -1813,8 +1816,7 @@ static bool flush_tables_with_read_lock(
     TABLE_SHARE *share;
     struct timespec abstime;
     set_timespec(abstime, thd->variables.lock_wait_timeout);
-
-    /* Remove the table from cache. */
+    /* Request removal of table from cache. */
     mysql_mutex_lock(&LOCK_open);
     tdc_remove_table(thd, TDC_RT_REMOVE_UNUSED,
                      table_list->db,
@@ -1824,17 +1826,14 @@ static bool flush_tables_with_read_lock(
                                          table_list->table_name)))
     {
       mysql_mutex_unlock(&LOCK_open);
-      continue;
+    }
+    else
+    {
+      /* The below method will unlock LOCK_open and frees share's memory. */
+      if (share->wait_until_flushed(&thd->mdl_context, &abstime))
+        return TRUE;
     }
 
-    /* The below method will unlock LOCK_open and frees share's memory. */
-    if (share->wait_until_flushed(&thd->mdl_context, &abstime))
-      return TRUE;
-  }
-
-  for (table_list= all_tables; table_list;
-       table_list= table_list->next_global)
-  {
     /* Skip views and temporary tables. */
     table_list->required_type= FRMTYPE_TABLE; /* Don't try to flush views. */
     table_list->open_type= OT_BASE_ONLY;      /* Ignore temporary tables. */
@@ -1849,20 +1848,12 @@ static bool flush_tables_with_read_lock(
     goto error;
   }
 
-#if 0
   /*
-    Downgrade the exclusive locks.
-    Use MDL_SHARED_NO_WRITE as the intended
-    post effect of this call is identical
-    to LOCK TABLES <...> READ, and we didn't use
-    thd->in_lock_talbes and thd->sql_command= SQLCOM_LOCK_TABLES
-    hacks to enter the LTM.
-    @todo: release the global IX lock here!!!
+    We don't downgrade MDL_SHARED_NO_WRITE here as the intended
+    post effect of this call is identical to LOCK TABLES <...> READ,
+    and we didn't use thd->in_lock_talbes and
+    thd->sql_command= SQLCOM_LOCK_TABLES hacks to enter the LTM.
   */
-  for (table_list= all_tables; table_list;
-       table_list= table_list->next_global)
-    table_list->mdl_request.ticket->downgrade_exclusive_lock(MDL_SHARED_NO_WRITE);
-#endif
 
   return FALSE;
 

=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy	2010-07-01 13:53:46 +0000
+++ b/sql/sql_yacc.yy	2010-07-09 13:15:43 +0000
@@ -11204,9 +11204,8 @@ opt_with_read_lock:
           {
             TABLE_LIST *tables= Lex->query_tables;
             Lex->type|= REFRESH_READ_LOCK;
-            /* We acquire an X lock currently and then downgrade. */
             for (; tables; tables= tables->next_global)
-              tables->mdl_request.set_type(MDL_EXCLUSIVE);
+              tables->mdl_request.set_type(MDL_SHARED_NO_WRITE);
           }
         ;
 

=== modified file 'sql/table.cc'
--- a/sql/table.cc	2010-07-05 11:11:32 +0000
+++ b/sql/table.cc	2010-07-09 13:15:43 +0000
@@ -438,19 +438,34 @@ void free_table_share(TABLE_SHARE *share
 
   if (share->m_flush_tickets.is_empty())
   {
-    /* We must copy mem_root from share because share is allocated through it */
+    /*
+      There are no threads waiting for this share to be flushed. So
+      we can immediately release memory associated with it. We must
+      copy mem_root from share because share is allocated through it.
+    */
     memcpy((char*) &mem_root, (char*) &share->mem_root, sizeof(mem_root));
     free_root(&mem_root, MYF(0));                 // Free's share
   }
   else
   {
-    I_P_List_iterator<Flush_ticket,
-                      I_P_List_adapter<Flush_ticket,
-                                       &Flush_ticket::next_in_share,
-                                       &Flush_ticket::prev_in_share> >
-                      it(share->m_flush_tickets);
+    /*
+      If there are threads waiting for this share to be flushed we
+      don't free share memory here. Instead we notify waiting threads
+      and delegate freeing share's memory to them.
+      At this point a) all resources except memory associated with share
+      were already released b) share should have been already removed
+      from table definition cache. So it is OK to proceed without waiting
+      for these threads to finish their work.
+    */
+    Flush_ticket_list::Iterator it(share->m_flush_tickets);
     Flush_ticket *ticket;
 
+    /*
+      To avoid problems due to threads being wake up concurrently modifying
+      flush ticket list we must hold LOCK_open here.
+    */
+    mysql_mutex_assert_owner(&LOCK_open);
+
     while ((ticket= it++))
       (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);
   }
@@ -3036,6 +3051,18 @@ uint Flush_ticket::get_deadlock_weight()
 }
 
 
+/**
+  Traverse portion of wait-for graph which is reachable through this
+  table share in search for deadlocks.
+
+  @param waiting_ticket  Ticket representing wait for this share.
+  @param dvisitor        Deadlock detection visitor.
+
+  @retval TRUE  A deadlock is found. A victim is remembered
+                by the visitor.
+  @retval FALSE
+*/
+
 bool TABLE_SHARE::find_deadlock(Flush_ticket *waiting_ticket,
                                 Deadlock_detection_visitor *dvisitor)
 {
@@ -3146,6 +3173,13 @@ bool TABLE_SHARE::wait_until_flushed(MDL
 
   mysql_mutex_assert_owner(&LOCK_open);
 
+  /*
+    We should enter this method only then share's version is not
+    up to date and the share is referenced. Otherwise there is
+    no guarantee that our thread will be waken-up from wait.
+  */
+  DBUG_ASSERT(version != refresh_version && ref_count != 0);
+
   if (! (ticket= new Flush_ticket(mdl_context, this)))
   {
     mysql_mutex_unlock(&LOCK_open);
@@ -3171,6 +3205,23 @@ bool TABLE_SHARE::wait_until_flushed(MDL
 
   m_flush_tickets.remove(ticket);
 
+  /*
+    If our thread was the last one waiting for table share to be flushed
+    we can finish destruction of share object by releasing its memory
+    (share object was allocated on share's own MEM_ROOT).
+
+    In cases when our wait was aborted due KILL statement, deadlock or
+    timeout share still might be referenced, so we don't free its memory
+    in this case. Note that we can't rely on checking wait_status to
+    determine this condition as, for example, timeout can happen even
+    when there are no references to table share so memory should be
+    released.
+
+    QQ: Here we assume that table share with ref_count == 0 and
+        flush tickets can't be present in table_def_cache, or
+        referenced by other threads. Is there any better way to
+        solve this problem?
+  */
   if (m_flush_tickets.is_empty() && ! ref_count)
   {
     MEM_ROOT mem_root_copy;

=== modified file 'sql/table.h'
--- a/sql/table.h	2010-07-05 11:11:32 +0000
+++ b/sql/table.h	2010-07-09 13:15:43 +0000
@@ -509,6 +509,12 @@ public:
 };
 
 
+/**
+  Class representing the fact that some thread waits for table
+  share to be flushed. Is used to represent information about
+  such waits in MDL deadlock detector.
+*/
+
 class Flush_ticket : public Wait_for_edge
 {
   MDL_context *m_ctx;
@@ -520,14 +526,25 @@ public:
 
   MDL_context *get_ctx() const { return m_ctx; }
 
-  virtual bool find_deadlock(Deadlock_detection_visitor *dvisitor);
+  bool find_deadlock(Deadlock_detection_visitor *dvisitor);
 
-  virtual uint get_deadlock_weight() const;
+  uint get_deadlock_weight() const;
 
+  /**
+    Pointers for participating in the list of waiters for table share.
+  */
   Flush_ticket *next_in_share;
   Flush_ticket **prev_in_share;
 };
 
+
+typedef I_P_List <Flush_ticket,
+                  I_P_List_adapter<Flush_ticket,
+                                   &Flush_ticket::next_in_share,
+                                   &Flush_ticket::prev_in_share> >
+                 Flush_ticket_list;
+
+
 /*
   This structure is shared between different table objects. There is one
   instance of table share per one table in the database.
@@ -682,10 +699,10 @@ struct TABLE_SHARE
   /** Instrumentation for this table share. */
   PSI_table_share *m_psi;
 
-  I_P_List <Flush_ticket,
-            I_P_List_adapter<Flush_ticket,
-                             &Flush_ticket::next_in_share,
-                             &Flush_ticket::prev_in_share> > m_flush_tickets;
+  /**
+    List of tickets representing threads waiting for the share to be flushed.
+  */
+  Flush_ticket_list m_flush_tickets;
 
   /*
     Set share's table cache key and update its db and table name appropriately.


Attachment: [text/bzr-bundle] bzr/dlenev@mysql.com-20100709131543-w9mrkq6z3xr6wno2.bundle
Thread
bzr commit into mysql-trunk-runtime branch (dlenev:3048)Dmitry Lenev9 Jul