List: Commits « Previous Message | Next Message »
From: Jonas Oreland  Date: October 14 2009 11:44am
Subject: bzr commit into mysql-5.1-telco-7.0 branch (jonas:3123) Bug#47966
View as plain text  
#At file:///home/jonas/src/telco-6.4/ based on revid:jonas@stripped

 3123 Jonas Oreland	2009-10-14
      ndb - bug#47966
        ndbmtd can over-allocate the undo buffer; prevent this by keeping track
          of how much has been promised but not yet consumed

    modified:
      storage/ndb/src/kernel/blocks/lgman.cpp
      storage/ndb/src/kernel/blocks/lgman.hpp
      storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
=== modified file 'storage/ndb/src/kernel/blocks/lgman.cpp'
--- a/storage/ndb/src/kernel/blocks/lgman.cpp	2009-10-08 11:15:24 +0000
+++ b/storage/ndb/src/kernel/blocks/lgman.cpp	2009-10-14 11:44:05 +0000
@@ -54,6 +54,8 @@ extern EventLogger * g_eventLogger;
 #define DEBUG_UNDO_EXECUTION 0
 #define DEBUG_SEARCH_LOG_HEAD 0
 
+#define FREE_BUFFER_MARGIN (2 * File_formats::UNDO_PAGE_WORDS)
+
 Lgman::Lgman(Block_context & ctx) :
   SimulatedBlock(LGMAN, ctx),
   m_tup(0),
@@ -347,30 +349,49 @@ Lgman::execNODE_FAILREP(Signal* signal)
 void
 Lgman::execDUMP_STATE_ORD(Signal* signal){
   jamEntry();
-  if(signal->theData[0] == 12001)
+  if (signal->theData[0] == 12001 || signal->theData[0] == 12002)
   {
+    char tmp[1024];
     Ptr<Logfile_group> ptr;
     m_logfile_group_list.first(ptr);
     while(!ptr.isNull())
     {
-      infoEvent("lfg %d state: %x fs: %d lsn "
-		"[ last: %lld s(req): %lld s:ed: %lld lcp: %lld ] waiters: %d %d",
-		ptr.p->m_logfile_group_id, ptr.p->m_state, 
-		ptr.p->m_outstanding_fs,
-		ptr.p->m_last_lsn, ptr.p->m_last_sync_req_lsn,
-		ptr.p->m_last_synced_lsn, ptr.p->m_last_lcp_lsn,
-		!ptr.p->m_log_buffer_waiters.isEmpty(),
-		!ptr.p->m_log_sync_waiters.isEmpty());
+      BaseString::snprintf(tmp, sizeof(tmp),
+                           "lfg %u state: %x fs: %u lsn "
+                           " [ last: %llu s(req): %llu s:ed: %llu lcp: %llu ] "
+                           " waiters: %d %d",
+                           ptr.p->m_logfile_group_id, ptr.p->m_state,
+                           ptr.p->m_outstanding_fs,
+                           ptr.p->m_last_lsn, ptr.p->m_last_sync_req_lsn,
+                           ptr.p->m_last_synced_lsn, ptr.p->m_last_lcp_lsn,
+                           !ptr.p->m_log_buffer_waiters.isEmpty(),
+                           !ptr.p->m_log_sync_waiters.isEmpty());
+      if (signal->theData[0] == 12001)
+        infoEvent(tmp);
+      ndbout_c(tmp);
+
+      BaseString::snprintf(tmp, sizeof(tmp),
+                           "   callback_buffer_words: %u"
+                           " free_buffer_words: %u free_file_words: %llu",
+                           ptr.p->m_callback_buffer_words,
+                           ptr.p->m_free_buffer_words,
+                           ptr.p->m_free_file_words);
+      if (signal->theData[0] == 12001)
+        infoEvent(tmp);
+      ndbout_c(tmp);
       if (!ptr.p->m_log_buffer_waiters.isEmpty())
       {
 	Ptr<Log_waiter> waiter;
 	Local_log_waiter_list 
 	  list(m_log_waiter_pool, ptr.p->m_log_buffer_waiters);
 	list.first(waiter);
-	infoEvent("  free_buffer_words: %d head(waiters).sz: %d %d",
-		  ptr.p->m_free_buffer_words,
-		  waiter.p->m_size,
-		  2*File_formats::UNDO_PAGE_WORDS);
+        BaseString::snprintf(tmp, sizeof(tmp),
+                             "  head(waiters).sz: %u %u",
+                             waiter.p->m_size,
+                             FREE_BUFFER_MARGIN);
+        if (signal->theData[0] == 12001)
+          infoEvent(tmp);
+        ndbout_c(tmp);
       }
       if (!ptr.p->m_log_sync_waiters.isEmpty())
       {
@@ -378,14 +399,18 @@ Lgman::execDUMP_STATE_ORD(Signal* signal
 	Local_log_waiter_list 
 	  list(m_log_waiter_pool, ptr.p->m_log_sync_waiters);
 	list.first(waiter);
-	infoEvent("  m_last_synced_lsn: %lld head(waiters %x).m_sync_lsn: %lld",
-		  ptr.p->m_last_synced_lsn,
-		  waiter.i,
-		  waiter.p->m_sync_lsn);
+        BaseString::snprintf(tmp, sizeof(tmp),
+                             "  m_last_synced_lsn: %llu head(waiters %x).m_sync_lsn: %llu",
+                             ptr.p->m_last_synced_lsn,
+                             waiter.i,
+                             waiter.p->m_sync_lsn);
+        if (signal->theData[0] == 12001)
+          infoEvent(tmp);
+        ndbout_c(tmp);
 	
 	while(!waiter.isNull())
 	{
-	  ndbout_c("ptr: %x %p lsn: %lld next: %x", 
+	  ndbout_c("ptr: %x %p lsn: %llu next: %x",
 		   waiter.i, waiter.p, waiter.p->m_sync_lsn, waiter.p->nextList);
 	  list.next(waiter);
 	}
@@ -997,6 +1022,8 @@ Lgman::Logfile_group::Logfile_group(cons
 
   m_free_file_words = 0;
   m_free_buffer_words = 0;
+  m_callback_buffer_words = 0;
+
   m_pos[CONSUMER].m_current_page.m_ptr_i = RNIL;// { m_buffer_pages, idx }
   m_pos[CONSUMER].m_current_pos.m_ptr_i = RNIL; // { page ptr.i, m_words_used}
   m_pos[PRODUCER].m_current_page.m_ptr_i = RNIL;// { m_buffer_pages, idx }
@@ -1436,9 +1463,12 @@ Logfile_client::get_log_buffer(Signal* s
   Ptr<Lgman::Logfile_group> ptr;
   if(m_lgman->m_logfile_group_hash.find(ptr, key))
   {
-    if(ptr.p->m_free_buffer_words >= (sz + 2*File_formats::UNDO_PAGE_WORDS)&& 
-       ptr.p->m_log_buffer_waiters.isEmpty())
+    Uint32 callback_buffer = ptr.p->m_callback_buffer_words;
+    Uint32 free_buffer = ptr.p->m_free_buffer_words;
+    if (free_buffer >= (sz + callback_buffer + FREE_BUFFER_MARGIN) &&
+        ptr.p->m_log_buffer_waiters.isEmpty())
     {
+      ptr.p->m_callback_buffer_words = callback_buffer + sz;
       return 1;
     }
     
@@ -1492,8 +1522,10 @@ Lgman::flush_log(Signal* signal, Ptr<Log
  
   jamEntry();
 
-  if(consumer.m_current_page == producer.m_current_page)
+  if (consumer.m_current_page == producer.m_current_page)
   {
+    jam();
+    Buffer_idx pos = producer.m_current_pos;
 
 #if 0
     if (force)
@@ -1509,13 +1541,18 @@ Lgman::flush_log(Signal* signal, Ptr<Log
     {
       jam();
 
-      if (ptr.p->m_log_buffer_waiters.isEmpty() || ptr.p->m_outstanding_fs)
+      if (ptr.p->m_log_buffer_waiters.isEmpty() || pos.m_idx == 0)
       {
         jam();
 	force =  0;
       }
-      
-      if (force < 2)
+      else if (ptr.p->m_free_buffer_words < FREE_BUFFER_MARGIN)
+      {
+        jam();
+        force = 2;
+      }
+
+      if (force < 2 || ptr.p->m_outstanding_fs)
       {
         jam();
 	signal->theData[0] = LgmanContinueB::FLUSH_LOG;
@@ -1528,12 +1565,14 @@ Lgman::flush_log(Signal* signal, Ptr<Log
       else
       {
         jam();
-	Buffer_idx pos= producer.m_current_pos;
 	GlobalPage *page = m_shared_page_pool.getPtr(pos.m_ptr_i);
 	
 	Uint32 free= File_formats::UNDO_PAGE_WORDS - pos.m_idx;
 
-	ndbout_c("force flush %d %d", pos.m_idx, ptr.p->m_free_buffer_words);
+	ndbout_c("force flush %d %d outstanding: %u isEmpty(): %u",
+                 pos.m_idx, ptr.p->m_free_buffer_words,
+                 ptr.p->m_outstanding_fs,
+                 ptr.p->m_log_buffer_waiters.isEmpty());
 	
 	ndbrequire(pos.m_idx); // don't flush empty page...
 	Uint64 lsn= ptr.p->m_last_lsn - 1;
@@ -1683,11 +1722,13 @@ void
 Lgman::process_log_buffer_waiters(Signal* signal, Ptr<Logfile_group> ptr)
 {
   Uint32 free_buffer= ptr.p->m_free_buffer_words;
+  Uint32 callback_buffer = ptr.p->m_callback_buffer_words;
   Local_log_waiter_list 
     list(m_log_waiter_pool, ptr.p->m_log_buffer_waiters);
 
-  if(list.isEmpty())
+  if (list.isEmpty())
   {
+    jam();
     ptr.p->m_state &= ~(Uint32)Logfile_group::LG_WAITERS_THREAD;
     return;
   }
@@ -1697,18 +1738,21 @@ Lgman::process_log_buffer_waiters(Signal
   list.first(waiter);
   Uint32 sz  = waiter.p->m_size;
   Uint32 logfile_group_id = ptr.p->m_logfile_group_id;
-  if(sz + 2*File_formats::UNDO_PAGE_WORDS < free_buffer)
+  if (sz + callback_buffer + FREE_BUFFER_MARGIN < free_buffer)
   {
+    jam();
     removed= true;
     Uint32 block = waiter.p->m_block;
     CallbackPtr & callback = waiter.p->m_callback;
+    ptr.p->m_callback_buffer_words += sz;
     sendCallbackConf(signal, block, callback, logfile_group_id);
 
     list.releaseFirst(waiter);
   }
   
-  if(removed && !list.isEmpty())
+  if (removed && !list.isEmpty())
   {
+    jam();
     ptr.p->m_state |= Logfile_group::LG_WAITERS_THREAD;
     signal->theData[0] = LgmanContinueB::PROCESS_LOG_BUFFER_WAITERS;
     signal->theData[1] = ptr.i;
@@ -1716,6 +1760,7 @@ Lgman::process_log_buffer_waiters(Signal
   }
   else
   {
+    jam();
     ptr.p->m_state &= ~(Uint32)Logfile_group::LG_WAITERS_THREAD;
   }
 }
@@ -2204,6 +2249,7 @@ Logfile_client::add_entry(const Change* 
     Ptr<Lgman::Logfile_group> ptr;
     if(m_lgman->m_logfile_group_hash.find(ptr, key))
     {
+      Uint32 callback_buffer = ptr.p->m_callback_buffer_words;
       Uint64 last_lsn_filegroup= ptr.p->m_last_lsn;
       if(last_lsn_filegroup == last_lsn
 #ifdef VM_TRACE
@@ -2219,6 +2265,7 @@ Logfile_client::add_entry(const Change* 
 	}
 	* (dst - 1) |= File_formats::Undofile::UNDO_NEXT_LSN << 16;
 	ptr.p->m_free_file_words += 2;
+        tot += 2; // for callback_buffer
 	m_lgman->validate_logfile_group(ptr);
       }
       else
@@ -2232,6 +2279,11 @@ Logfile_client::add_entry(const Change* 
 	  dst += src[i].len;
 	}
       }
+      if (unlikely(! (tot <= callback_buffer)))
+      {
+        abort();
+      }
+      ptr.p->m_callback_buffer_words = callback_buffer - tot;
     }
     
     m_lgman->m_last_lsn = ptr.p->m_last_lsn = last_lsn + 1;

=== modified file 'storage/ndb/src/kernel/blocks/lgman.hpp'
--- a/storage/ndb/src/kernel/blocks/lgman.hpp	2009-10-08 11:15:24 +0000
+++ b/storage/ndb/src/kernel/blocks/lgman.hpp	2009-10-14 11:44:05 +0000
@@ -215,7 +215,9 @@ public:
     Undofile_list::Head m_files;     // Files in log
     Undofile_list::Head m_meta_files;// Files being created or dropped
     
-    Uint32 m_free_buffer_words;    // Free buffer page words
+    Uint32 m_free_buffer_words;     // Free buffer page words
+    Uint32 m_callback_buffer_words; // buffer words that have been
+                                    // returned to user, but not yet consumed
     Log_waiter_list::Head m_log_buffer_waiters;
     Page_map::Head m_buffer_pages; // Pairs of { ptr.i, count }
     struct Position {

=== modified file 'storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2009-10-08 11:15:24 +0000
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2009-10-14 11:44:05 +0000
@@ -222,6 +222,13 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* s
     signal->theData[0] = 7025;
     EXECUTE_DIRECT(DBDIH, GSN_DUMP_STATE_ORD, signal, 1);
     jamEntry();
+
+    {
+      signal->theData[0] = 12002;
+      EXECUTE_DIRECT(LGMAN, GSN_DUMP_STATE_ORD, signal, 1, 0);
+    }
+
+    jamEntry();
     break;
   }
   case SystemError::CopyFragRefError:


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20091014114405-fvx1hpkib4j3f0i7.bundle
Thread
bzr commit into mysql-5.1-telco-7.0 branch (jonas:3123) Bug#47966  Jonas Oreland  14 Oct