MySQL Lists are EOL. Please join:

List: Commits « Previous Message | Next Message »
From: Martin Skold    Date: February 20 2008 12:52pm
Subject: bk commit into 5.1 tree (mskold:1.2531)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of mskold.  When mskold does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2008-02-20 13:52:06+01:00, mskold@stripped +5 -0
  WL#4083 Resource shortage handling in event mechanism: Added support for handling of buffer overflow in Suma during node failure and disconnect of lagging subscribers (subscribers that do not acknowledge GCP's fast enough), redid implementation for micro-GCPs

  storage/ndb/include/mgmapi/mgmapi_config_parameters.h@stripped, 2008-02-20 13:51:58+01:00, mskold@stripped +1 -1
    WL#4083 Resource shortage handling in event mechanism: Added support for handling of buffer overflow in Suma during node failure and disconnect of lagging subscribers (subscribers that do not acknowledge GCP's fast enough), redid implementation for micro-GCPs

  storage/ndb/src/common/debugger/EventLogger.cpp@stripped, 2008-02-20 13:51:58+01:00, mskold@stripped +3 -3
    WL#4083 Resource shortage handling in event mechanism: Added support for handling of buffer overflow in Suma during node failure and disconnect of lagging subscribers (subscribers that do not acknowledge GCP's fast enough), redid implementation for micro-GCPs

  storage/ndb/src/kernel/blocks/suma/Suma.cpp@stripped, 2008-02-20 13:51:58+01:00, mskold@stripped +37 -54
    WL#4083 Resource shortage handling in event mechanism: Added support for handling of buffer overflow in Suma during node failure and disconnect of lagging subscribers (subscribers that do not acknowledge GCP's fast enough), redid implementation for micro-GCPs

  storage/ndb/src/kernel/blocks/suma/Suma.hpp@stripped, 2008-02-20 13:51:58+01:00, mskold@stripped +5 -4
    WL#4083 Resource shortage handling in event mechanism: Added support for handling of buffer overflow in Suma during node failure and disconnect of lagging subscribers (subscribers that do not acknowledge GCP's fast enough), redid implementation for micro-GCPs

  storage/ndb/src/mgmsrv/ConfigInfo.cpp@stripped, 2008-02-20 13:51:58+01:00, mskold@stripped +1 -1
    WL#4083 Resource shortage handling in event mechanism: Added support for handling of buffer overflow in Suma during node failure and disconnect of lagging subscribers (subscribers that do not acknowledge GCP's fast enough), redid implementation for micro-GCPs

diff -Nrup a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h
--- a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2008-02-11 14:24:16 +01:00
+++ b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2008-02-20 13:51:58 +01:00
@@ -123,7 +123,7 @@
 #define CFG_DB_MICRO_GCP_INTERVAL     170 /* micro gcp */
 #define CFG_DB_MICRO_GCP_TIMEOUT      171
 
-#define CFG_DB_MAX_BUFFERED_GCP       180 /* subscriptions */
+#define CFG_DB_MAX_BUFFERED_EPOCHS    182 /* subscriptions */
 
 #define CFG_DB_SGA                    198 /* super pool mem */
 #define CFG_DB_DATA_MEM_2             199 /* used in special build in 5.1 */
diff -Nrup a/storage/ndb/src/common/debugger/EventLogger.cpp b/storage/ndb/src/common/debugger/EventLogger.cpp
--- a/storage/ndb/src/common/debugger/EventLogger.cpp	2008-02-13 11:39:47 +01:00
+++ b/storage/ndb/src/common/debugger/EventLogger.cpp	2008-02-20 13:51:58 +01:00
@@ -996,10 +996,10 @@ void getTextSubscriptionStatus(QQQQ)
   case(1): // SubscriptionStatus::DISCONNECTED
     BaseString::snprintf(m_text, m_text_len,
                          "Disconnecting node %u because it has "
-                         "exceeded MaxBufferedEpochs (%llu > %u), gci %llu",
+                         "exceeded MaxBufferedEpochs (%u > %u), gci %llu",
                          theData[2],
-                         make_uint64(theData[5], theData[6]),
-                         theData[7],
+                         theData[5],
+                         theData[6],
                          make_uint64(theData[3], theData[4]));
     break;
   case(2): // SubscriptionStatus::INCONSISTENT
diff -Nrup a/storage/ndb/src/kernel/blocks/suma/Suma.cpp b/storage/ndb/src/kernel/blocks/suma/Suma.cpp
--- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2008-02-19 09:10:33 +01:00
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2008-02-20 13:51:58 +01:00
@@ -165,13 +165,13 @@ Suma::execREAD_CONFIG_REQ(Signal* signal
   ndbrequire(p != 0);
 
   // SumaParticipant
-  Uint32 noTables, noAttrs, maxBufferedGcp;
+  Uint32 noTables, noAttrs, maxBufferedEpochs;
   ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES,  
 			    &noTables);
   ndb_mgm_get_int_parameter(p, CFG_DB_NO_ATTRIBUTES,  
 			    &noAttrs);
-  ndb_mgm_get_int_parameter(p, CFG_DB_MAX_BUFFERED_GCP,
-                            &maxBufferedGcp);
+  ndb_mgm_get_int_parameter(p, CFG_DB_MAX_BUFFERED_EPOCHS,
+                            &maxBufferedEpochs);
 
   c_tablePool.setSize(noTables);
   c_tables.setSize(noTables);
@@ -183,7 +183,7 @@ Suma::execREAD_CONFIG_REQ(Signal* signal
   c_syncPool.setSize(2);
   c_dataBufferPool.setSize(noAttrs);
 
-  c_maxBufferedGcp = maxBufferedGcp;
+  c_maxBufferedEpochs = maxBufferedEpochs;
 
   // Calculate needed gcp pool as 10 records + the ones needed
   // during a possible api timeout
@@ -783,7 +783,7 @@ Suma::execNODE_FAILREP(Signal* signal){
     {
       ndbout_c("Inserting API_FAILREQ node: %u", node);
       signal->theData[0] = node;
-      EXECUTE_DIRECT(QMGR, GSN_API_FAILREQ, signal, 1);
+      sendSignal(QMGR_REF, GSN_API_FAILREQ, signal, 1, JBA);
     }
   }
   
@@ -3561,73 +3561,56 @@ Suma::execFIRE_TRIG_ORD(Signal* signal)
 }
 
 void
-Suma::checkMaxBufferedGCP(Signal *signal)
+Suma::checkMaxBufferedEpochs(Signal *signal)
 {
   /*
    * Check if any subscribers are exceeding the MaxBufferedEpochs
    */
+  Ptr<Gcp_record> gcp;
   jamEntry();
   if (c_gcp_list.isEmpty())
   {
     jam();
     return;
   }
-  Ptr<Gcp_record> gcp;
   c_gcp_list.first(gcp);
   if (ERROR_INSERTED(13035))
   {
     jam();
     CLEAR_ERROR_INSERT_VALUE;
     ndbout_c("Simulating exceeding the MaxBufferedEpochs %u(%llu,%llu,%llu)",
-            c_maxBufferedGcp, m_max_seen_gci,
+            c_maxBufferedEpochs, m_max_seen_gci,
             m_last_complete_gci, gcp.p->m_gci);
-    c_maxBufferedGcp = 1;
   }
-  if (m_max_seen_gci - gcp.p->m_gci >= (Uint64) c_maxBufferedGcp)
+  else if (c_gcp_list.count() < c_maxBufferedEpochs)
   {
-    NodeBitmask subs = c_subscriber_nodes;
-    jam();
-    // Disconnect lagging subscribers
-    for(; !gcp.isNull(); c_gcp_list.next(gcp))
-    {
-      Uint64 lag = m_max_seen_gci - gcp.p->m_gci;
-      jam();
-      if (lag >= (Uint64) c_maxBufferedGcp)
-      {
-        jam();
-        for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId++)
-        {
-          if (subs.get(nodeId))
-          {
-           jam();
-           subs.clear(nodeId);
-           // Disconnecting node
-           signal->theData[0] = NDB_LE_SubscriptionStatus;
-           signal->theData[1] = 1; // DISCONNECTED;
-           signal->theData[2] = nodeId;
-           signal->theData[3] = (Uint32) gcp.p->m_gci;
-           signal->theData[4] = (Uint32) (gcp.p->m_gci >> 32);
-           signal->theData[5] = (Uint32) lag;
-           signal->theData[6] = (Uint32) (lag >> 32); 
-           signal->theData[7] = c_maxBufferedGcp;
-           sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 8, JBB);
-
-            /**
-             * Force API_FAILREQ
-            */
-           signal->theData[0] = nodeId;
-            EXECUTE_DIRECT(QMGR, GSN_API_FAILREQ, signal, 1);
-          }
-        }
-      }
-      else
-      {
-        /*
-         * We have found a newer gci that still is
-         * allowed to be buffered
-         */
-        break;
-      }
+    return;
+  }
+  NodeBitmask subs = gcp.p->m_subscribers;
+  jam();
+  // Disconnect lagging subscribers waiting for oldest epoch
+  ndbout_c("Found lagging epoch %llu", gcp.p->m_gci);
+  for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId++)
+  {
+    if (subs.get(nodeId))
+    {
+      jam();
+      subs.clear(nodeId);
+      // Disconnecting node
+      signal->theData[0] = NDB_LE_SubscriptionStatus;
+      signal->theData[1] = 1; // DISCONNECTED;
+      signal->theData[2] = nodeId;
+      signal->theData[3] = (Uint32) gcp.p->m_gci;
+      signal->theData[4] = (Uint32) (gcp.p->m_gci >> 32);
+      signal->theData[5] = (Uint32) c_gcp_list.count();
+      signal->theData[6] = c_maxBufferedEpochs;
+      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 8, JBB);
+      
+      /**
+       * Force API_FAILREQ
+       */
+      signal->theData[0] = nodeId;
+      sendSignal(QMGR_REF, GSN_API_FAILREQ, signal, 1, JBA);
     }
   }
 }
@@ -3671,7 +3654,7 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* s
 #endif
 
   m_last_complete_gci = gci;
-  //checkMaxBufferedGCP(signal);
+  checkMaxBufferedEpochs(signal);
   m_max_seen_gci = (gci > m_max_seen_gci ? gci : m_max_seen_gci);
 
   /**
diff -Nrup a/storage/ndb/src/kernel/blocks/suma/Suma.hpp b/storage/ndb/src/kernel/blocks/suma/Suma.hpp
--- a/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2008-02-11 14:24:16 +01:00
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2008-02-20 13:51:58 +01:00
@@ -23,7 +23,7 @@
 
 #include <SLList.hpp>
 #include <DLList.hpp>
-#include <DLFifoList.hpp>
+#include <DLCFifoList.hpp>
 #include <KeyTable.hpp>
 #include <DataBuffer.hpp>
 #include <SignalCounter.hpp>
@@ -349,7 +349,7 @@ public:
   ArrayPool<SyncRecord> c_syncPool;
   DataBuffer<15>::DataBufferPool c_dataBufferPool;
 
-  Uint32 c_maxBufferedGcp;
+  Uint32 c_maxBufferedEpochs;
 
   NodeBitmask c_failedApiNodes;
   
@@ -602,7 +602,8 @@ private:
 
   Uint64 get_current_gci(Signal*);
 
-  void checkMaxBufferedGCP(Signal *signal);
+  void checkMaxBufferedEpochs
+(Signal *signal);
 
   Uint64 m_max_seen_gci;      // FIRE_TRIG_ORD
   Uint64 m_max_sent_gci;      // FIRE_TRIG_ORD -> send
@@ -622,7 +623,7 @@ private:
     Uint32 prevList;
   };
   ArrayPool<Gcp_record> c_gcp_pool;
-  DLFifoList<Gcp_record> c_gcp_list;
+  DLCFifoList<Gcp_record> c_gcp_list;
 
   struct Page_chunk
   {
diff -Nrup a/storage/ndb/src/mgmsrv/ConfigInfo.cpp b/storage/ndb/src/mgmsrv/ConfigInfo.cpp
--- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2008-02-11 15:05:02 +01:00
+++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2008-02-20 13:51:58 +01:00
@@ -917,7 +917,7 @@ const ConfigInfo::ParamInfo ConfigInfo::
     "32000" },
 
   {
-    CFG_DB_MAX_BUFFERED_GCP,
+    CFG_DB_MAX_BUFFERED_EPOCHS,
     "MaxBufferedEpochs",
     DB_TOKEN,
     "Allowed numbered of epochs that a subscribing node can lag behind (unprocessed epochs).  Exceeding will cause lagging subscribers to be disconnected.",
Thread
bk commit into 5.1 tree (mskold:1.2531) — Martin Skold, 20 Feb