List: Commits « Previous Message | Next Message »
From: Jonas Oreland  Date: August 24 2010 2:04pm
Subject:bzr commit into mysql-5.1-telco-6.3 branch (jonas:3261) Bug#55641
View as plain text  
#At file:///home/jonas/src/telco-6.3/ based on revid:jonas@stripped

 3261 Jonas Oreland	2010-08-24
      ndb - bug#55641 - commit to test

    modified:
      storage/ndb/include/kernel/signaldata/StopMe.hpp
      storage/ndb/include/kernel/signaldata/SumaImpl.hpp
      storage/ndb/include/mgmapi/ndbd_exit_codes.h
      storage/ndb/include/ndb_version.h.in
      storage/ndb/src/kernel/blocks/ERROR_codes.txt
      storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
      storage/ndb/src/kernel/blocks/suma/Suma.cpp
      storage/ndb/src/kernel/blocks/suma/Suma.hpp
      storage/ndb/src/kernel/blocks/suma/SumaInit.cpp
      storage/ndb/src/kernel/error/ndbd_exit_codes.c
      storage/ndb/src/ndbapi/Ndb.cpp
      storage/ndb/src/ndbapi/Ndbif.cpp
      storage/ndb/src/ndbapi/TransporterFacade.hpp
      storage/ndb/test/ndbapi/test_event.cpp
      storage/ndb/test/run-test/daily-basic-tests.txt
=== modified file 'storage/ndb/include/kernel/signaldata/StopMe.hpp'
--- a/storage/ndb/include/kernel/signaldata/StopMe.hpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/include/kernel/signaldata/StopMe.hpp	2010-08-24 14:04:02 +0000
@@ -28,27 +28,28 @@
  * @see StartMeReq
  * @see StartPermReq
  */
-class StopMeReq {
+struct StopMeReq
+{
   
   /**
    * Sender(s) / Reciver(s)
    */
   friend class Dbdih;
+  friend class Suma;
   
   /**
    * Sender
    */
   friend class Ndbcntr;
 
-public:
   STATIC_CONST( SignalLength = 2 );
-private:
   
   Uint32 senderRef;
   Uint32 senderData;
 };
 
-class StopMeConf {
+struct StopMeConf
+{
 
   /**
    * Sender(s) / Reciver(s)
@@ -60,10 +61,8 @@ class StopMeConf {
    */
   friend class Ndbcntr;
 
-public:
   STATIC_CONST( SignalLength = 2 );
   
-private:
   Uint32 senderRef;
   Uint32 senderData;
 };

=== modified file 'storage/ndb/include/kernel/signaldata/SumaImpl.hpp'
--- a/storage/ndb/include/kernel/signaldata/SumaImpl.hpp	2010-05-27 08:51:31 +0000
+++ b/storage/ndb/include/kernel/signaldata/SumaImpl.hpp	2010-08-24 14:04:02 +0000
@@ -572,18 +572,28 @@ struct SumaStartMeConf {
   Uint32 unused;
 };
 
-struct SumaHandoverReq {
-  STATIC_CONST( SignalLength = 3 );
+struct SumaHandoverReq
+{
+  STATIC_CONST( SignalLength = 4 );
   Uint32 gci;
   Uint32 nodeId;
   Uint32 theBucketMask[1];
+  Uint32 requestType;
+
+  enum RequestType
+  {
+    RT_START_NODE = 0,
+    RT_STOP_NODE = 1
+  };
 };
 
-struct SumaHandoverConf {
-  STATIC_CONST( SignalLength = 3 );
+struct SumaHandoverConf
+{
+  STATIC_CONST( SignalLength = 4 );
   Uint32 gci;
   Uint32 nodeId;
   Uint32 theBucketMask[1];
+  Uint32 requestType;
 };
 
 struct SumaContinueB

=== modified file 'storage/ndb/include/mgmapi/ndbd_exit_codes.h'
--- a/storage/ndb/include/mgmapi/ndbd_exit_codes.h	2009-10-26 14:21:02 +0000
+++ b/storage/ndb/include/mgmapi/ndbd_exit_codes.h	2010-08-24 14:04:02 +0000
@@ -123,6 +123,8 @@ typedef ndbd_exit_classification_enum nd
 #define NDBD_EXIT_NO_RESTORABLE_REPLICA       6303
 #define NDBD_EXIT_UNSUPPORTED_VERSION         6304
 
+#define NDBD_EXIT_GRACEFUL_SHUTDOWN_ERROR     6305
+
 /* ACC 6600-> */
 #define NDBD_EXIT_SR_OUT_OF_INDEXMEMORY       6600
 /* TUP 6800-> */

=== modified file 'storage/ndb/include/ndb_version.h.in'
--- a/storage/ndb/include/ndb_version.h.in	2010-06-16 07:16:01 +0000
+++ b/storage/ndb/include/ndb_version.h.in	2010-08-24 14:04:02 +0000
@@ -336,4 +336,29 @@ ndb_check_hb_order_version(Uint32 x)
   }
 }
 
+#define NDBD_SUMA_STOP_ME_63 NDB_MAKE_VERSION(6,3,37)
+#define NDBD_SUMA_STOP_ME_70 NDB_MAKE_VERSION(7,0,18)
+#define NDBD_SUMA_STOP_ME_71 NDB_MAKE_VERSION(7,1,7)
+
+static
+inline
+int
+ndbd_suma_stop_me(Uint32 x)
+{
+  /* Compare x with the threshold of its own series: 6.x, 7.0.x or later */
+  const Uint32 major = (x >> 16) & 0xFF;
+  const Uint32 minor = (x >>  8) & 0xFF;
+  Uint32 limit = NDBD_SUMA_STOP_ME_71;
+
+  if (major == 6)
+  {
+    limit = NDBD_SUMA_STOP_ME_63;
+  }
+  else if (major == 7 && minor == 0)
+  {
+    limit = NDBD_SUMA_STOP_ME_70;
+  }
+  return x >= limit;
+}
+
 #endif

=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2010-08-17 10:07:41 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2010-08-24 14:04:02 +0000
@@ -11,7 +11,7 @@ Next CMVMI 9000
 Next BACKUP 10041
 Next DBUTIL 11002
 Next DBTUX 12008
-Next SUMA 13043
+Next SUMA 13044
 Next LGMAN 15001
 Next TSMAN 16001
 

=== modified file 'storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2010-01-25 15:36:04 +0000
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2010-08-24 14:04:02 +0000
@@ -3056,11 +3056,17 @@ Ndbcntr::StopRecord::checkLqhTimeout_1(S
   cntr.sendSignalWithDelay(cntr.reference(), GSN_CONTINUEB, signal, 100, 1);
 }
 
-void Ndbcntr::execCHANGE_NODE_STATE_CONF(Signal* signal){
+void
+Ndbcntr::execCHANGE_NODE_STATE_CONF(Signal* signal)
+{
   jamEntry();
+
+  /**
+   * stop replication stream
+   */
   signal->theData[0] = reference();
   signal->theData[1] = 12;
-  sendSignal(DBDIH_REF, GSN_STOP_ME_REQ, signal, 2, JBB);
+  sendSignal(SUMA_REF, GSN_STOP_ME_REQ, signal, 2, JBB);
 }
 
 void Ndbcntr::execSTOP_ME_REF(Signal* signal){
@@ -3072,6 +3078,18 @@ void Ndbcntr::execSTOP_ME_REF(Signal* si
 void Ndbcntr::execSTOP_ME_CONF(Signal* signal){
   jamEntry();
 
+  const StopMeConf * conf = CAST_CONSTPTR(StopMeConf, signal->getDataPtr());
+  if (conf->senderData == 12)
+  {
+    /**
+     * Remove node from transactions
+     */
+    signal->theData[0] = reference();
+    signal->theData[1] = 13;
+    sendSignal(DBDIH_REF, GSN_STOP_ME_REQ, signal, 2, JBB);
+    return;
+  }
+
   NodeState newState(NodeState::SL_STOPPING_4, 
 		     StopReq::getSystemStop(c_stopRec.stopReq.requestInfo));
   updateNodeState(signal, newState);

=== modified file 'storage/ndb/src/kernel/blocks/suma/Suma.cpp'
--- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2010-06-10 07:00:44 +0000
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2010-08-24 14:04:02 +0000
@@ -47,6 +47,7 @@
 #include <signaldata/AlterTab.hpp>
 #include <signaldata/DihFragCount.hpp>
 #include <signaldata/SystemError.hpp>
+#include <signaldata/StopMe.hpp>
 
 #include <signaldata/DictLock.hpp>
 #include <ndbapi/NdbDictionary.hpp>
@@ -618,12 +619,12 @@ Suma::check_start_handover(Signal* signa
     }
     
     c_startup.m_wait_handover= false;
-    send_handover_req(signal);
+    send_handover_req(signal, SumaHandoverReq::RT_START_NODE);
   }
 }
 
 void
-Suma::send_handover_req(Signal* signal)
+Suma::send_handover_req(Signal* signal, Uint32 type)
 {
   c_startup.m_handover_nodes.assign(c_alive_nodes);
   c_startup.m_handover_nodes.bitAND(c_nodes_in_nodegroup_mask);
@@ -633,12 +634,15 @@ Suma::send_handover_req(Signal* signal)
   SumaHandoverReq* req= (SumaHandoverReq*)signal->getDataPtrSend();
   char buf[255];
   c_startup.m_handover_nodes.getText(buf);
-  infoEvent("Suma: initiate handover with nodes %s GCI: %d",
-	    buf, gci);
+  infoEvent("Suma: initiate handover for %s with nodes %s GCI: %d",
+            (type == SumaHandoverReq::RT_START_NODE ? "startup" : "shutdown"),
+            buf,
+            gci);
 
   req->gci = gci;
   req->nodeId = getOwnNodeId();
-  
+  req->requestType = type;
+
   NodeReceiverGroup rg(SUMA, c_startup.m_handover_nodes);
   sendSignal(rg, GSN_SUMA_HANDOVER_REQ, signal, 
 	     SumaHandoverReq::SignalLength, JBB);
@@ -1097,6 +1101,14 @@ Suma::execNODE_FAILREP(Signal* signal){
 	  progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, 
 		    "Nodefailure during SUMA takeover");
 	}
+        else if (state & Bucket::BUCKET_SHUTDOWN)
+        {
+          jam();
+          c_buckets[i].m_state &= ~Uint32(Bucket::BUCKET_SHUTDOWN);
+          m_switchover_buckets.clear(i);
+          ndbrequire(get_responsible_node(i, tmp) == getOwnNodeId());
+          start_resend(signal, i);
+        }
       }
       else if(get_responsible_node(i, tmp) == getOwnNodeId())
       {
@@ -3791,6 +3803,7 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* s
   {
     NdbNodeBitmask takeover_nodes;
     NdbNodeBitmask handover_nodes;
+    NdbNodeBitmask shutdown_nodes;
     Uint32 i = m_switchover_buckets.find(0);
     for(; i != Bucket_mask::NotFound; i = m_switchover_buckets.find(i + 1))
     {
@@ -3840,6 +3853,27 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* s
 	  c_buckets[i].m_state &= ~(Uint32)Bucket::BUCKET_TAKEOVER;
 	  takeover_nodes.set(c_buckets[i].m_switchover_node);
 	}
+        else if (state & Bucket::BUCKET_SHUTDOWN)
+        {
+          Uint32 nodeId = c_buckets[i].m_switchover_node;
+          if (nodeId == getOwnNodeId())
+          {
+            jam();
+            m_active_buckets.clear(i);
+            shutdown_nodes.set(nodeId);
+          }
+          else
+          {
+            jam();
+            NdbNodeBitmask nodegroup = c_nodes_in_nodegroup_mask;
+            nodegroup.clear(nodeId);
+            ndbrequire(get_responsible_node(i) == nodeId &&
+                       get_responsible_node(i, nodegroup) == getOwnNodeId());
+            m_active_buckets.set(i);
+            takeover_nodes.set(nodeId);
+          }
+          c_buckets[i].m_state &= ~(Uint32)Bucket::BUCKET_SHUTDOWN;
+        }
 	else
 	{
 	  /**
@@ -3854,14 +3888,28 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* s
     }
     ndbassert(handover_nodes.count() == 0 || 
 	      m_gcp_complete_rep_count > handover_nodes.count());
-    m_gcp_complete_rep_count -= handover_nodes.count();
     m_gcp_complete_rep_count += takeover_nodes.count();
+    m_gcp_complete_rep_count -= handover_nodes.count();
+    m_gcp_complete_rep_count -= shutdown_nodes.count();
 
-    if(getNodeState().startLevel == NodeState::SL_STARTING && 
-       m_switchover_buckets.isclear() && 
-       c_startup.m_handover_nodes.isclear())
+    if (m_switchover_buckets.isclear() && c_startup.m_handover_nodes.isclear())
     {
-      sendSTTORRY(signal);
+      if(getNodeState().startLevel == NodeState::SL_STARTING)
+      {
+        sendSTTORRY(signal);
+      }
+      else if (getNodeState().startLevel >= NodeState::SL_STOPPING_1)
+      {
+        jam();
+        ndbrequire(c_shutdown.m_wait_handover);
+        StopMeConf * conf = CAST_PTR(StopMeConf, signal->getDataPtrSend());
+        conf->senderData = c_shutdown.m_senderData;
+        conf->senderRef = reference();
+        sendSignal(c_shutdown.m_senderRef, GSN_STOP_ME_CONF, signal,
+                   StopMeConf::SignalLength, JBB);
+        c_shutdown.m_wait_handover = false;
+        infoEvent("Suma: handover complete");
+      }
     }
   }
 
@@ -4763,47 +4811,90 @@ Suma::execSUMA_HANDOVER_REQ(Signal* sign
   jamEntry();
   DBUG_ENTER("Suma::execSUMA_HANDOVER_REQ");
   //  Uint32 sumaRef = signal->getSendersBlockRef();
-  SumaHandoverReq const * req = (SumaHandoverReq *)signal->getDataPtr();
+  const SumaHandoverReq * req = CAST_CONSTPTR(SumaHandoverReq,
+                                              signal->getDataPtr());
 
   Uint32 gci = req->gci;
   Uint32 nodeId = req->nodeId;
   Uint32 new_gci = (m_last_complete_gci >> 32) + MAX_CONCURRENT_GCP + 1;
+  Uint32 requestType = req->requestType;
   
   Uint32 start_gci = (gci > new_gci ? gci : new_gci);
   // mark all active buckets really belonging to restarting SUMA
 
-  c_alive_nodes.set(nodeId);
-  
   Bucket_mask tmp;
-  for( Uint32 i = 0; i < c_no_of_buckets; i++) 
+  if (requestType == SumaHandoverReq::RT_START_NODE)
   {
-    if(get_responsible_node(i) == nodeId)
+    jam();
+    c_alive_nodes.set(nodeId);
+
+    for( Uint32 i = 0; i < c_no_of_buckets; i++)
     {
-      if (m_active_buckets.get(i))
+      if(get_responsible_node(i) == nodeId)
       {
-	// I'm running this bucket but it should really be the restarted node
-	tmp.set(i);
-	m_active_buckets.clear(i);
-	m_switchover_buckets.set(i);
-	c_buckets[i].m_switchover_gci = (Uint64(start_gci) << 32) - 1;
-	c_buckets[i].m_state |= Bucket::BUCKET_HANDOVER;
-	c_buckets[i].m_switchover_node = nodeId;
-	ndbout_c("prepare to handover bucket: %d", i);
+        if (m_active_buckets.get(i))
+        {
+          // I'm running this bucket but it should really be the restarted node
+          tmp.set(i);
+          m_active_buckets.clear(i);
+          m_switchover_buckets.set(i);
+          c_buckets[i].m_switchover_gci = (Uint64(start_gci) << 32) - 1;
+          c_buckets[i].m_state |= Bucket::BUCKET_HANDOVER;
+          c_buckets[i].m_switchover_node = nodeId;
+          ndbout_c("prepare to handover bucket: %d", i);
+        }
+        else if(m_switchover_buckets.get(i))
+        {
+          ndbout_c("dont handover bucket: %d %d", i, nodeId);
+        }
       }
-      else if(m_switchover_buckets.get(i))
+    }
+  }
+  else if (requestType == SumaHandoverReq::RT_STOP_NODE)
+  {
+    jam();
+
+    for( Uint32 i = 0; i < c_no_of_buckets; i++)
+    {
+      NdbNodeBitmask nodegroup = c_nodes_in_nodegroup_mask;
+      nodegroup.clear(nodeId);
+      if(get_responsible_node(i) == nodeId &&
+         get_responsible_node(i, nodegroup) == getOwnNodeId())
       {
-	ndbout_c("dont handover bucket: %d %d", i, nodeId);
+        // I will be running this bucket once nodeId has shut down
+        jam();
+        tmp.set(i);
+        m_switchover_buckets.set(i);
+        c_buckets[i].m_switchover_gci = (Uint64(start_gci) << 32) - 1;
+        c_buckets[i].m_state |= Bucket::BUCKET_SHUTDOWN;
+        c_buckets[i].m_switchover_node = nodeId;
+        ndbout_c("prepare to takeover bucket: %d", i);
       }
     }
   }
-  
-  SumaHandoverConf* conf= (SumaHandoverConf*)signal->getDataPtrSend();
-  tmp.copyto(BUCKET_MASK_SIZE, conf->theBucketMask);
-  conf->gci = start_gci;
-  conf->nodeId = getOwnNodeId();
-  sendSignal(calcSumaBlockRef(nodeId), GSN_SUMA_HANDOVER_CONF, signal,
-	     SumaHandoverConf::SignalLength, JBB);
-  
+  else
+  {
+    jam();
+    goto ref;
+  }
+
+  {
+    SumaHandoverConf *conf= CAST_PTR(SumaHandoverConf,signal->getDataPtrSend());
+    tmp.copyto(BUCKET_MASK_SIZE, conf->theBucketMask);
+    conf->gci = start_gci;
+    conf->nodeId = getOwnNodeId();
+    conf->requestType = requestType;
+    sendSignal(calcSumaBlockRef(nodeId), GSN_SUMA_HANDOVER_CONF, signal,
+               SumaHandoverConf::SignalLength, JBB);
+  }
+
+  DBUG_VOID_RETURN;
+
+ref:
+  signal->theData[0] = 111;
+  signal->theData[1] = getOwnNodeId();
+  signal->theData[2] = nodeId;
+  sendSignal(calcSumaBlockRef(nodeId), GSN_SUMA_HANDOVER_REF, signal, 3, JBB);
   DBUG_VOID_RETURN;
 }
 
@@ -4819,34 +4910,101 @@ Suma::execSUMA_HANDOVER_CONF(Signal* sig
   jamEntry();
   DBUG_ENTER("Suma::execSUMA_HANDOVER_CONF");
 
-  SumaHandoverConf const * conf = (SumaHandoverConf *)signal->getDataPtr();
+  const SumaHandoverConf * conf = CAST_CONSTPTR(SumaHandoverConf,
+                                                signal->getDataPtr());
+
+  CRASH_INSERTION(13043);
 
   Uint32 gci = conf->gci;
   Uint32 nodeId = conf->nodeId;
+  Uint32 requestType = conf->requestType;
   Bucket_mask tmp;
   tmp.assign(BUCKET_MASK_SIZE, conf->theBucketMask);
 #ifdef HANDOVER_DEBUG
   ndbout_c("Suma::execSUMA_HANDOVER_CONF, gci = %u", gci);
 #endif
 
-  for( Uint32 i = 0; i < c_no_of_buckets; i++) 
+  if (requestType == SumaHandoverReq::RT_START_NODE)
   {
-    if (tmp.get(i))
+    jam();
+    for (Uint32 i = 0; i < c_no_of_buckets; i++)
     {
-      ndbrequire(get_responsible_node(i) == getOwnNodeId());
-      // We should run this bucket, but _nodeId_ is
-      c_buckets[i].m_switchover_gci = (Uint64(gci) << 32) - 1;
-      c_buckets[i].m_state |= Bucket::BUCKET_STARTING;
+      if (tmp.get(i))
+      {
+        ndbrequire(get_responsible_node(i) == getOwnNodeId());
+        // We should run this bucket, but _nodeId_ is
+        c_buckets[i].m_switchover_gci = (Uint64(gci) << 32) - 1;
+        c_buckets[i].m_state |= Bucket::BUCKET_STARTING;
+      }
     }
+
+    char buf[255];
+    tmp.getText(buf);
+    infoEvent("Suma: handover from node %d gci: %d buckets: %s (%d)",
+              nodeId, gci, buf, c_no_of_buckets);
+    m_switchover_buckets.bitOR(tmp);
+    c_startup.m_handover_nodes.clear(nodeId);
+    DBUG_VOID_RETURN;
   }
+  else if (requestType == SumaHandoverReq::RT_STOP_NODE)
+  {
+    jam();
+    for (Uint32 i = 0; i < c_no_of_buckets; i++)
+    {
+      if (tmp.get(i))
+      {
+        ndbrequire(get_responsible_node(i) == getOwnNodeId());
+        // We run this bucket now; _nodeId_ takes it over at switchover
+        c_buckets[i].m_switchover_node = getOwnNodeId();
+        c_buckets[i].m_switchover_gci = (Uint64(gci) << 32) - 1;
+        c_buckets[i].m_state |= Bucket::BUCKET_SHUTDOWN;
+      }
+    }
   
-  char buf[255];
-  tmp.getText(buf);
-  infoEvent("Suma: handover from node %d gci: %d buckets: %s (%d)",
-	    nodeId, gci, buf, c_no_of_buckets);
-  m_switchover_buckets.bitOR(tmp);
-  c_startup.m_handover_nodes.clear(nodeId);
-  DBUG_VOID_RETURN;
+    char buf[255];
+    tmp.getText(buf);
+    infoEvent("Suma: handover to node %d gci: %d buckets: %s (%d)",
+              nodeId, gci, buf, c_no_of_buckets);
+    m_switchover_buckets.bitOR(tmp);
+    c_startup.m_handover_nodes.clear(nodeId);
+    DBUG_VOID_RETURN;
+  }
+}
+
+/**
+ * Graceful node stop (sent by local NDBCNTR): record who to answer and
+ * ask the other SUMAs in the nodegroup to take over (RT_STOP_NODE).
+ */
+void
+Suma::execSTOP_ME_REQ(Signal* signal)
+{
+  jam();
+  StopMeReq req = * CAST_CONSTPTR(StopMeReq, signal->getDataPtr());
+
+  ndbrequire(refToNode(req.senderRef) == getOwnNodeId());
+  ndbrequire(c_shutdown.m_wait_handover == false);
+  c_shutdown.m_wait_handover = true;
+  c_shutdown.m_senderRef = req.senderRef;
+  c_shutdown.m_senderData = req.senderData;
+
+  /**
+   * Walk the set bits of the nodegroup mask (node 0 need not be a member).
+   * Every such SUMA must support graceful shutdown; too late to refuse now.
+   */
+  for (Uint32 i = c_nodes_in_nodegroup_mask.find(0);
+       i != c_nodes_in_nodegroup_mask.NotFound ;
+       i = c_nodes_in_nodegroup_mask.find(i + 1))
+  {
+    if (!ndbd_suma_stop_me(getNodeInfo(i).m_version))
+    {
+      jam();
+      progError(__LINE__, NDBD_EXIT_GRACEFUL_SHUTDOWN_ERROR,
+                "Not all versions support graceful shutdown (suma)."
+                " Shutdown directly instead");
+      ndbrequire(false);
+    }
+  }
+  send_handover_req(signal, SumaHandoverReq::RT_STOP_NODE);
+}
 
 #ifdef NOT_USED

=== modified file 'storage/ndb/src/kernel/blocks/suma/Suma.hpp'
--- a/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2010-05-27 08:51:31 +0000
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2010-08-24 14:04:02 +0000
@@ -464,6 +464,8 @@ public:
   void execSUMA_START_ME_REF(Signal* signal);
   void execSUMA_START_ME_CONF(Signal* signal);
 
+  void execSTOP_ME_REQ(Signal*);
+
   void copySubscription(Signal* signal, DLHashTable<Subscription>::Iterator);
   void sendSubCreateReq(Signal* signal, Ptr<Subscription>);
   void copySubscriber(Signal*, Ptr<Subscription>, Ptr<Subscriber>);
@@ -508,6 +510,16 @@ private:
     NdbNodeBitmask m_handover_nodes;
   } c_startup;
 
+  /**
+   * State of an ongoing graceful shutdown (set up in execSTOP_ME_REQ)
+   */
+  struct Shutdown
+  {
+    bool m_wait_handover;  // true from STOP_ME_REQ until STOP_ME_CONF is sent
+    Uint32 m_senderRef;    // requester's block ref, destination of STOP_ME_CONF
+    Uint32 m_senderData;   // requester's senderData, echoed in STOP_ME_CONF
+  } c_shutdown;
+
   struct Restart
   {
     Uint16 m_abort;
@@ -535,7 +547,7 @@ private:
   void send_dict_lock_req(Signal* signal);
   void send_start_me_req(Signal* signal);
   void check_start_handover(Signal* signal);
-  void send_handover_req(Signal* signal);
+  void send_handover_req(Signal* signal, Uint32 type);
 
   Uint32 get_responsible_node(Uint32 B) const;
   Uint32 get_responsible_node(Uint32 B, const NdbNodeBitmask& mask) const;
@@ -558,6 +570,7 @@ private:
       ,BUCKET_HANDOVER = 0x2 // On running node
       ,BUCKET_TAKEOVER = 0x4 // On takeing over node
       ,BUCKET_RESEND   = 0x8 // On takeing over node
+      ,BUCKET_SHUTDOWN = 0x10 // Graceful shutdown
     };
     Uint16 m_state;
     Uint16 m_switchover_node;

=== modified file 'storage/ndb/src/kernel/blocks/suma/SumaInit.cpp'
--- a/storage/ndb/src/kernel/blocks/suma/SumaInit.cpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/kernel/blocks/suma/SumaInit.cpp	2010-08-24 14:04:02 +0000
@@ -62,6 +62,9 @@ Suma::Suma(Block_context& ctx) :
   addRecSignal(GSN_SUB_GCP_COMPLETE_ACK, 
 	       &Suma::execSUB_GCP_COMPLETE_ACK);
   
+  addRecSignal(GSN_STOP_ME_REQ,
+               &Suma::execSTOP_ME_REQ);
+
   /**
    * SUMA participant if
    */
@@ -122,6 +125,7 @@ Suma::Suma(Block_context& ctx) :
   c_current_seq = 0;
   c_restart.m_ref = 0;
   c_startup.m_restart_server_node_id = RNIL; // Server for my NR
+  c_shutdown.m_wait_handover = false;
 
 #ifdef VM_TRACE
   m_gcp_monitor = 0;

=== modified file 'storage/ndb/src/kernel/error/ndbd_exit_codes.c'
--- a/storage/ndb/src/kernel/error/ndbd_exit_codes.c	2009-10-26 14:21:02 +0000
+++ b/storage/ndb/src/kernel/error/ndbd_exit_codes.c	2010-08-24 14:04:02 +0000
@@ -171,7 +171,9 @@ static const ErrStruct errArray[] =
    {NDBD_EXIT_INSUFFICENT_NODES, XRE, "Insufficent nodes for system restart" },
    
    {NDBD_EXIT_UNSUPPORTED_VERSION, XRE, "Unsupported version" },
-   
+   {NDBD_EXIT_GRACEFUL_SHUTDOWN_ERROR, XNE,
+    "Graceful shutdown not 100% possible due to mixed ndbd versions" },
+
    /* Sentinel */
    {0, XUE,
     "No message slogan found (please report a bug if you get this error code)"}

=== modified file 'storage/ndb/src/ndbapi/Ndb.cpp'
--- a/storage/ndb/src/ndbapi/Ndb.cpp	2010-06-10 07:00:44 +0000
+++ b/storage/ndb/src/ndbapi/Ndb.cpp	2010-08-24 14:04:02 +0000
@@ -151,6 +151,14 @@ Ndb::NDB_connect(Uint32 tNode) 
 
   DBUG_ENTER("Ndb::NDB_connect");
 
+  {
+    TransporterFacade *tp = theImpl->m_transporter_facade;
+    if (tp->get_node_stopping(tNode))
+    {
+      DBUG_RETURN(0);
+    }
+  }
+
   NdbTransaction * tConArray = theConnectionArray[tNode];
   if (tConArray != NULL) {
     DBUG_RETURN(2);

=== modified file 'storage/ndb/src/ndbapi/Ndbif.cpp'
--- a/storage/ndb/src/ndbapi/Ndbif.cpp	2010-01-18 16:31:48 +0000
+++ b/storage/ndb/src/ndbapi/Ndbif.cpp	2010-08-24 14:04:02 +0000
@@ -1166,13 +1166,9 @@ Ndb::sendPrepTrans(int forceSend)
     NdbTransaction * a_con = thePreparedTransactionsArray[i];
     thePreparedTransactionsArray[i] = NULL;
     Uint32 node_id = a_con->getConnectedNodeId();
-    if (((tp->getNodeSequence(node_id) == a_con->theNodeSequence) &&
-         tp->get_node_alive(node_id)) ||
-        ((tp->get_node_stopping(node_id) && 
-          ((a_con->theSendStatus == NdbTransaction::sendABORT) ||
-           (a_con->theSendStatus == NdbTransaction::sendABORTfail) ||
-           (a_con->theSendStatus == NdbTransaction::sendCOMMITstate) ||
-           (a_con->theSendStatus == NdbTransaction::sendCompleted))))) {
+    if ((tp->getNodeSequence(node_id) == a_con->theNodeSequence) &&
+        (tp->get_node_alive(node_id) || tp->get_node_stopping(node_id)))
+    {
       /*
       We will send if
       1) Node is alive and sequences are correct OR
@@ -1218,27 +1214,15 @@ Ndb::sendPrepTrans(int forceSend)
 #ifdef VM_TRACE
       a_con->printState();
 #endif
-      if ((tp->getNodeSequence(node_id) == a_con->theNodeSequence) &&
-          tp->get_node_stopping(node_id)) {
-        /*
-        The node we are connected to is currently in an early stopping phase
-        of a graceful stop. We will not send the prepared transactions. We
-        will simply refuse and let the application code handle the abort.
-        */
-        TRACE_DEBUG("Abort a transaction when stopping a node");
-        a_con->setOperationErrorCodeAbort(4023);
-        a_con->theCommitStatus = NdbTransaction::NeedAbort;
-      } else {
-        /*
+      /*
         The node is hard dead and we cannot continue. We will also release
         the connection to the free pool.
-        */
-        TRACE_DEBUG("The node was stone dead, inform about abort");
-        a_con->setOperationErrorCodeAbort(4025);
-        a_con->theReleaseOnClose = true;
-        a_con->theTransactionIsStarted = false;
-        a_con->theCommitStatus = NdbTransaction::Aborted;
-      }//if
+      */
+      TRACE_DEBUG("The node was stone dead, inform about abort");
+      a_con->setOperationErrorCodeAbort(4025);
+      a_con->theReleaseOnClose = true;
+      a_con->theTransactionIsStarted = false;
+      a_con->theCommitStatus = NdbTransaction::Aborted;
     }//if
     a_con->theReturnStatus = NdbTransaction::ReturnFailure;
     a_con->theCompletionStatus = NdbTransaction::CompletedFailure;

=== modified file 'storage/ndb/src/ndbapi/TransporterFacade.hpp'
--- a/storage/ndb/src/ndbapi/TransporterFacade.hpp	2009-12-15 15:37:38 +0000
+++ b/storage/ndb/src/ndbapi/TransporterFacade.hpp	2010-08-24 14:04:02 +0000
@@ -377,8 +377,7 @@ bool
 TransporterFacade::get_node_stopping(NodeId n) const {
   const ClusterMgr::Node & node = theClusterMgr->getNodeInfo(n);
   return (!node.m_state.getSingleUserMode() &&
-          ((node.m_state.startLevel == NodeState::SL_STOPPING_1) ||
-           (node.m_state.startLevel == NodeState::SL_STOPPING_2)));
+          node.m_state.startLevel >= NodeState::SL_STOPPING_1);
 }
 
 inline

=== modified file 'storage/ndb/test/ndbapi/test_event.cpp'
--- a/storage/ndb/test/ndbapi/test_event.cpp	2010-08-20 10:18:47 +0000
+++ b/storage/ndb/test/ndbapi/test_event.cpp	2010-08-24 14:04:02 +0000
@@ -1011,6 +1011,7 @@ int runRestarter(NDBT_Context* ctx, NDBT
   NdbRestarter restarter;
   int i = 0;
   int lastId = 0;
+  bool abort = ctx->getProperty("Graceful", Uint32(0)) == 0;
 
   if (restarter.getNumDbNodes() < 2){
     ctx->stopTest();
@@ -1027,7 +1028,12 @@ int runRestarter(NDBT_Context* ctx, NDBT
     int id = lastId % restarter.getNumDbNodes();
     int nodeId = restarter.getDbNodeId(id);
     ndbout << "Restart node " << nodeId << endl; 
-    if(restarter.restartOneDbNode(nodeId, false, false, true) != 0){
+    if (abort == false && ((i % 3) == 0))
+    {
+      restarter.insertErrorInNode(nodeId, 13043);
+    }
+
+    if(restarter.restartOneDbNode(nodeId, false, false, abort) != 0){
       g_err << "Failed to restartNextDbNode" << endl;
       result = NDBT_FAILED;
       break;
@@ -3345,6 +3351,20 @@ TESTCASE("EventOperationApplier_NR", 
   FINALIZER(runVerify);
   FINALIZER(runDropShadowTable);
 }
+TESTCASE("EventOperationApplier_NS",
+	 "Verify that if we apply the data we get from event "
+	 "operation is the same as the original table "
+	 "NOTE! No errors are allowed!" ){
+  TC_PROPERTY("Graceful", 1); // runRestarter: graceful stop, not abort
+  INITIALIZER(runCreateEvent);
+  INITIALIZER(runCreateShadowTable);
+  STEP(runEventApplier);
+  STEP(runEventMixedLoad);
+  STEP(runRestarter);
+  FINALIZER(runDropEvent);
+  FINALIZER(runVerify);
+  FINALIZER(runDropShadowTable);
+}
 TESTCASE("MergeEventOperationApplier", 
 	 "Verify that if we apply the data we get from merged event "
 	 "operation is the same as the original table"

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2010-08-17 10:07:41 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2010-08-24 14:04:02 +0000
@@ -1008,6 +1008,11 @@ cmd: test_event
 args: -n EventOperationApplier_NR -l 2
 
 #
+max-time: 600
+cmd: test_event
+args: -n EventOperationApplier_NS T1
+
+#
 max-time: 3600
 cmd: test_event
 args: -n MergeEventOperationApplier_NR -l 2


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20100824140402-plz748e54w6dpvo4.bundle
Thread
bzr commit into mysql-5.1-telco-6.3 branch (jonas:3261) Bug#55641 | Jonas Oreland | 24 Aug