List:Commits« Previous MessageNext Message »
From:jonas oreland Date:April 15 2011 1:52pm
Subject:bzr commit into mysql-5.1-telco-7.0 branch (jonas:4315) Bug#59213
Bug#11766167
View as plain text  
#At file:///home/jonas/src/telco-7.0/ based on revid:magnus.blaudd@stripped

 4315 jonas oreland	2011-04-15
      ndb - bug#59213 / bug#11766167
        Add better support for nodes configured with Nodegroup=65536
          (i.e. added to the config now, to be added to a nodegroup later)
      
        Add new config variable StartNoNodegroupTimeout (default=15s),
          which determines how long to wait for nodes wo/ nodegroup
          before treating them as if they had been added to "nowait-nodes"
      
        E.g 1) initial start will wait this amount of time before doing a start
               Note: in an initial start, the values from config.ini are used to
                     determine that nodes have no nodegroup
      
            2) system restart will wait this amount of time before continuing
               with other timeouts (such as StartPartial/StartPartitioned-timeout)
               Note: in a system restart, the values read from disk are used to
                     determine whether nodes have a nodegroup (since they can have
                     been added to a nodegroup after initial start)
       

    added:
      storage/ndb/include/kernel/signaldata/DihRestart.hpp
    modified:
      storage/ndb/include/mgmapi/mgmapi_config_parameters.h
      storage/ndb/src/common/debugger/EventLogger.cpp
      storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
      storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
      storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
      storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
      storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
      storage/ndb/src/mgmsrv/ConfigInfo.cpp
=== added file 'storage/ndb/include/kernel/signaldata/DihRestart.hpp'
--- a/storage/ndb/include/kernel/signaldata/DihRestart.hpp	1970-01-01 00:00:00 +0000
+++ b/storage/ndb/include/kernel/signaldata/DihRestart.hpp	2011-04-15 13:52:53 +0000
@@ -0,0 +1,51 @@
+/*
+   Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+*/
+
+#ifndef DIH_RESTART_HPP
+#define DIH_RESTART_HPP
+
+#include "SignalData.hpp"
+
+struct DihRestartReq
+{
+  STATIC_CONST( SignalLength = 1 );
+  Uint32 senderRef;
+
+  /**
+   * Qmgr checks if it can continue...using EXECUTE_DIRECT
+   *   and fields below, setting senderRef == 0
+   */
+  STATIC_CONST( CheckLength = 1 + NdbNodeBitmask::Size + MAX_NDB_NODES);
+  Uint32 nodemask[NdbNodeBitmask::Size];
+  Uint32 node_gcis[MAX_NDB_NODES];
+};
+
+struct DihRestartRef
+{
+  STATIC_CONST( SignalLength = NdbNodeBitmask::Size );
+  Uint32 no_nodegroup_mask[NdbNodeBitmask::Size];
+};
+
+struct DihRestartConf
+{
+  STATIC_CONST( SignalLength = 2 + NdbNodeBitmask::Size );
+  Uint32 unused;
+  Uint32 latest_gci;
+  Uint32 no_nodegroup_mask[NdbNodeBitmask::Size];
+};
+
+#endif

=== modified file 'storage/ndb/include/mgmapi/mgmapi_config_parameters.h'
--- a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2011-04-09 15:48:21 +0000
+++ b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2011-04-15 13:52:53 +0000
@@ -184,6 +184,8 @@
 
 #define CFG_DB_CONNECT_CHECK_DELAY    618
 
+#define CFG_DB_START_NO_NODEGROUP_TIMEOUT 619
+
 #define CFG_NODE_ARBIT_RANK           200
 #define CFG_NODE_ARBIT_DELAY          201
 #define CFG_RESERVED_SEND_BUFFER_MEMORY 202

=== modified file 'storage/ndb/src/common/debugger/EventLogger.cpp'
--- a/storage/ndb/src/common/debugger/EventLogger.cpp	2011-04-09 15:48:21 +0000
+++ b/storage/ndb/src/common/debugger/EventLogger.cpp	2011-04-15 13:52:53 +0000
@@ -964,7 +964,13 @@ void getTextStartReport(QQQQ) {
     bstr0 = BaseString::getPrettyText(sz, theData + 4 + (0 * sz)), 
     bstr1 = BaseString::getPrettyText(sz, theData + 4 + (1 * sz)), 
     bstr2 = BaseString::getPrettyText(sz, theData + 4 + (2 * sz)), 
-    bstr3 = BaseString::getPrettyText(sz, theData + 4 + (3 * sz));
+    bstr3 = BaseString::getPrettyText(sz, theData + 4 + (3 * sz)),
+    bstr4 = BaseString::getPrettyText(sz, theData + 4 + (4 * sz));
+
+  if (len < 4 + 5 * sz)
+  {
+    bstr4.assign("<unknown>");
+  }
 
   switch(theData[1]){
   case 1: // Wait initial
@@ -1002,6 +1008,24 @@ void getTextStartReport(QQQQ) {
        "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
        time, bstr0.c_str(), bstr1.c_str(), bstr3.c_str(), bstr2.c_str());
     break;
+  case 6:
+    BaseString::snprintf
+      (m_text, m_text_len,
+       "Initial start, waiting %u for %s to connect, "
+       "nodes [ all: %s connected: %s missing: %s no-wait: %s no-nodegroup: %s ]",
+       time, bstr4.c_str(),
+       bstr0.c_str(), bstr1.c_str(), bstr3.c_str(), bstr2.c_str(),
+       bstr4.c_str());
+    break;
+  case 7: // Wait no-nodes/partial timeout
+    BaseString::snprintf
+      (m_text, m_text_len,
+       "Waiting %u sec for nodes %s to connect, "
+       "nodes [ all: %s connected: %s no-wait: %s no-nodegroup: %s ]",
+       time, bstr3.c_str(), bstr0.c_str(), bstr1.c_str(), bstr2.c_str(),
+       bstr4.c_str());
+    break;
+
   case 0x8000: // Do initial
     BaseString::snprintf
       (m_text, m_text_len,

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2011-02-15 11:41:27 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2011-04-15 13:52:53 +0000
@@ -951,6 +951,7 @@ private:
   void replication(Uint32 noOfReplicas,
                    NodeGroupRecordPtr NGPtr,
                    FragmentstorePtr regFragptr);
+  void sendDihRestartRef(Signal*);
   void selectMasterCandidateAndSend(Signal *);
   void setLcpActiveStatusEnd(Signal*);
   void setLcpActiveStatusStart(Signal *);

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2011-02-23 19:28:26 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2011-04-15 13:52:53 +0000
@@ -79,6 +79,7 @@
 #include <signaldata/DropNodegroupImpl.hpp>
 #include <signaldata/DihGetTabInfo.hpp>
 #include <SectionReader.hpp>
+#include <signaldata/DihRestart.hpp>
 
 #include <EventLogger.hpp>
 extern EventLogger * g_eventLogger;
@@ -1455,15 +1456,18 @@ void Dbdih::execTAB_COMMITREQ(Signal* si
   3.2.1.1    LOADING   O W N   B L O C K  R E F E R E N C E (ABSOLUTE PHASE 1)
   *****************************************************************************
   */
-void Dbdih::execDIH_RESTARTREQ(Signal* signal) 
+void Dbdih::execDIH_RESTARTREQ(Signal* signal)
 {
   jamEntry();
-  if (signal->theData[0])
+  const DihRestartReq* req = CAST_CONSTPTR(DihRestartReq,
+                                           signal->getDataPtr());
+  if (req->senderRef != 0)
   {
     jam();
-    cntrlblockref = signal->theData[0];
-    if(m_ctx.m_config.getInitialStart()){
-      sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
+    cntrlblockref = req->senderRef;
+    if(m_ctx.m_config.getInitialStart())
+    {
+      sendDihRestartRef(signal);
     } else {
       readGciFileLab(signal);
     }
@@ -1476,8 +1480,8 @@ void Dbdih::execDIH_RESTARTREQ(Signal* s
      */
     Uint32 i;
     NdbNodeBitmask mask;
-    mask.assign(NdbNodeBitmask::Size, signal->theData + 1);
-    Uint32 *node_gcis = signal->theData+1+NdbNodeBitmask::Size;
+    mask.assign(NdbNodeBitmask::Size, req->nodemask);
+    const Uint32 *node_gcis = req->node_gcis;
     Uint32 node_group_gcis[MAX_NDB_NODES+1];
     bzero(node_group_gcis, sizeof(node_group_gcis));
     for (i = 0; i<MAX_NDB_NODES; i++)
@@ -4696,24 +4700,65 @@ void Dbdih::closingGcpLab(Signal* signal
     return;
   } else {
     jam();
-    sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
+    sendDihRestartRef(signal);
     return;
   }//if
 }//Dbdih::closingGcpLab()
 
+void
+Dbdih::sendDihRestartRef(Signal* signal)
+{
+  jam();
+
+  /**
+   * We couldn't read P0.Sysfile...
+   *   so compute no_nodegroup_mask from configuration
+   */
+  NdbNodeBitmask no_nodegroup_mask;
+
+  ndb_mgm_configuration_iterator * iter =
+    m_ctx.m_config.getClusterConfigIterator();
+  for(ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
+  {
+    jam();
+    Uint32 nodeId;
+    Uint32 nodeType;
+
+    ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_NODE_ID, &nodeId));
+    ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION,
+                                          &nodeType));
+
+    if (nodeType == NodeInfo::DB)
+    {
+      jam();
+      Uint32 ng;
+      if (ndb_mgm_get_int_parameter(iter, CFG_DB_NODEGROUP, &ng) == 0)
+      {
+        jam();
+        if (ng == NDB_NO_NODEGROUP)
+        {
+          no_nodegroup_mask.set(nodeId);
+        }
+      }
+    }
+  }
+  DihRestartRef * ref = CAST_PTR(DihRestartRef, signal->getDataPtrSend());
+  no_nodegroup_mask.copyto(NdbNodeBitmask::Size, ref->no_nodegroup_mask);
+  sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal,
+             DihRestartRef::SignalLength, JBB);
+}
+
 /* ------------------------------------------------------------------------- */
 /*       SELECT THE MASTER CANDIDATE TO BE USED IN SYSTEM RESTARTS.          */
 /* ------------------------------------------------------------------------- */
 void Dbdih::selectMasterCandidateAndSend(Signal* signal)
 {
   setNodeGroups();
-  signal->theData[0] = getOwnNodeId();
-  signal->theData[1] = SYSFILE->lastCompletedGCI[getOwnNodeId()];
-  sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal, 2, JBB);
-  
+
   NodeRecordPtr nodePtr;
   Uint32 node_groups[MAX_NDB_NODES];
   memset(node_groups, 0, sizeof(node_groups));
+  NdbNodeBitmask no_nodegroup_mask;
   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
     jam();
     if (Sysfile::getNodeStatus(nodePtr.i, SYSFILE->nodeStatus) == Sysfile::NS_NotDefined)
@@ -4722,12 +4767,24 @@ void Dbdih::selectMasterCandidateAndSend
       continue;
     }
     const Uint32 ng = Sysfile::getNodeGroup(nodePtr.i, SYSFILE->nodeGroups);
-    if(ng != NO_NODE_GROUP_ID){
+    if(ng != NO_NODE_GROUP_ID)
+    {
       ndbrequire(ng < MAX_NDB_NODES);
       node_groups[ng]++;
     }
+    else
+    {
+      no_nodegroup_mask.set(nodePtr.i);
+    }
   }
-  
+
+  DihRestartConf * conf = CAST_PTR(DihRestartConf, signal->getDataPtrSend());
+  conf->unused = getOwnNodeId();
+  conf->latest_gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
+  no_nodegroup_mask.copyto(NdbNodeBitmask::Size, conf->no_nodegroup_mask);
+  sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal,
+             DihRestartConf::SignalLength, JBB);
+
   for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
     jam();
     Uint32 count = node_groups[nodePtr.i];
@@ -4767,7 +4824,7 @@ void Dbdih::openingGcpErrorLab(Signal* s
     /*   CANNOT CONTINUE THE RESTART IN THIS CASE. TELL NDBCNTR OF OUR       */
     /*   FAILURE.                                                            */
     /*---------------------------------------------------------------------- */
-    sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
+    sendDihRestartRef(signal);
     return;
   }//if
 }//Dbdih::openingGcpErrorLab()
@@ -4799,7 +4856,7 @@ void Dbdih::closingGcpCrashLab(Signal* s
   /*     WE DISCOVERED A FAILURE WITH THE SECOND FILE AS WELL. THIS IS A     */
   /*     SERIOUS PROBLEM. REPORT FAILURE TO NDBCNTR.                         */
   /* ----------------------------------------------------------------------- */
-  sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
+  sendDihRestartRef(signal);
 }//Dbdih::closingGcpCrashLab()
 
 /*****************************************************************************/

=== modified file 'storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2011-02-23 19:28:26 +0000
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2011-04-15 13:52:53 +0000
@@ -50,6 +50,7 @@
 #include <AttributeHeader.hpp>
 #include <Configuration.hpp>
 #include <DebuggerNames.hpp>
+#include <signaldata/DihRestart.hpp>
 
 #include <NdbOut.hpp>
 #include <NdbTick.h>
@@ -659,9 +660,11 @@ void Ndbcntr::startPhase2Lab(Signal* sig
 {
   c_start.m_lastGci = 0;
   c_start.m_lastGciNodeId = getOwnNodeId();
-  
-  signal->theData[0] = reference();
-  sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB);
+
+  DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
+  req->senderRef = reference();
+  sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
+             DihRestartReq::SignalLength, JBB);
   return;
 }//Ndbcntr::startPhase2Lab()
 
@@ -671,8 +674,10 @@ void Ndbcntr::startPhase2Lab(Signal* sig
 void Ndbcntr::execDIH_RESTARTCONF(Signal* signal) 
 {
   jamEntry();
-  //cmasterDihId = signal->theData[0];
-  c_start.m_lastGci = signal->theData[1];
+
+  const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
+                                              signal->getDataPtrSend());
+  c_start.m_lastGci = conf->latest_gci;
   ctypeOfStart = NodeState::ST_SYSTEM_RESTART;
   cdihStartType = ctypeOfStart;
   ph2ALab(signal);

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2011-04-09 15:48:21 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2011-04-15 13:52:53 +0000
@@ -127,6 +127,7 @@ public:
     NdbNodeBitmask m_skip_nodes;
     NdbNodeBitmask m_starting_nodes;
     NdbNodeBitmask m_starting_nodes_w_log;
+    NdbNodeBitmask m_no_nodegroup_nodes;
 
     Uint16 m_president_candidate;
     Uint32 m_president_candidate_gci;
@@ -508,6 +509,7 @@ private:
   Uint32 c_restartPartialTimeout;
   Uint32 c_restartPartionedTimeout;
   Uint32 c_restartFailureTimeout;
+  Uint32 c_restartNoNodegroupTimeout;
   Uint64 c_start_election_time;
 
   Uint16 creadyDistCom;

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2011-04-09 15:48:21 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2011-04-15 13:52:53 +0000
@@ -39,7 +39,7 @@
 #include <signaldata/EnableCom.hpp>
 #include <signaldata/RouteOrd.hpp>
 #include <signaldata/NodePing.hpp>
-
+#include <signaldata/DihRestart.hpp>
 #include <ndb_version.h>
 
 //#define DEBUG_QMGR_START
@@ -386,14 +386,15 @@ void Qmgr::startphase1(Signal* signal) 
 {
   jamEntry();
 
-  
   NodeRecPtr nodePtr;
   nodePtr.i = getOwnNodeId();
   ptrAss(nodePtr, nodeRec);
   nodePtr.p->phase = ZSTARTING;
-  
-  signal->theData[0] = reference();
-  sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB);
+
+  DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
+  req->senderRef = reference();
+  sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
+             DihRestartReq::SignalLength, JBB);
   return;
 }
 
@@ -402,7 +403,11 @@ Qmgr::execDIH_RESTARTREF(Signal*signal)
 {
   jamEntry();
 
+  const DihRestartRef * ref = CAST_CONSTPTR(DihRestartRef,
+                                            signal->getDataPtr());
   c_start.m_latest_gci = 0;
+  c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
+                                      ref->no_nodegroup_mask);
   execCM_INFOCONF(signal);
 }
 
@@ -410,8 +415,13 @@ void
 Qmgr::execDIH_RESTARTCONF(Signal*signal)
 {
   jamEntry();
-  
-  c_start.m_latest_gci = signal->theData[1];
+
+  const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
+                                              signal->getDataPtr());
+
+  c_start.m_latest_gci = conf->latest_gci;
+  c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
+                                      conf->no_nodegroup_mask);
   execCM_INFOCONF(signal);
 }
 
@@ -1446,6 +1456,12 @@ Qmgr::check_startup(Signal* signal)
   Uint64 now = NdbTick_CurrentMillisecond();
   Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
   Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;
+  Uint64 no_nodegroup_timeout = c_start_election_time +
+    c_restartNoNodegroupTimeout;
+
+  const bool no_nodegroup_active =
+    (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
+    (! c_start.m_no_nodegroup_nodes.isclear());
 
   /**
    * First see if we should wait more...
@@ -1465,25 +1481,60 @@ Qmgr::check_startup(Signal* signal)
   if ((c_start.m_latest_gci == 0) || 
       (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
   {
-    if (!tmp.equal(c_definedNodes))
+    if (tmp.equal(c_definedNodes))
     {
       jam();
-      signal->theData[1] = 1;
-      signal->theData[2] = ~0;
-      report_mask.assign(wait);
-      retVal = 0;
+      signal->theData[1] = 0x8000;
+      report_mask.assign(c_definedNodes);
+      report_mask.bitANDC(c_start.m_starting_nodes);
+      retVal = 1;
       goto start_report;
     }
+    else if (no_nodegroup_active)
+    {
+      if (now < no_nodegroup_timeout)
+      {
+        signal->theData[1] = 6;
+        signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
+        report_mask.assign(wait);
+        retVal = 0;
+        goto start_report;
+      }
+      tmp.bitOR(c_start.m_no_nodegroup_nodes);
+      if (tmp.equal(c_definedNodes))
+      {
+        signal->theData[1] = 0x8000;
+        report_mask.assign(c_definedNodes);
+        report_mask.bitANDC(c_start.m_starting_nodes);
+        retVal = 1;
+        goto start_report;
+      }
+      else
+      {
+        jam();
+        signal->theData[1] = 1;
+        signal->theData[2] = ~0;
+        report_mask.assign(wait);
+        retVal = 0;
+        goto start_report;
+      }
+    }
     else
     {
       jam();
-      signal->theData[1] = 0x8000;
-      report_mask.assign(c_definedNodes);
-      report_mask.bitANDC(c_start.m_starting_nodes);
-      retVal = 1;
+      signal->theData[1] = 1;
+      signal->theData[2] = ~0;
+      report_mask.assign(wait);
+      retVal = 0;
       goto start_report;
     }
   }
+
+  if (now >= no_nodegroup_timeout)
+  {
+    tmp.bitOR(c_start.m_no_nodegroup_nodes);
+  }
+
   {
     const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
     CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
@@ -1559,10 +1610,22 @@ Qmgr::check_startup(Signal* signal)
     if (now < partial_timeout)
     {
       jam();
+
       signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
       signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
       report_mask.assign(wait);
       retVal = 0;
+
+      if (no_nodegroup_active && now < no_nodegroup_timeout)
+      {
+        signal->theData[1] = 7;
+        signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
+      }
+      else if (no_nodegroup_active && now >= no_nodegroup_timeout)
+      {
+        report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
+      }
+
       goto start_report;
     }
   
@@ -1595,14 +1658,14 @@ check_log:
   {
     Uint32 save[4+4*NdbNodeBitmask::Size];
     memcpy(save, signal->theData, sizeof(save));
-    
-    signal->theData[0] = 0;
-    c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
-    memcpy(signal->theData+1+NdbNodeBitmask::Size, c_start.m_node_gci,
-	   4*MAX_NDB_NODES);
-    EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal, 
-		   1+NdbNodeBitmask::Size+MAX_NDB_NODES);
-    
+
+    DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
+    req->senderRef = 0;
+    c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
+    memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
+    EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
+		   DihRestartReq::CheckLength);
+
     incompleteng = signal->theData[0];
     memcpy(signal->theData, save, sizeof(save));
 
@@ -1650,8 +1713,9 @@ start_report:
     c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
     c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
     report_mask.copyto(sz, ptr); ptr+= sz;
-    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 
-	       4+4*NdbNodeBitmask::Size, JBB);
+    c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
+    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
+	       4+5*NdbNodeBitmask::Size, JBB);
   }
   return retVal;
   
@@ -2434,6 +2498,7 @@ void Qmgr::initData(Signal* signal) 
   c_restartPartialTimeout = 30000;
   c_restartPartionedTimeout = 60000;
   c_restartFailureTimeout = ~0;
+  c_restartNoNodegroupTimeout = 15000;
   ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
@@ -2441,26 +2506,33 @@ void Qmgr::initData(Signal* signal) 
 			    &c_restartPartialTimeout);
   ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
 			    &c_restartPartionedTimeout);
+  ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
+			    &c_restartNoNodegroupTimeout);
   ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
 			    &c_restartFailureTimeout);
   ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
                             &ccInterval);
- 
+
   if(c_restartPartialTimeout == 0)
   {
     c_restartPartialTimeout = ~0;
   }
-  
+
   if (c_restartPartionedTimeout ==0)
   {
     c_restartPartionedTimeout = ~0;
   }
-  
+
   if (c_restartFailureTimeout == 0)
   {
     c_restartFailureTimeout = ~0;
   }
 
+  if (c_restartNoNodegroupTimeout == 0)
+  {
+    c_restartNoNodegroupTimeout = ~0;
+  }
+
   setHbDelay(hbDBDB);
   setCCDelay(ccInterval);
   setArbitTimeout(arbitTimeout);

=== modified file 'storage/ndb/src/mgmsrv/ConfigInfo.cpp'
--- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2011-04-09 15:48:21 +0000
+++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2011-04-15 13:52:53 +0000
@@ -977,7 +977,19 @@ const ConfigInfo::ParamInfo ConfigInfo::
     "0",
     "0",
     STR_VALUE(MAX_INT_RNIL) },
-  
+
+  {
+    CFG_DB_START_NO_NODEGROUP_TIMEOUT,
+    "StartNoNodegroupTimeout",
+    DB_TOKEN,
+    "Time to wait for nodes wo/ nodegroup before trying to start (0=forever)",
+    ConfigInfo::CI_USED,
+    0,
+    ConfigInfo::CI_INT,
+    "15000",
+    "0",
+    STR_VALUE(MAX_INT_RNIL) },
+
   {
     CFG_DB_HEARTBEAT_INTERVAL,
     "HeartbeatIntervalDbDb",


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20110415135253-nbj37ue2o70w4b5f.bundle
Thread
bzr commit into mysql-5.1-telco-7.0 branch (jonas:4315) Bug#59213Bug#11766167jonas oreland15 Apr