List: Commits | « Previous Message | Next Message »
From:jonas Date:March 5 2007 4:03pm
Subject:bk commit into 5.1 tree (jonas:1.2105) BUG#26481
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-05 16:03:27+01:00, jonas@stripped +6 -0
  ndb - wl2325-5.0
    Bug#26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

  storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp@stripped, 2007-03-05 16:03:25+01:00,
jonas@stripped +1 -0
    Bug #26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

  storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp@stripped, 2007-03-05 16:03:25+01:00,
jonas@stripped +1 -0
    Bug #26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-03-05 16:03:25+01:00,
jonas@stripped +22 -0
    Bug #26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

  storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2007-03-05 16:03:25+01:00,
jonas@stripped +7 -4
    Bug #26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

  storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-03-05 16:03:25+01:00,
jonas@stripped +43 -0
    Bug #26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

  storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-03-05 16:03:25+01:00,
jonas@stripped +4 -0
    Bug #26481 Node failure during initial node restart can make subsequent node restart
fail with REDO corruption

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop5

--- 1.43/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-05 16:03:30 +01:00
+++ 1.44/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-05 16:03:30 +01:00
@@ -469,6 +469,10 @@
 cmd: testNodeRestart
 args: -n Bug26457 T1
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug26481 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

--- 1.15/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2007-03-05 16:03:30 +01:00
+++ 1.16/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2007-03-05 16:03:30 +01:00
@@ -1368,6 +1368,7 @@
   Uint32 csystemnodes;
   Uint32 currentgcp;
   Uint32 c_newest_restorable_gci;
+  Uint32 c_set_initial_start_flag;
 
   enum GcpMasterTakeOverState {
     GMTOS_IDLE = 0,

--- 1.12/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2007-03-05 16:03:30 +01:00
+++ 1.13/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2007-03-05 16:03:30 +01:00
@@ -72,6 +72,7 @@
   c_blockCommit    = false;
   c_blockCommitNo  = 1;
   cntrlblockref    = RNIL;
+  c_set_initial_start_flag = FALSE;
 }//Dbdih::initData()
 
 void Dbdih::initRecords() 

--- 1.57/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-05 16:03:30 +01:00
+++ 1.58/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-05 16:03:30 +01:00
@@ -666,6 +666,12 @@
   {
     jam();
     memcpy(sysfileData, cdata, sizeof(sysfileData));
+
+    if (c_set_initial_start_flag)
+    {
+      jam();
+      Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
+    }
   }
 
   c_copyGCISlave.m_copyReason = reason;
@@ -1306,6 +1312,11 @@
     // The permission is given by the master node in the alive set.  
     /*-----------------------------------------------------------------------*/
     createMutexes(signal, 0);
+    if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
+    {
+      jam();
+      c_set_initial_start_flag = TRUE; // In sysfile...
+    }
     break;
     
   case ZNDB_SPH3:
@@ -10431,6 +10442,17 @@
   
   sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal, 
 	     LcpCompleteRep::SignalLength, JBB);
+
+  /**
+   * Say that an initial node restart does not need to be redone
+   *   once node has been part of first LCP
+   */
+  if (c_set_initial_start_flag &&
+      c_lcpState.m_participatingLQH.get(getOwnNodeId()))
+  {
+    jam();
+    c_set_initial_start_flag = FALSE;
+  }
 }
 
 /*-------------------------------------------------------------------------- */

--- 1.90/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-05 16:03:30 +01:00
+++ 1.91/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-05 16:03:30 +01:00
@@ -11605,7 +11605,8 @@
     return;
   }
 
-  if(getNodeState().getNodeRestartInProgress()){
+  if(getNodeState().getNodeRestartInProgress() && cstartRecReq < 2)
+  {
     GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
     saveRef->dihPtr = dihPtr;
     saveRef->nodeId = getOwnNodeId();
@@ -13749,7 +13750,7 @@
        *  NO MORE FRAGMENTS ARE WAITING FOR SYSTEM RESTART.
        * -------------------------------------------------------------------- */
       lcpPtr.p->lcpState = LcpRecord::LCP_IDLE;
-      if (cstartRecReq == ZTRUE) {
+      if (cstartRecReq == 1) {
         jam();
 	/* ----------------------------------------------------------------
          *  WE HAVE ALSO RECEIVED AN INDICATION THAT NO MORE FRAGMENTS 
@@ -13819,7 +13820,7 @@
   ndbrequire(req->receivingNodeId == cownNodeid);
 
   cnewestCompletedGci = cnewestGci;
-  cstartRecReq = ZTRUE;
+  cstartRecReq = 1;
   for (logPartPtr.i = 0; logPartPtr.i < 4; logPartPtr.i++) {
     ptrAss(logPartPtr, logPartRecord);
     logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci;
@@ -13840,6 +13841,7 @@
   }//if
   if(cstartType == NodeState::ST_INITIAL_NODE_RESTART){
     jam();
+    cstartRecReq = 2;
     StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
     conf->startingNodeId = getOwnNodeId();
     sendSignal(cmasterDihBlockref, GSN_START_RECCONF, signal, 
@@ -15649,6 +15651,7 @@
   } else if ((cstartType == NodeState::ST_NODE_RESTART) ||
              (cstartType == NodeState::ST_SYSTEM_RESTART)) {
     jam();
+    cstartRecReq = 2;
     StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
     conf->startingNodeId = getOwnNodeId();
     sendSignal(cmasterDihBlockref, GSN_START_RECCONF, signal, 
@@ -16611,7 +16614,7 @@
     cCommitBlocked = false;
     ccurrentGcprec = RNIL;
     caddNodeState = ZFALSE;
-    cstartRecReq = ZFALSE;
+    cstartRecReq = 0;
     cnewestGci = (UintR)-1;
     cnewestCompletedGci = (UintR)-1;
     crestartOldestGci = 0;

--- 1.30/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-05 16:03:30 +01:00
+++ 1.31/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-05 16:03:30 +01:00
@@ -1169,6 +1169,46 @@
   return NDBT_OK;
 }
 
+int 
+runBug26481(NDBT_Context* ctx, NDBT_Step* step)
+{
+  
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter res;
+  
+  int node = res.getRandomNotMasterNodeId(rand());
+  ndbout_c("node: %d", node);
+  if (res.restartOneDbNode(node, true, true, true))
+    return NDBT_FAILED;
+
+  if (res.waitNodesNoStart(&node, 1))
+    return NDBT_FAILED;
+
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+  if (res.dumpStateOneNode(node, val2, 2))
+    return NDBT_FAILED;
+
+  if (res.insertErrorInNode(node, 7018))
+    return NDBT_FAILED;
+
+  if (res.startNodes(&node, 1))
+    return NDBT_FAILED;
+
+  res.waitNodesStartPhase(&node, 1, 3);
+  
+  if (res.waitNodesNoStart(&node, 1))
+    return NDBT_FAILED;
+
+  res.startNodes(&node, 1);
+  
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+  
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -1499,6 +1539,9 @@
 }
 TESTCASE("Bug26457", ""){
   INITIALIZER(runBug26457);
+}
+TESTCASE("Bug26481", ""){
+  INITIALIZER(runBug26481);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
 
Thread
bk commit into 5.1 tree (jonas:1.2105) BUG#26481 | jonas | 5 Mar