List:Internals« Previous MessageNext Message »
From:jonas Date:December 9 2005 12:51pm
Subject:bk commit into 4.1 tree (jonas:1.2458) BUG#15632
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2458 05/12/09 13:51:12 jonas@stripped +4 -0
  bug#15632 - ndb
    Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a) by also waiting for starting nodes

  ndb/test/ndbapi/testNodeRestart.cpp
    1.12 05/12/09 13:51:10 jonas@stripped +60 -0
    Add testcase for bug#15632

  ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.27 05/12/09 13:51:10 jonas@stripped +35 -3
    Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a)
      by also waiting for starting nodes 

  ndb/src/kernel/blocks/ERROR_codes.txt
    1.11 05/12/09 13:51:10 jonas@stripped +2 -0
    New error code for delaying INCL_NODE_REQ 

  ndb/include/ndb_version.h.in
    1.3 05/12/09 13:51:10 jonas@stripped +3 -0
    Handle upgrade of bug fix

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/mysql-4.1

--- 1.2/ndb/include/ndb_version.h.in	2005-07-14 18:02:04 +02:00
+++ 1.3/ndb/include/ndb_version.h.in	2005-12-09 13:51:10 +01:00
@@ -57,5 +57,8 @@
  */
 /*#define NDB_VERSION_ID 0*/
 
+#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
+#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
+
 #endif
  

--- 1.10/ndb/src/kernel/blocks/ERROR_codes.txt	2005-12-08 15:28:13 +01:00
+++ 1.11/ndb/src/kernel/blocks/ERROR_codes.txt	2005-12-09 13:51:10 +01:00
@@ -61,6 +61,8 @@
 5007:
 Delay GCP_SAVEREQ by 10 secs
 
+7165: Delay INCL_NODE_REQ in starting node yielding error in GCP_PREPARE
+
 ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------
 

--- 1.26/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2005-12-08 15:28:13 +01:00
+++ 1.27/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2005-12-09 13:51:10 +01:00
@@ -215,7 +215,7 @@
   signal->theData[2] = c_nodeStartMaster.failNr;
   signal->theData[3] = 0;
   signal->theData[4] = currentgcp;  
-  sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
+  sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
 }//Dbdih::sendINCL_NODEREQ()
 
 void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
@@ -1857,6 +1857,14 @@
   // global checkpoint id and the correct state. We do not wait for any reply
   // since the starting node will not send any.
   /*-------------------------------------------------------------------------*/
+  Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
+  
+  if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+      (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
+  {
+    c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
+  }
+  
   sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
 }//Dbdih::gcpBlockedLab()
 
@@ -2059,6 +2067,13 @@
   jamEntry();
   Uint32 retRef = signal->theData[0];
   Uint32 nodeId = signal->theData[1];
+  if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
+  {
+    CLEAR_ERROR_INSERT_VALUE;
+    sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
+    return;
+  }
+  
   Uint32 tnodeStartFailNr = signal->theData[2];
   currentgcp = signal->theData[4];
   CRASH_INSERTION(7127);
@@ -2086,6 +2101,15 @@
     // id's and the lcp status.
     /*-----------------------------------------------------------------------*/
     CRASH_INSERTION(7171);
+    Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
+    
+    if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+	(NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
+    {
+      signal->theData[0] = getOwnNodeId();
+      signal->theData[1] = getOwnNodeId();
+      sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
+    }
     return;
   }//if
   if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
@@ -3737,8 +3761,16 @@
   /*------------------------------------------------------------------------*/
   // Verify that a starting node has also crashed. Reset the node start record.
   /*-------------------------------------------------------------------------*/
-  if (c_nodeStartMaster.startNode != RNIL) {
-    ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
+  if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
+  {
+    BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
+    SystemError * const sysErr = (SystemError*)&signal->theData[0];
+    sysErr->errorCode = SystemError::StartInProgressError;
+    sysErr->errorRef = reference();
+    sysErr->data1= 0;
+    sysErr->data2= __LINE__;
+    sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,  SystemError::SignalLength, JBA);
+    nodeResetStart();  
   }//if
 
   /*--------------------------------------------------*/

--- 1.11/ndb/test/ndbapi/testNodeRestart.cpp	2005-12-08 15:28:13 +01:00
+++ 1.12/ndb/test/ndbapi/testNodeRestart.cpp	2005-12-09 13:51:10 +01:00
@@ -446,6 +446,56 @@
   return NDBT_OK;
 }
 
+int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter restarter;
+  // Bug#15632 test step: restart one node twice, first with error insert 7165, then with 7171. NOTE(review): result/loops/records are never used.
+  int nodeId = restarter.getDbNodeId(1);
+
+  ndbout << "Restart node " << nodeId << endl; 
+  // First pass: restart the node with initial=false, nostart=true, abort=true; any non-zero restarter result fails the step.
+  if (restarter.restartOneDbNode(nodeId,
+				 /** initial */ false, 
+				 /** nostart */ true,
+				 /** abort   */ true))
+    return NDBT_FAILED;
+  // Block until the restarter reports the node in the no-start state.
+  if (restarter.waitNodesNoStart(&nodeId, 1))
+    return NDBT_FAILED; 
+   // Error 7165 makes the starting node re-send INCL_NODEREQ to itself with a delay (see the DbdihMain.cpp hunk in this patch).
+  if (restarter.insertErrorInNode(nodeId, 7165))
+    return NDBT_FAILED;
+  // Start the node with the error insert armed, then wait for the start to complete.
+  if (restarter.startNodes(&nodeId, 1))
+    return NDBT_FAILED;
+
+  if (restarter.waitNodesStarted(&nodeId, 1))
+    return NDBT_FAILED;
+
+  if (restarter.restartOneDbNode(nodeId,
+				 /** initial */ false, 
+				 /** nostart */ true,
+				 /** abort   */ true))
+    return NDBT_FAILED;
+  // Second pass: the node has been taken down again with the same flags; wait for the no-start state.
+  if (restarter.waitNodesNoStart(&nodeId, 1))
+    return NDBT_FAILED; 
+   // 7171 is a CRASH_INSERTION point in DbdihMain.cpp (visible as context in this patch).
+  if (restarter.insertErrorInNode(nodeId, 7171))
+    return NDBT_FAILED;
+  // Start the node with 7171 armed.
+  if (restarter.startNodes(&nodeId, 1))
+    return NDBT_FAILED;
+  // The step only passes if the node is reported started after this second start.
+  if (restarter.waitNodesStarted(&nodeId, 1))
+    return NDBT_FAILED;
+  // stopTest() presumably tells any concurrently running steps to finish -- confirm against NDBT_Context.
+  ctx->stopTest();
+  return NDBT_OK;
+}
+
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -596,6 +646,8 @@
   INITIALIZER(runCheckAllNodesStarted);
   INITIALIZER(runLoadTable);
   STEP(runRestarts);
+  STEP(runPkUpdateUntilStopped);
+  STEP(runScanUpdateUntilStopped);
   FINALIZER(runScanReadVerify);
   FINALIZER(runClearTable);
 }
@@ -685,6 +737,8 @@
   INITIALIZER(runCheckAllNodesStarted);
   INITIALIZER(runLoadTable);
   STEP(runRestarts);
+  STEP(runPkUpdateUntilStopped);
+  STEP(runScanUpdateUntilStopped);
   FINALIZER(runScanReadVerify);
   FINALIZER(runClearTable);
 }
@@ -714,6 +768,12 @@
   INITIALIZER(runLoadTable);
   STEP(runScanUpdateUntilStopped);
   STEP(runBug15587);
+  FINALIZER(runClearTable);
+}
+TESTCASE("Bug15632",
+	 "Test bug with NF during NR"){
+  INITIALIZER(runLoadTable);
+  STEP(runBug15632);
   FINALIZER(runClearTable);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
Thread
bk commit into 4.1 tree (jonas:1.2458) BUG#15632jonas9 Dec