List:Commits« Previous MessageNext Message »
From:jonas Date:October 11 2007 3:24pm
Subject:bk commit into 5.1 tree (jonas:1.2160) BUG#31525
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-10-11 15:24:05+02:00, jonas@stripped +4 -0
  ndb - bug#31525 (recommit to drop6)
    Fix bug where a node that had missed 2 LCPs was not included in the next LCP after
SR

  storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-10-11 15:24:04+02:00,
jonas@stripped +4 -1
        add new error codes for bug#31525

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-10-11 15:24:04+02:00,
jonas@stripped +4 -0
    add new error codes for bug#31525
        fix bug, i.e. include missing_2 in LCP

  storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-10-11 15:24:04+02:00,
jonas@stripped +77 -0
    add testcase for bug#31525

  storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-10-11 15:24:04+02:00,
jonas@stripped +4 -0
    add testcase for bug#31525

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop6

--- 1.58/storage/ndb/test/run-test/daily-basic-tests.txt	2007-10-11 15:24:09 +02:00
+++ 1.59/storage/ndb/test/run-test/daily-basic-tests.txt	2007-10-11 15:24:09 +02:00
@@ -795,3 +795,7 @@
 cmd: testMgm
 args: -n ApiMgmStructEventTimeout T1
 
+max-time: 600
+cmd: testNodeRestart
+args: -n Bug31525 T1
+

--- 1.26/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-10-11 15:24:09 +02:00
+++ 1.27/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-10-11 15:24:09 +02:00
@@ -5,7 +5,7 @@
 Next DBTUP 4013
 Next DBLQH 5047
 Next DBDICT 6007
-Next DBDIH 7184
+Next DBDIH 7193
 Next DBTC 8040
 Next CMVMI 9000
 Next BACKUP 10022
@@ -155,6 +155,9 @@
 
 7027: Crash in  master when changing state to LCP_TAB_SAVED
 7018: Crash in  master when changing state to LCP_TAB_SAVED
+
+7191: Crash when receiving LCP_COMPLETE_REP
+7192: Crash in setLcpActiveStatusStart - when dead node missed to LCP's
 
 ERROR CODES FOR TESTING NODE FAILURE, FAILURE IN COPY FRAGMENT PROCESS:
 -----------------------------------------------------------------------

--- 1.62/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-10-11 15:24:09 +02:00
+++ 1.63/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-10-11 15:24:09 +02:00
@@ -10626,6 +10626,8 @@
 {
   jamEntry();
 
+  CRASH_INSERTION(7191);
+
 #if 0
   ndbout_c("LCP_COMPLETE_REP"); 
   printLCP_COMPLETE_REP(stdout, 
@@ -13366,6 +13368,7 @@
 	// It must be taken over with the copy fragment process after a system
 	// crash. We indicate this by setting the active status to TAKE_OVER.
 	/*-------------------------------------------------------------------*/
+	c_lcpState.m_participatingLQH.set(nodePtr.i);
         nodePtr.p->activeStatus = Sysfile::NS_TakeOver;
         //break; // Fall through
       case Sysfile::NS_TakeOver:{
@@ -13408,6 +13411,7 @@
         break;
       case Sysfile::NS_ActiveMissed_2:
         jam();
+        CRASH_INSERTION(7192);
         if ((nodePtr.p->nodeStatus == NodeRecord::ALIVE) &&
             (!nodePtr.p->copyCompleted)) {
           jam();

--- 1.41/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-10-11 15:24:09 +02:00
+++ 1.42/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-10-11 15:24:09 +02:00
@@ -1596,6 +1596,80 @@
   return NDBT_OK;
 }
 
+int
+runBug31525(NDBT_Context* ctx, NDBT_Step* step)
+{
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  Ndb* pNdb = GETNDB(step);
+  NdbRestarter res;
+
+  if (res.getNumDbNodes() < 2)
+  {
+    return NDBT_OK;
+  }
+
+  int nodes[2];
+  nodes[0] = res.getMasterNodeId();
+  nodes[1] = res.getNextMasterNodeId(nodes[0]);
+  
+  while (res.getNodeGroup(nodes[0]) != res.getNodeGroup(nodes[1]))
+  {
+    ndbout_c("Restarting %u as it not in same node group as %u",
+             nodes[1], nodes[0]);
+    if (res.restartOneDbNode(nodes[1], false, true, true))
+      return NDBT_FAILED;
+    
+    if (res.waitNodesNoStart(nodes+1, 1))
+      return NDBT_FAILED;
+    
+    if (res.startNodes(nodes+1, 1))
+      return NDBT_FAILED;
+    
+    if (res.waitClusterStarted())
+      return NDBT_FAILED;
+
+    nodes[1] = res.getNextMasterNodeId(nodes[0]);
+  }
+  
+  ndbout_c("nodes[0]: %u nodes[1]: %u", nodes[0], nodes[1]);
+  
+  int val = DumpStateOrd::DihMinTimeBetweenLCP;
+  if (res.dumpStateAllNodes(&val, 1))
+    return NDBT_FAILED;
+
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };  
+  if (res.dumpStateAllNodes(val2, 2))
+    return NDBT_FAILED;
+  
+  if (res.insertErrorInAllNodes(932))
+    return NDBT_FAILED;
+
+  if (res.insertErrorInNode(nodes[1], 7192))
+    return NDBT_FAILED;
+  
+  if (res.insertErrorInNode(nodes[0], 7191))
+    return NDBT_FAILED;
+  
+  if (res.waitClusterNoStart())
+    return NDBT_FAILED;
+
+  if (res.startAll())
+    return NDBT_FAILED;
+  
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+
+  if (res.restartOneDbNode(nodes[1], false, false, true))
+    return NDBT_FAILED;
+
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+  
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -1907,6 +1981,9 @@
   INITIALIZER(runLoadTable);
   STEP(runBug20185);
   FINALIZER(runClearTable);
+}
+TESTCASE("Bug31525", ""){
+  INITIALIZER(runBug31525);
 }
 TESTCASE("Bug24717", ""){
   INITIALIZER(runBug24717);
Thread
bk commit into 5.1 tree (jonas:1.2160) BUG#31525jonas11 Oct