List: Commits « Previous Message | Next Message »
From: tomas  Date: May 28 2007 1:59pm
Subject: bk commit into 5.0 tree (tomas:1.2404) BUG#28717
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-05-28 15:59:02+02:00, tomas@stripped +5 -0
  bug#28717, make sure only the master updates activeStatus,
    so that other nodes don't get confused after having received the status from the master
    and then trying to update it themselves

  ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-05-28 15:59:00+02:00, tomas@stripped +6 -1
    error 1001, delay node_failrep

  ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-05-28 15:59:00+02:00, tomas@stripped +7 -1
    bug#28717, make sure only the master updates activeStatus,
      so that other nodes don't get confused after having received the status from the master
      and then trying to update it themselves

  ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp@stripped, 2007-05-28 15:59:00+02:00, tomas@stripped +7 -0
    error 1001, delay node_failrep

  ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-05-28 15:59:01+02:00, tomas@stripped +81 -0
    testcase

  ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-05-28 15:59:01+02:00, tomas@stripped +4 -0
    testcase

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	whalegate.ndb.mysql.com
# Root:	/home/tomas/mysql-5.0-telco-gca

--- 1.47/ndb/test/run-test/daily-basic-tests.txt	2007-04-25 09:23:26 +02:00
+++ 1.48/ndb/test/run-test/daily-basic-tests.txt	2007-05-28 15:59:01 +02:00
@@ -492,6 +492,10 @@
 cmd: testDict
 args: -n CreateAndDrop 
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug28717 T1
+
 max-time: 1500
 cmd: testDict
 args: -n CreateAndDropAtRandom -l 200 T1

--- 1.25/ndb/src/kernel/blocks/ERROR_codes.txt	2007-01-03 06:17:32 +01:00
+++ 1.26/ndb/src/kernel/blocks/ERROR_codes.txt	2007-05-28 15:59:00 +02:00
@@ -1,5 +1,5 @@
 Next QMGR 1
-Next NDBCNTR 1000
+Next NDBCNTR 1002
 Next NDBFS 2000
 Next DBACC 3002
 Next DBTUP 4014
@@ -487,3 +487,8 @@
 6003 Crash in participant @ CreateTabReq::Prepare
 6004 Crash in participant @ CreateTabReq::Commit
 6005 Crash in participant @ CreateTabReq::CreateDrop
+
+Ndbcntr:
+--------
+
+1001: Delay sending NODE_FAILREP (to own node), until error is cleared

--- 1.67/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-07 02:11:07 +01:00
+++ 1.68/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-05-28 15:59:00 +02:00
@@ -4448,12 +4448,18 @@
   jam();
   const Uint32 nodeId = failedNodePtr.i;
 
-  if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){
+  if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
+  {
     /*----------------------------------------------------*/
     /*  THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE   */
     /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT     */
     /* THE NODE HAVE MISSED A LOCAL CHECKPOINT.           */
     /*----------------------------------------------------*/
+
+    /**
+     * Bug#28717, Only master should do this, as this status is copied
+     *   to other nodes
+     */
     switch (failedNodePtr.p->activeStatus) {
     case Sysfile::NS_Active:
       jam();

--- 1.35/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2006-12-23 20:04:16 +01:00
+++ 1.36/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-05-28 15:59:00 +02:00
@@ -1375,6 +1375,13 @@
 {
   jamEntry();
 
+  if (ERROR_INSERTED(1001))
+  {
+    sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100, 
+                        signal->getLength());
+    return;
+  }
+  
   const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0];
   NdbNodeBitmask allFailed; 
   allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);

--- 1.26/ndb/test/ndbapi/testNodeRestart.cpp	2007-01-11 21:12:09 +01:00
+++ 1.27/ndb/test/ndbapi/testNodeRestart.cpp	2007-05-28 15:59:01 +02:00
@@ -1045,6 +1045,84 @@
 }
 
 
+int
+runBug28717(NDBT_Context* ctx, NDBT_Step* step)
+{
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  Ndb* pNdb = GETNDB(step);
+  NdbRestarter res;
+
+  if (res.getNumDbNodes() < 4)
+  {
+    return NDBT_OK;
+  }
+
+  int master = res.getMasterNodeId();
+  int node0 = res.getRandomNodeOtherNodeGroup(master, rand());
+  int node1 = res.getRandomNodeSameNodeGroup(node0, rand());
+  
+  ndbout_c("master: %d node0: %d node1: %d", master, node0, node1);
+  
+  if (res.restartOneDbNode(node0, false, true, true))
+  {
+    return NDBT_FAILED;
+  }
+
+  {
+    int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
+    NdbLogEventHandle handle = 
+      ndb_mgm_create_logevent_handle(res.handle, filter);
+    
+
+    int dump[] = { DumpStateOrd::DihStartLcpImmediately };
+    struct ndb_logevent event;
+    
+    for (Uint32 i = 0; i<3; i++)
+    {
+      res.dumpStateOneNode(master, dump, 1);
+      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
+            event.type != NDB_LE_LocalCheckpointStarted);
+      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
+            event.type != NDB_LE_LocalCheckpointCompleted);
+    } 
+  }
+  
+  if (res.waitNodesNoStart(&node0, 1))
+    return NDBT_FAILED;
+  
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+  
+  if (res.dumpStateOneNode(node0, val2, 2))
+    return NDBT_FAILED;
+  
+  if (res.insertErrorInNode(node0, 5010))
+    return NDBT_FAILED;
+  
+  if (res.insertErrorInNode(node1, 1001))
+    return NDBT_FAILED;
+  
+  if (res.startNodes(&node0, 1))
+    return NDBT_FAILED;
+  
+  NdbSleep_SecSleep(3);
+
+  if (res.insertErrorInNode(node1, 0))
+    return NDBT_FAILED;
+
+  if (res.waitNodesNoStart(&node0, 1))
+    return NDBT_FAILED;
+
+  if (res.startNodes(&node0, 1))
+    return NDBT_FAILED;
+
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+  
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -1365,6 +1443,9 @@
 }
 TESTCASE("Bug25554", ""){
   INITIALIZER(runBug25554);
+}
+TESTCASE("Bug28717", ""){
+  INITIALIZER(runBug28717);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
 
Thread
bk commit into 5.0 tree (tomas:1.2404) BUG#28717 — tomas — 28 May