List: Internals  « Previous Message | Next Message »
From: jonas  Date: December 8 2005 2:28pm
Subject: bk commit into 4.1 tree (jonas:1.2456) BUG#15587
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2456 05/12/08 15:28:15 jonas@stripped +7 -0
  bug#15587 - ndb
    Fix error in NF during NR

  ndb/test/run-test/daily-basic-tests.txt
    1.22 05/12/08 15:28:13 jonas@stripped +4 -0
    Add testcase for bug#15587

  ndb/test/ndbapi/testNodeRestart.cpp
    1.11 05/12/08 15:28:13 jonas@stripped +45 -0
    Add testcase for bug#15587

  ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
    1.57 05/12/08 15:28:13 jonas@stripped +7 -2
    Add dump for ERROR 5002 with specified table

  ndb/src/kernel/blocks/dblqh/Dblqh.hpp
    1.30 05/12/08 15:28:13 jonas@stripped +1 -0
    Add dump for ERROR 5002 with specified table

  ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.26 05/12/08 15:28:13 jonas@stripped +12 -2
    Run updateNodeInfo if failed node is not in list of storedReplicas

  ndb/src/kernel/blocks/ERROR_codes.txt
    1.10 05/12/08 15:28:13 jonas@stripped +3 -0
    Add dump for ERROR 5002 with specified table

  ndb/include/kernel/signaldata/DumpStateOrd.hpp
    1.5 05/12/08 15:28:13 jonas@stripped +2 -0
    Add dump for ERROR 5002 with specified table

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/mysql-4.1

--- 1.21/ndb/test/run-test/daily-basic-tests.txt	2005-07-20 12:39:42 +02:00
+++ 1.22/ndb/test/run-test/daily-basic-tests.txt	2005-12-08 15:28:13 +01:00
@@ -434,6 +434,10 @@
 cmd: testScan
 args: -l 100 -n Scan-bug8262 T7
 
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15587 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

--- 1.4/ndb/include/kernel/signaldata/DumpStateOrd.hpp	2005-10-21 06:41:46 +02:00
+++ 1.5/ndb/include/kernel/signaldata/DumpStateOrd.hpp	2005-12-08 15:28:13 +01:00
@@ -78,6 +78,8 @@
     LqhDumpAllScanRec = 2301,
     LqhDumpAllActiveScanRec = 2302,
     LqhDumpLcpState = 2303,
+    LqhErrorInsert5042 = 2315,
+
     AccDumpOneScanRec = 2400,
     AccDumpAllScanRec = 2401,
     AccDumpAllActiveScanRec = 2402,

--- 1.9/ndb/src/kernel/blocks/ERROR_codes.txt	2005-09-15 14:47:14 +02:00
+++ 1.10/ndb/src/kernel/blocks/ERROR_codes.txt	2005-12-08 15:28:13 +01:00
@@ -155,6 +155,9 @@
 5006:
 Insert node failure handling when receiving ABORTREQ.
 
+5042:
+As 5002, but with specified table (see DumpStateOrd)
+
 These error code can be combined with error codes for testing time-out
 handling in DBTC to ensure that node failures are also well handled in
 time-out handling. They can also be used to test multiple node failure

--- 1.25/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2005-09-02 11:47:09 +02:00
+++ 1.26/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2005-12-08 15:28:13 +01:00
@@ -5187,15 +5187,16 @@
     /**
      * For each of replica record
      */
-    Uint32 replicaNo = 0;
+    bool found = false;
     ReplicaRecordPtr replicaPtr;
     for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
-        replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) {
+        replicaPtr.i = replicaPtr.p->nextReplica) {
       jam();
 
       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
       if(replicaPtr.p->procNode == nodeId){
         jam();
+	found = true;
 	noOfRemovedReplicas++;
 	removeNodeFromStored(nodeId, fragPtr, replicaPtr);
 	if(replicaPtr.p->lcpOngoingFlag){
@@ -5210,6 +5211,15 @@
 	  replicaPtr.p->lcpOngoingFlag = false;
 	}
       }
+    }
+    if (!found)
+    {
+      jam();
+      /**
+       * Run updateNodeInfo to remove any dead nodes from list of activeNodes
+       *  see bug#15587
+       */
+      updateNodeInfo(fragPtr);
     }
     noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
   }

--- 1.29/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2005-09-08 18:24:34 +02:00
+++ 1.30/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2005-12-08 15:28:13 +01:00
@@ -2881,6 +2881,7 @@
   UintR ctransidHash[1024];
   
   Uint32 c_diskless;
+  Uint32 c_error_insert_table_id;
   
 public:
   /**

--- 1.56/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2005-09-08 18:24:34 +02:00
+++ 1.57/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2005-12-08 15:28:13 +01:00
@@ -3532,6 +3532,7 @@
     jam();
     regTcPtr->activeCreat = ZTRUE;
     CRASH_INSERTION(5002);
+    CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
   } else {
     regTcPtr->activeCreat = ZFALSE;
   }//if
@@ -18402,8 +18403,12 @@
     return;
   }
 
-
-
+  if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
+  {
+    c_error_insert_table_id = dumpState->args[1];
+    SET_ERROR_INSERT_VALUE(5042);
+  }
+  
 }//Dblqh::execDUMP_STATE_ORD()
 
 void Dblqh::execSET_VAR_REQ(Signal* signal) 

--- 1.10/ndb/test/ndbapi/testNodeRestart.cpp	2005-05-05 11:08:57 +02:00
+++ 1.11/ndb/test/ndbapi/testNodeRestart.cpp	2005-12-08 15:28:13 +01:00
@@ -21,6 +21,7 @@
 #include <NdbRestarter.hpp>
 #include <NdbRestarts.hpp>
 #include <Vector.hpp>
+#include <signaldata/DumpStateOrd.hpp>
 
 
 int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@@ -409,6 +410,43 @@
   return NDBT_OK;
 }
 
+int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter restarter;
+  
+  Uint32 tableId = ctx->getTab()->getTableId();
+  int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 };
+  dump[1] = tableId;
+
+  int nodeId = restarter.getDbNodeId(1);
+
+  ndbout << "Restart node " << nodeId << endl; 
+  
+  if (restarter.restartOneDbNode(nodeId,
+				 /** initial */ false, 
+				 /** nostart */ true,
+				 /** abort   */ true))
+    return NDBT_FAILED;
+  
+  if (restarter.waitNodesNoStart(&nodeId, 1))
+    return NDBT_FAILED; 
+   
+  if (restarter.dumpStateOneNode(nodeId, dump, 2))
+    return NDBT_FAILED;
+
+  if (restarter.startNodes(&nodeId, 1))
+    return NDBT_FAILED;
+
+  if (restarter.waitNodesStarted(&nodeId, 1))
+    return NDBT_FAILED;
+  
+  ctx->stopTest();
+  return NDBT_OK;
+}
+
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -669,6 +707,13 @@
 	 "Test commit after node failure"){
   INITIALIZER(runLoadTable);
   STEP(runLateCommit);
+  FINALIZER(runClearTable);
+}
+TESTCASE("Bug15587",
+	 "Test bug with NF during NR"){
+  INITIALIZER(runLoadTable);
+  STEP(runScanUpdateUntilStopped);
+  STEP(runBug15587);
   FINALIZER(runClearTable);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
Thread
bk commit into 4.1 tree (jonas:1.2456) BUG#15587 - jonas, 8 Dec