List: Commits « Previous Message | Next Message »
From: jonas  Date: June 18 2007 7:42am
Subject: bk commit into 5.1 tree (jonas:1.2152) BUG#29167
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-18 07:42:31+02:00, jonas@stripped +4 -0
  ndb - bug#29167
    Fix case where all nodes in a node group die before they have saved their sysfile (wrt gcp)
      and Qmgr incorrectly thinks that "node group is missing"

  storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-06-18 07:42:29+02:00,
jonas@stripped +3 -1
    error code

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-06-18 07:42:29+02:00,
jonas@stripped +13 -0
    fix bug + new error insert

  storage/ndb/test/ndbapi/testSystemRestart.cpp@stripped, 2007-06-18 07:42:29+02:00,
jonas@stripped +48 -0
    testcase

  storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-06-18 07:42:29+02:00,
jonas@stripped +4 -0
    testcase

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop5

--- 1.55/storage/ndb/test/run-test/daily-basic-tests.txt	2007-06-18 07:42:35 +02:00
+++ 1.56/storage/ndb/test/run-test/daily-basic-tests.txt	2007-06-18 07:42:35 +02:00
@@ -497,6 +497,10 @@
 cmd: testSystemRestart
 args: -n Bug27434 T1
 
+max-time: 300
+cmd: testSystemRestart
+args: -n Bug29167 T1
+
 max-time: 1000
 cmd: test_event
 args: -l 10 -n Bug27169 T1

--- 1.24/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-06-18 07:42:35 +02:00
+++ 1.25/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-06-18 07:42:35 +02:00
@@ -5,7 +5,7 @@
 Next DBTUP 4013
 Next DBLQH 5047
 Next DBDICT 6007
-Next DBDIH 7183
+Next DBDIH 7184
 Next DBTC 8040
 Next CMVMI 9000
 Next BACKUP 10022
@@ -73,6 +73,8 @@
 7177: Delay copying of sysfileData in execCOPY_GCIREQ
 
 7180: Crash master during master-take-over in execMASTER_LCPCONF
+
+7183: Crash when receiving COPY_GCIREQ
 
 ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------

--- 1.66/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-06-18 07:42:35 +02:00
+++ 1.67/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-06-18 07:42:35 +02:00
@@ -736,6 +736,8 @@
   }
   ndbrequire(ok);
   
+  CRASH_INSERTION(7183);
+  
   /* ----------------------------------------------------------------------- */
   /*     WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE.           */
   /* ----------------------------------------------------------------------- */
@@ -1199,6 +1201,17 @@
 	Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups);
 	ndbrequire(ng < MAX_NDB_NODES);
 	Uint32 gci = node_gcis[i];
+        if (gci < SYSFILE->lastCompletedGCI[i])
+        {
+          jam();
+          /**
+           * Handle case, where *I* know that node complete GCI
+           *   but node does not...bug#29167
+           *   i.e node died before it wrote own sysfile
+           */
+          gci = SYSFILE->lastCompletedGCI[i];
+        }
+
 	if (gci > node_group_gcis[ng])
 	{
 	  jam();

--- 1.11/storage/ndb/test/ndbapi/testSystemRestart.cpp	2007-06-18 07:42:35 +02:00
+++ 1.12/storage/ndb/test/ndbapi/testSystemRestart.cpp	2007-06-18 07:42:35 +02:00
@@ -1170,6 +1170,48 @@
   return result;
 }
 
+int 
+runBug29167(NDBT_Context* ctx, NDBT_Step* step)
+{
+  int result = NDBT_OK;
+  NdbRestarter restarter;
+  Ndb* pNdb = GETNDB(step);
+  const Uint32 nodeCount = restarter.getNumDbNodes();
+
+  if (nodeCount < 2)
+    return NDBT_OK;
+
+  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
+  NdbLogEventHandle handle = 
+    ndb_mgm_create_logevent_handle(restarter.handle, filter);
+
+  struct ndb_logevent event;
+  int master = restarter.getMasterNodeId();
+  do {
+    int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
+    int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+    
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };    
+    restarter.dumpStateAllNodes(val2, 2);
+    int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 30000 };
+    restarter.dumpStateAllNodes(dump, 2);
+    
+    while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
+          event.type != NDB_LE_GlobalCheckpointCompleted);
+    
+    CHECK(restarter.insertErrorInAllNodes(932) == 0);
+    
+    CHECK(restarter.insertErrorInNode(node1, 7183) == 0);
+    CHECK(restarter.insertErrorInNode(node2, 7183) == 0);
+
+    CHECK(restarter.waitClusterNoStart() == 0);
+    restarter.startAll();
+    CHECK(restarter.waitClusterStarted() == 0);  
+  } while(false);
+  
+  return result;
+}
+
 NDBT_TESTSUITE(testSystemRestart);
 TESTCASE("SR1", 
 	 "Basic system restart test. Focus on testing restart from REDO log.\n"
@@ -1341,6 +1383,12 @@
   INITIALIZER(runWaitStarted);
   STEP(runBug27434);
 }
+TESTCASE("Bug29167", "")
+{
+  INITIALIZER(runWaitStarted);
+  STEP(runBug29167);
+}
+
 NDBT_TESTSUITE_END(testSystemRestart);
 
 int main(int argc, const char** argv){
Thread
bk commit into 5.1 tree (jonas:1.2152) BUG#29167 - jonas, 18 Jun