List: Commits  « Previous Message | Next Message »
From: jonas  Date: March 13 2007 10:29am
Subject: bk commit into 5.0 tree (jonas:1.2295) BUG#27003
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-13 11:29:14+01:00, jonas@stripped +7 -0
  ndb - bug#27003
    Handle random (not in order) LQHKEYREQ failures during node-restart

  ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +12 -0
    Document new error codes

  ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +16 -3
    Handle random (not in order) LQHKEYREQ failures during node-restart

  ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +24 -0
    Error codes for various oom problems

  ndb/src/kernel/blocks/dbtup/DbtupGen.cpp@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +1 -1
    Move CLEAR_ERROR_INSERT_VALUE from start phase 1 to initialization (initData())
    so that it's reasonable to use it for restart testing
    

  ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +1 -0
    Add error insert for CopyFragRef
    

  ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +56 -0
    Test program for bug#27003

  ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-03-13 11:29:13+01:00, jonas@stripped +4 -0
    Add test program for bug#27003

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/50-work

--- 1.49/ndb/test/run-test/daily-basic-tests.txt	2007-03-13 11:29:18 +01:00
+++ 1.50/ndb/test/run-test/daily-basic-tests.txt	2007-03-13 11:29:18 +01:00
@@ -425,6 +425,10 @@
 cmd: testScan
 args: -n Bug24447 T1
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug27003 T1
+
 max-time: 500
 cmd: testNodeRestart
 args: -n Bug15587 T1

--- 1.26/ndb/src/kernel/blocks/ERROR_codes.txt	2007-03-13 11:29:18 +01:00
+++ 1.27/ndb/src/kernel/blocks/ERROR_codes.txt	2007-03-13 11:29:18 +01:00
@@ -489,3 +489,15 @@
 6003 Crash in participant @ CreateTabReq::Prepare
 6004 Crash in participant @ CreateTabReq::Commit
 6005 Crash in participant @ CreateTabReq::CreateDrop
+
+TUP:
+----
+
+4025: Fail all inserts with out of memory
+4026: Fail one insert with oom
+4027: Fail inserts randomly with oom
+4028: Fail one random insert with oom
+
+NDBCNTR:
+
+1000: Crash insertion on SystemError::CopyFragRef

--- 1.100/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-13 11:29:18 +01:00
+++ 1.101/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-13 11:29:18 +01:00
@@ -9641,6 +9641,15 @@
     closeCopyLab(signal);
     return;
   }//if
+
+  if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY &&
+      scanptr.p->scanErrorCounter)
+  {
+    jam();
+    closeCopyLab(signal);
+    return;
+  }
+  
   if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY) {
     jam();
 /*---------------------------------------------------------------------------*/
@@ -9717,13 +9726,16 @@
 void Dblqh::copyLqhKeyRefLab(Signal* signal) 
 {
   ndbrequire(tcConnectptr.p->transid[1] == signal->theData[4]);
-  tcConnectptr.p->copyCountWords -= signal->theData[3];
+  Uint32 copyWords = signal->theData[3];
   scanptr.i = tcConnectptr.p->tcScanRec;
   c_scanRecordPool.getPtr(scanptr);
   scanptr.p->scanErrorCounter++;
   tcConnectptr.p->errorCode = terrorCode;
-  closeCopyLab(signal);
-  return;
+  
+  LqhKeyConf* conf = (LqhKeyConf*)signal->getDataPtrSend();
+  conf->transId1 = copyWords;
+  conf->transId2 = tcConnectptr.p->transid[1];
+  copyCompletedLab(signal);
 }//Dblqh::copyLqhKeyRefLab()
 
 void Dblqh::closeCopyLab(Signal* signal) 
@@ -9734,6 +9746,7 @@
 // Wait until all of those have arrived until we start the
 // close process.
 /*---------------------------------------------------------------------------*/
+    scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
     jam();
     return;
   }//if

--- 1.22/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp	2007-03-13 11:29:18 +01:00
+++ 1.23/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp	2007-03-13 11:29:18 +01:00
@@ -213,6 +213,30 @@
 //---------------------------------------------------
   PagePtr pagePtr;
   Uint32 pageOffset;
+
+  if (ERROR_INSERTED(4025))
+  {
+    signal->theData[0] = 827;
+    return;
+  }
+  if (ERROR_INSERTED(4026))
+  {
+    CLEAR_ERROR_INSERT_VALUE;
+    signal->theData[0] = 827;
+    return;
+  }
+  if (ERROR_INSERTED(4027) && (rand() % 100) > 25)
+  {
+    signal->theData[0] = 827;
+    return;
+  }
+  if (ERROR_INSERTED(4028) && (rand() % 100) > 25)
+  {
+    CLEAR_ERROR_INSERT_VALUE;
+    signal->theData[0] = 827;
+    return;
+  }
+  
   if (!allocTh(regFragPtr.p,
                regTabPtr.p,
                NORMAL_PAGE,

--- 1.20/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp	2007-03-13 11:29:18 +01:00
+++ 1.21/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp	2007-03-13 11:29:18 +01:00
@@ -66,6 +66,7 @@
   undoPage = 0;
   totNoOfPagesAllocated = 0;
   cnoOfAllocatedPages = 0;
+  CLEAR_ERROR_INSERT_VALUE;
   
   // Records with constant sizes
 }//Dbtup::initData()
@@ -570,7 +571,6 @@
   switch (startPhase) {
   case ZSTARTPHASE1:
     ljam();
-    CLEAR_ERROR_INSERT_VALUE;
     cownref = calcTupBlockRef(0);
     break;
   default:

--- 1.33/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-03-13 11:29:18 +01:00
+++ 1.34/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-03-13 11:29:18 +01:00
@@ -180,6 +180,7 @@
     break;
 
   case SystemError::CopyFragRefError:
+    CRASH_INSERTION(1000);
     BaseString::snprintf(buf, sizeof(buf), 
 			 "Killed by node %d as "
 			 "copyfrag failed, error: %u",

--- 1.28/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-13 11:29:18 +01:00
+++ 1.29/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-13 11:29:18 +01:00
@@ -1125,6 +1125,59 @@
   return NDBT_OK;
 }
 
+int
+runBug27003(NDBT_Context* ctx, NDBT_Step* step)
+{
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter res;
+  
+  static const int errnos[] = { 4025, 4026, 4027, 4028, 0 };
+
+  int node = res.getRandomNotMasterNodeId(rand());
+  ndbout_c("node: %d", node);
+  if (res.restartOneDbNode(node, false, true, true))
+    return NDBT_FAILED;
+
+  Uint32 pos = 0;
+  for (Uint32 i = 0; i<loops; i++)
+  {
+    while (errnos[pos] != 0)
+    {
+      ndbout_c("Tesing err: %d", errnos[pos]);
+      
+      if (res.waitNodesNoStart(&node, 1))
+	return NDBT_FAILED;
+
+      if (res.insertErrorInNode(node, 1000))
+	return NDBT_FAILED;
+      
+      if (res.insertErrorInNode(node, errnos[pos]))
+	return NDBT_FAILED;
+      
+      int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+      if (res.dumpStateOneNode(node, val2, 2))
+	return NDBT_FAILED;
+      
+      res.startNodes(&node, 1);
+      res.waitNodesStartPhase(&node, 1, 2);
+      pos++;
+    }
+    pos = 0;
+  }
+
+  if (res.waitNodesNoStart(&node, 1))
+    return NDBT_FAILED;
+  
+  res.startNodes(&node, 1);
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+  
+  return NDBT_OK;
+}
+
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -1451,6 +1504,9 @@
 }
 TESTCASE("Bug26481", ""){
   INITIALIZER(runBug26481);
+}
+TESTCASE("Bug27003", ""){
+  INITIALIZER(runBug27003);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
 
Thread:
bk commit into 5.0 tree (jonas:1.2295) BUG#27003 - jonas, 13 Mar