List:Commits« Previous MessageNext Message »
From:jonas Date:March 27 2007 2:06pm
Subject:bk commit into 5.1 tree (jonas:1.2505) BUG#27434
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-27 16:06:47+02:00, jonas@stripped +14 -0
  ndb - bug#27434
    Add new take-over step, PREPARE_COPY_FRAG

  storage/ndb/include/kernel/GlobalSignalNumbers.h@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +5 -3
    Add new signals

  storage/ndb/include/kernel/signaldata/CopyFrag.hpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +37 -0
    Add new signals

  storage/ndb/include/ndb_version.h.in@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +1 -0
    Add version code for PREPARE_COPY_FRAG

  storage/ndb/src/common/debugger/signaldata/SignalNames.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +4 -0
    Add new signals

  storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +3 -1
    Add new error codes (5045, 5046)

  storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +5 -1
    Add new take-over state (PREPARE_COPY)

  storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +5 -0
    Register handlers for the new PREPARE_COPY_FRAG_REF/CONF signals

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +87 -3
    Add new take-over step, PREPARE_COPY_FRAG

  storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +1 -0
    Add new take-over step, PREPARE_COPY_FRAG

  storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +3 -0
    Add new take-over step, PREPARE_COPY_FRAG

  storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +34 -0
    Add new take-over step, PREPARE_COPY_FRAG

  storage/ndb/test/ndbapi/testSystemRestart.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +54 -0
    Add testcase for bug#27434 (runBug27434)

  storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +4 -0
    Add new testcase (testSystemRestart -n Bug27434 T1)

  storage/ndb/test/src/NdbRestarts.cpp@stripped, 2007-03-27 16:06:45+02:00, jonas@stripped +3 -2
    Add new error code (5046) to NFDuringNR_codes; print node/error code via g_err

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/51-telco

--- 1.17/storage/ndb/include/ndb_version.h.in	2007-03-27 16:06:53 +02:00
+++ 1.18/storage/ndb/include/ndb_version.h.in	2007-03-27 16:06:53 +02:00
@@ -75,6 +75,7 @@
 #define NDBD_NODE_VERSION_REP MAKE_VERSION(6,1,1)
 
 #define NDBD_255_NODES_VERSION MAKE_VERSION(5,1,4)
+#define NDBD_PREPARE_COPY_FRAG_VERSION MAKE_VERSION(6,2,1)
 
 #endif
  

--- 1.87/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-27 16:06:53 +02:00
+++ 1.88/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-27 16:06:53 +02:00
@@ -804,6 +804,10 @@
 cmd: testNodeRestart
 args: -n Bug27466 T1
 
+max-time: 1500
+cmd: testSystemRestart
+args: -n Bug27434 T1
+
 max-time: 1000
 cmd: test_event
 args: -l 10 -n Bug27169 T1

--- 1.35/storage/ndb/include/kernel/GlobalSignalNumbers.h	2007-03-27 16:06:53 +02:00
+++ 1.36/storage/ndb/include/kernel/GlobalSignalNumbers.h	2007-03-27 16:06:53 +02:00
@@ -196,9 +196,11 @@
 /* 132 not unused */
 /* 133 not unused */
 #define GSN_CM_HEARTBEAT                134 /* distr. */
-/* 135 unused */
-/* 136 unused */
-/* 137 unused */
+
+#define GSN_PREPARE_COPY_FRAG_REQ       135
+#define GSN_PREPARE_COPY_FRAG_REF       136
+#define GSN_PREPARE_COPY_FRAG_CONF      137
+
 #define GSN_CM_NODEINFOCONF             138 /* distr. */
 #define GSN_CM_NODEINFOREF              139 /* distr. */
 #define GSN_CM_NODEINFOREQ              140 /* distr. */

--- 1.5/storage/ndb/include/kernel/signaldata/CopyFrag.hpp	2007-03-27 16:06:53 +02:00
+++ 1.6/storage/ndb/include/kernel/signaldata/CopyFrag.hpp	2007-03-27 16:06:53 +02:00
@@ -95,4 +95,41 @@
   STATIC_CONST( SignalLength = 3 );
 };
 
+struct PrepareCopyFragReq
+{
+  STATIC_CONST( SignalLength = 6 );
+
+  Uint32 senderRef;
+  Uint32 senderData;
+  Uint32 tableId;
+  Uint32 fragId;
+  Uint32 copyNodeId;
+  Uint32 startingNodeId;
+};
+
+struct PrepareCopyFragRef
+{
+  Uint32 senderRef;
+  Uint32 senderData;
+  Uint32 tableId;
+  Uint32 fragId;
+  Uint32 copyNodeId;
+  Uint32 startingNodeId;
+  Uint32 errorCode;
+
+  STATIC_CONST( SignalLength = 7 );
+};
+
+struct PrepareCopyFragConf
+{
+  STATIC_CONST( SignalLength = 6 );
+
+  Uint32 senderRef;
+  Uint32 senderData;
+  Uint32 tableId;
+  Uint32 fragId;
+  Uint32 copyNodeId;
+  Uint32 startingNodeId;
+};
+
 #endif

--- 1.19/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp	2007-03-27 16:06:53 +02:00
+++ 1.20/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp	2007-03-27 16:06:53 +02:00
@@ -632,5 +632,9 @@
 
   ,{ GSN_ROUTE_ORD, "ROUTE_ORD" }
   ,{ GSN_NODE_VERSION_REP, "NODE_VERSION_REP" }
+
+  ,{ GSN_PREPARE_COPY_FRAG_REQ,   "PREPARE_COPY_FRAG_REQ" }
+  ,{ GSN_PREPARE_COPY_FRAG_REF,   "PREPARE_COPY_FRAG_REF" }
+  ,{ GSN_PREPARE_COPY_FRAG_CONF,  "PREPARE_COPY_FRAG_CONF" }
 };
 const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);

--- 1.40/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-03-27 16:06:53 +02:00
+++ 1.41/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-03-27 16:06:53 +02:00
@@ -3,7 +3,7 @@
 Next NDBFS 2000
 Next DBACC 3002
 Next DBTUP 4029
-Next DBLQH 5045
+Next DBLQH 5047
 Next DBDICT 6007
 Next DBDIH 7183
 Next DBTC 8040
@@ -177,6 +177,8 @@
 time-out handling. They can also be used to test multiple node failure
 handling.
 
+5045: Crash in PREPARE_COPY_FRAG_REQ
+5046: Crash if LQHKEYREQ (NrCopy) comes when frag-state is incorrect
 
 ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH
 -------------------------------------------------

--- 1.38/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2007-03-27 16:06:53 +02:00
+++ 1.39/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2007-03-27 16:06:53 +02:00
@@ -544,7 +544,8 @@
       TO_WAIT_ENDING = 21,
       ENDING = 22,
       
-      STARTING_LOCAL_FRAGMENTS = 24
+      STARTING_LOCAL_FRAGMENTS = 24,
+      PREPARE_COPY = 25
     };
     enum ToSlaveStatus {
       TO_SLAVE_IDLE = 0,
@@ -671,6 +672,8 @@
   void execNODE_FAILREP(Signal *);
   void execCOPY_FRAGCONF(Signal *);
   void execCOPY_FRAGREF(Signal *);
+  void execPREPARE_COPY_FRAG_REF(Signal*);
+  void execPREPARE_COPY_FRAG_CONF(Signal*);
   void execDIADDTABREQ(Signal *);
   void execDIGETNODESREQ(Signal *);
   void execDIRELEASEREQ(Signal *);
@@ -1113,6 +1116,7 @@
   void sendStartTo(Signal *, Uint32 takeOverPtr);
   void startNextCopyFragment(Signal *, Uint32 takeOverPtr);
   void toCopyFragLab(Signal *, Uint32 takeOverPtr);
+  void toStartCopyFrag(Signal *, TakeOverRecordPtr);
   void startHsAddFragConfLab(Signal *);
   void prepareSendCreateFragReq(Signal *, Uint32 takeOverPtr);
   void sendUpdateTo(Signal *, Uint32 takeOverPtr, Uint32 updateState);

--- 1.23/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2007-03-27 16:06:53 +02:00
+++ 1.24/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2007-03-27 16:06:53 +02:00
@@ -259,6 +259,11 @@
   
   addRecSignal(GSN_START_FRAGREF,
 	       &Dbdih::execSTART_FRAGREF);
+
+  addRecSignal(GSN_PREPARE_COPY_FRAG_REF,
+	       &Dbdih::execPREPARE_COPY_FRAG_REF);
+  addRecSignal(GSN_PREPARE_COPY_FRAG_CONF,
+	       &Dbdih::execPREPARE_COPY_FRAG_CONF);
   
   apiConnectRecord = 0;
   connectRecord = 0;

--- 1.124/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-27 16:06:53 +02:00
+++ 1.125/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-27 16:06:53 +02:00
@@ -3139,6 +3139,81 @@
   TakeOverRecordPtr takeOverPtr;
   RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
 
+  /**
+   * Inform starting node that TakeOver is about to start
+   */
+  Uint32 nodeId = takeOverPtr.p->toStartingNode;
+
+  if (getNodeInfo(nodeId).m_version >= NDBD_PREPARE_COPY_FRAG_VERSION)
+  {
+    jam();
+    TabRecordPtr tabPtr;
+    tabPtr.i = takeOverPtr.p->toCurrentTabref;
+    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
+
+    FragmentstorePtr fragPtr;
+    getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
+    Uint32 nodes[MAX_REPLICAS];
+    extractNodeInfo(fragPtr.p, nodes);
+    
+    PrepareCopyFragReq* req= (PrepareCopyFragReq*)signal->getDataPtrSend();
+    req->senderRef = reference();
+    req->senderData = takeOverPtrI;
+    req->tableId = takeOverPtr.p->toCurrentTabref;
+    req->fragId = takeOverPtr.p->toCurrentFragid;
+    req->copyNodeId = nodes[0]; // Src
+    req->startingNodeId = takeOverPtr.p->toStartingNode; // Dst
+    Uint32 ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
+    
+    sendSignal(ref, GSN_PREPARE_COPY_FRAG_REQ, signal, 
+	       PrepareCopyFragReq::SignalLength, JBB);
+    
+    takeOverPtr.p->toMasterStatus = TakeOverRecord::PREPARE_COPY;
+    return;
+  }
+  
+  toStartCopyFrag(signal, takeOverPtr);
+}
+
+void
+Dbdih::execPREPARE_COPY_FRAG_REF(Signal* signal)
+{
+  jamEntry();
+  PrepareCopyFragRef ref = *(PrepareCopyFragRef*)signal->getDataPtr();
+
+  TakeOverRecordPtr takeOverPtr;
+  RETURN_IF_TAKE_OVER_INTERRUPTED(ref.senderData, takeOverPtr);
+
+  ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::PREPARE_COPY);
+  
+  /**
+   * Treat this as copy frag ref
+   */
+  CopyFragRef * cfref = (CopyFragRef*)signal->getDataPtrSend();
+  cfref->userPtr = ref.senderData;
+  cfref->startingNodeId = ref.startingNodeId;
+  cfref->errorCode = ref.errorCode;
+  cfref->tableId = ref.tableId;
+  cfref->fragId = ref.fragId;
+  cfref->sendingNodeId = ref.copyNodeId;
+  takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG;
+  execCOPY_FRAGREF(signal);
+}
+
+void
+Dbdih::execPREPARE_COPY_FRAG_CONF(Signal* signal)
+{
+  PrepareCopyFragConf conf = *(PrepareCopyFragConf*)signal->getDataPtr();
+
+  TakeOverRecordPtr takeOverPtr;
+  RETURN_IF_TAKE_OVER_INTERRUPTED(conf.senderData, takeOverPtr);
+  
+  toStartCopyFrag(signal, takeOverPtr);
+}
+
+void
+Dbdih::toStartCopyFrag(Signal* signal, TakeOverRecordPtr takeOverPtr)
+{
   CreateReplicaRecordPtr createReplicaPtr;
   createReplicaPtr.i = 0;
   ptrAss(createReplicaPtr, createReplicaRecord);
@@ -3162,8 +3237,8 @@
   createReplicaPtr.p->hotSpareUse = true;
   createReplicaPtr.p->dataNodeId = takeOverPtr.p->toStartingNode;
 
-  prepareSendCreateFragReq(signal, takeOverPtrI);
-}//Dbdih::toCopyFragLab()
+  prepareSendCreateFragReq(signal, takeOverPtr.i);
+}//Dbdih::toStartCopyFrag()
 
 void Dbdih::prepareSendCreateFragReq(Signal* signal, Uint32 takeOverPtrI)
 {
@@ -4555,12 +4630,21 @@
     ok = true;
     jam();
     //-----------------------------------------------------------------------
-    // The starting node will discover the problem. We will receive either
+    // The copying node will discover the problem. We will receive either
     // COPY_FRAGREQ or COPY_FRAGCONF and then we can release the take over
     // record and end the process. If the copying node should also die then
     // we will try to send prepare create fragment and will then discover
     // that the starting node has failed.
     //-----------------------------------------------------------------------
+    break;
+  case TakeOverRecord::PREPARE_COPY:
+    ok = true;
+    jam();
+    /**
+     * We're waiting for the starting node...which just died...
+     *  endTakeOver
+     */
+    endTakeOver(takeOverPtr.i);
     break;
   case TakeOverRecord::COPY_ACTIVE:
     ok = true;

--- 1.62/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2007-03-27 16:06:53 +02:00
+++ 1.63/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2007-03-27 16:06:53 +02:00
@@ -2148,6 +2148,7 @@
   void execSTORED_PROCCONF(Signal* signal);
   void execSTORED_PROCREF(Signal* signal);
   void execCOPY_FRAGREQ(Signal* signal);
+  void execPREPARE_COPY_FRAG_REQ(Signal* signal);
   void execUPDATE_FRAG_DIST_KEY_ORD(Signal*);
   void execCOPY_ACTIVEREQ(Signal* signal);
   void execCOPY_STATEREQ(Signal* signal);

--- 1.23/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2007-03-27 16:06:53 +02:00
+++ 1.24/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2007-03-27 16:06:53 +02:00
@@ -303,6 +303,9 @@
   addRecSignal(GSN_UPDATE_FRAG_DIST_KEY_ORD, 
 	       &Dblqh::execUPDATE_FRAG_DIST_KEY_ORD);
   
+  addRecSignal(GSN_PREPARE_COPY_FRAG_REQ,
+	       &Dblqh::execPREPARE_COPY_FRAG_REQ);
+  
   initData();
 
 #ifdef VM_TRACE

--- 1.153/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-27 16:06:53 +02:00
+++ 1.154/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-03-27 16:06:53 +02:00
@@ -3622,6 +3622,7 @@
     {
       ndbout_c("fragptr.p->fragStatus: %d",
 	       fragptr.p->fragStatus);
+      CRASH_INSERTION(5046);
     }
     ndbassert(fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION);
     fragptr.p->m_copy_started_state = Fragrecord::AC_NR_COPY;
@@ -9997,6 +9998,39 @@
   
   return md5_hash(Tmp, keyLen);
 }//Dblqh::calculateHash()
+
+/**
+ * PREPARE COPY FRAG REQ
+ */
+void
+Dblqh::execPREPARE_COPY_FRAG_REQ(Signal* signal)
+{
+  jamEntry();
+  PrepareCopyFragReq req = *(PrepareCopyFragReq*)signal->getDataPtr();
+
+  CRASH_INSERTION(5045);
+
+  tabptr.i = req.tableId;
+  ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
+  ndbrequire(getFragmentrec(signal, req.fragId));
+  fragptr.p->m_copy_started_state = Fragrecord::AC_IGNORED;
+  fragptr.p->fragStatus = Fragrecord::ACTIVE_CREATION;
+  fragptr.p->logFlag = Fragrecord::STATE_FALSE;
+  
+  /**
+   * Reply to the sender with PREPARE_COPY_FRAG_CONF, echoing the request fields
+   */
+  
+  PrepareCopyFragConf* conf = (PrepareCopyFragConf*)signal->getDataPtrSend();
+  conf->senderData = req.senderData;
+  conf->senderRef = reference();
+  conf->tableId = req.tableId;
+  conf->fragId = req.fragId;
+  conf->copyNodeId = req.copyNodeId;
+  conf->startingNodeId = req.startingNodeId;
+  sendSignal(req.senderRef, GSN_PREPARE_COPY_FRAG_CONF,
+	     signal, PrepareCopyFragConf::SignalLength, JBB);
+}
 
 /* *************************************** */
 /*  COPY_FRAGREQ: Start copying a fragment */

--- 1.13/storage/ndb/test/ndbapi/testSystemRestart.cpp	2007-03-27 16:06:53 +02:00
+++ 1.14/storage/ndb/test/ndbapi/testSystemRestart.cpp	2007-03-27 16:06:53 +02:00
@@ -1219,6 +1219,54 @@
   return result;
 }
 
+int 
+runBug27434(NDBT_Context* ctx, NDBT_Step* step)
+{
+  int result = NDBT_OK;
+  NdbRestarter restarter;
+  Ndb* pNdb = GETNDB(step);
+  const Uint32 nodeCount = restarter.getNumDbNodes();
+
+  if (nodeCount < 2)
+    return NDBT_OK;
+
+  int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
+  int dump[] = { DumpStateOrd::DihStartLcpImmediately };
+
+  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
+  NdbLogEventHandle handle = 
+    ndb_mgm_create_logevent_handle(restarter.handle, filter);
+
+  struct ndb_logevent event;
+
+  do {
+    int node1 = restarter.getDbNodeId(rand() % nodeCount);
+    CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
+    NdbSleep_SecSleep(3);
+    CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
+
+    CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
+
+    for (Uint32 i = 0; i<3; i++)
+    {
+      CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
+      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
+	    event.type != NDB_LE_LocalCheckpointStarted);
+      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
+	    event.type != NDB_LE_LocalCheckpointCompleted);
+    }      
+    
+    restarter.restartAll(false, true, true);
+    NdbSleep_SecSleep(3);
+    CHECK(restarter.waitClusterNoStart() == 0);
+    restarter.insertErrorInNode(node1, 5046);
+    restarter.startAll();
+    CHECK(restarter.waitClusterStarted() == 0);
+  } while(false);
+  
+  return result;
+}
+
 NDBT_TESTSUITE(testSystemRestart);
 TESTCASE("SR1", 
 	 "Basic system restart test. Focus on testing restart from REDO log.\n"
@@ -1398,6 +1446,12 @@
   INITIALIZER(runClearTable);
   STEP(runBug24664);
   FINALIZER(runClearTable);
+}
+TESTCASE("Bug27434",
+	 "")
+{
+  INITIALIZER(runWaitStarted);
+  STEP(runBug27434);
 }
 NDBT_TESTSUITE_END(testSystemRestart);
 

--- 1.9/storage/ndb/test/src/NdbRestarts.cpp	2007-03-27 16:06:53 +02:00
+++ 1.10/storage/ndb/test/src/NdbRestarts.cpp	2007-03-27 16:06:53 +02:00
@@ -607,6 +607,7 @@
   5026,
   7139,
   7132,
+  5046,
 
   //LCP
   8000,
@@ -630,8 +631,8 @@
     int nodeId = _restarter.getDbNodeId(randomId);
     int error = NFDuringNR_codes[i];
     
-    g_info << _restart->m_name << ": node = " << nodeId 
-	   << " error code = " << error << endl;
+    g_err << _restart->m_name << ": node = " << nodeId 
+	  << " error code = " << error << endl;
     
     CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
 	  "Could not restart node "<< nodeId);
Thread
bk commit into 5.1 tree (jonas:1.2505) BUG#27434jonas27 Mar