Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-03-27 16:06:47+02:00, jonas@stripped +14 -0
ndb - bug#27434
Add new take-over step, PREPARE_COPY_FRAG
storage/ndb/include/kernel/GlobalSignalNumbers.h@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +5 -3
Add new signals
storage/ndb/include/kernel/signaldata/CopyFrag.hpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +37 -0
Add new signals
storage/ndb/include/ndb_version.h.in@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +1 -0
Add version code for PREPARE_COPY_FRAG
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp@stripped, 2007-03-27
16:06:45+02:00, jonas@stripped +4 -0
Add new signals
storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +3 -1
Add new error code
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +5 -1
Add new take-over state (PREPARE_COPY)
storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +5 -0
Register handlers for the new signals (PREPARE_COPY_FRAG_REF/CONF)
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +87 -3
Add new take-over step, PREPARE_COPY_FRAG
storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +1 -0
Add new take-over step, PREPARE_COPY_FRAG
storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +3 -0
Add new take-over step, PREPARE_COPY_FRAG
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +34 -0
Add new take-over step, PREPARE_COPY_FRAG
storage/ndb/test/ndbapi/testSystemRestart.cpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +54 -0
Add testcase Bug27434
storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +4 -0
new testcase
storage/ndb/test/src/NdbRestarts.cpp@stripped, 2007-03-27 16:06:45+02:00,
jonas@stripped +3 -2
Add new error code to NFDuringNR_codes
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: perch.ndb.mysql.com
# Root: /home/jonas/src/51-telco
--- 1.17/storage/ndb/include/ndb_version.h.in 2007-03-27 16:06:53 +02:00
+++ 1.18/storage/ndb/include/ndb_version.h.in 2007-03-27 16:06:53 +02:00
@@ -75,6 +75,7 @@
#define NDBD_NODE_VERSION_REP MAKE_VERSION(6,1,1)
#define NDBD_255_NODES_VERSION MAKE_VERSION(5,1,4)
+#define NDBD_PREPARE_COPY_FRAG_VERSION MAKE_VERSION(6,2,1)
#endif
--- 1.87/storage/ndb/test/run-test/daily-basic-tests.txt 2007-03-27 16:06:53 +02:00
+++ 1.88/storage/ndb/test/run-test/daily-basic-tests.txt 2007-03-27 16:06:53 +02:00
@@ -804,6 +804,10 @@
cmd: testNodeRestart
args: -n Bug27466 T1
+max-time: 1500
+cmd: testSystemRestart
+args: -n Bug27434 T1
+
max-time: 1000
cmd: test_event
args: -l 10 -n Bug27169 T1
--- 1.35/storage/ndb/include/kernel/GlobalSignalNumbers.h 2007-03-27 16:06:53 +02:00
+++ 1.36/storage/ndb/include/kernel/GlobalSignalNumbers.h 2007-03-27 16:06:53 +02:00
@@ -196,9 +196,11 @@
/* 132 not unused */
/* 133 not unused */
#define GSN_CM_HEARTBEAT 134 /* distr. */
-/* 135 unused */
-/* 136 unused */
-/* 137 unused */
+
+#define GSN_PREPARE_COPY_FRAG_REQ 135
+#define GSN_PREPARE_COPY_FRAG_REF 136
+#define GSN_PREPARE_COPY_FRAG_CONF 137
+
#define GSN_CM_NODEINFOCONF 138 /* distr. */
#define GSN_CM_NODEINFOREF 139 /* distr. */
#define GSN_CM_NODEINFOREQ 140 /* distr. */
--- 1.5/storage/ndb/include/kernel/signaldata/CopyFrag.hpp 2007-03-27 16:06:53 +02:00
+++ 1.6/storage/ndb/include/kernel/signaldata/CopyFrag.hpp 2007-03-27 16:06:53 +02:00
@@ -95,4 +95,41 @@
STATIC_CONST( SignalLength = 3 );
};
+struct PrepareCopyFragReq // request sent by DIH to LQH on the starting node before a fragment copy
+{
+ STATIC_CONST( SignalLength = 6 ); // one word per Uint32 field below
+
+ Uint32 senderRef; // block reference of the requester; the CONF is sent back to it
+ Uint32 senderData; // echoed unchanged in the reply (DIH passes its take-over record index)
+ Uint32 tableId; // table owning the fragment
+ Uint32 fragId; // fragment id within that table
+ Uint32 copyNodeId; // node the fragment is copied from (source)
+ Uint32 startingNodeId; // node the fragment is copied to (destination)
+};
+
+struct PrepareCopyFragRef // negative reply to PrepareCopyFragReq; DIH repacks it as a CopyFragRef
+{
+ Uint32 senderRef; // block reference of the replying block
+ Uint32 senderData; // value taken from the request; DIH uses it to find the take-over record
+ Uint32 tableId; // table owning the fragment
+ Uint32 fragId; // fragment id within that table
+ Uint32 copyNodeId; // source node; becomes CopyFragRef::sendingNodeId in DIH
+ Uint32 startingNodeId; // destination node
+ Uint32 errorCode; // failure reason; forwarded as CopyFragRef::errorCode in DIH
+
+ STATIC_CONST( SignalLength = 7 ); // one word per Uint32 field above
+};
+
+struct PrepareCopyFragConf // positive reply to PrepareCopyFragReq; same layout as the request
+{
+ STATIC_CONST( SignalLength = 6 ); // one word per Uint32 field below
+
+ Uint32 senderRef; // block reference of the replying LQH
+ Uint32 senderData; // copied from the request
+ Uint32 tableId; // copied from the request
+ Uint32 fragId; // copied from the request
+ Uint32 copyNodeId; // copied from the request (source node)
+ Uint32 startingNodeId; // copied from the request (destination node)
+};
+
#endif
--- 1.19/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2007-03-27 16:06:53
+02:00
+++ 1.20/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2007-03-27 16:06:53
+02:00
@@ -632,5 +632,9 @@
,{ GSN_ROUTE_ORD, "ROUTE_ORD" }
,{ GSN_NODE_VERSION_REP, "NODE_VERSION_REP" }
+
+ ,{ GSN_PREPARE_COPY_FRAG_REQ, "PREPARE_COPY_FRAG_REQ" }
+ ,{ GSN_PREPARE_COPY_FRAG_REF, "PREPARE_COPY_FRAG_REF" }
+ ,{ GSN_PREPARE_COPY_FRAG_CONF, "PREPARE_COPY_FRAG_CONF" }
};
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
--- 1.40/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2007-03-27 16:06:53 +02:00
+++ 1.41/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2007-03-27 16:06:53 +02:00
@@ -3,7 +3,7 @@
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4029
-Next DBLQH 5045
+Next DBLQH 5047
Next DBDICT 6007
Next DBDIH 7183
Next DBTC 8040
@@ -177,6 +177,8 @@
time-out handling. They can also be used to test multiple node failure
handling.
+5045: Crash in PREPARE_COPY_FRAG_REQ
+5046: Crash if LQHKEYREQ (NrCopy) comes when frag-state is incorrect
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH
-------------------------------------------------
--- 1.38/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2007-03-27 16:06:53 +02:00
+++ 1.39/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2007-03-27 16:06:53 +02:00
@@ -544,7 +544,8 @@
TO_WAIT_ENDING = 21,
ENDING = 22,
- STARTING_LOCAL_FRAGMENTS = 24
+ STARTING_LOCAL_FRAGMENTS = 24,
+ PREPARE_COPY = 25
};
enum ToSlaveStatus {
TO_SLAVE_IDLE = 0,
@@ -671,6 +672,8 @@
void execNODE_FAILREP(Signal *);
void execCOPY_FRAGCONF(Signal *);
void execCOPY_FRAGREF(Signal *);
+ void execPREPARE_COPY_FRAG_REF(Signal*);
+ void execPREPARE_COPY_FRAG_CONF(Signal*);
void execDIADDTABREQ(Signal *);
void execDIGETNODESREQ(Signal *);
void execDIRELEASEREQ(Signal *);
@@ -1113,6 +1116,7 @@
void sendStartTo(Signal *, Uint32 takeOverPtr);
void startNextCopyFragment(Signal *, Uint32 takeOverPtr);
void toCopyFragLab(Signal *, Uint32 takeOverPtr);
+ void toStartCopyFrag(Signal *, TakeOverRecordPtr);
void startHsAddFragConfLab(Signal *);
void prepareSendCreateFragReq(Signal *, Uint32 takeOverPtr);
void sendUpdateTo(Signal *, Uint32 takeOverPtr, Uint32 updateState);
--- 1.23/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2007-03-27 16:06:53 +02:00
+++ 1.24/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2007-03-27 16:06:53 +02:00
@@ -259,6 +259,11 @@
addRecSignal(GSN_START_FRAGREF,
&Dbdih::execSTART_FRAGREF);
+
+ addRecSignal(GSN_PREPARE_COPY_FRAG_REF,
+ &Dbdih::execPREPARE_COPY_FRAG_REF);
+ addRecSignal(GSN_PREPARE_COPY_FRAG_CONF,
+ &Dbdih::execPREPARE_COPY_FRAG_CONF);
apiConnectRecord = 0;
connectRecord = 0;
--- 1.124/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-03-27 16:06:53 +02:00
+++ 1.125/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-03-27 16:06:53 +02:00
@@ -3139,6 +3139,81 @@
TakeOverRecordPtr takeOverPtr;
RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
+ /**
+ * Inform starting node that TakeOver is about to start
+ */
+ Uint32 nodeId = takeOverPtr.p->toStartingNode;
+
+ if (getNodeInfo(nodeId).m_version >= NDBD_PREPARE_COPY_FRAG_VERSION)
+ {
+ jam();
+ TabRecordPtr tabPtr;
+ tabPtr.i = takeOverPtr.p->toCurrentTabref;
+ ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
+
+ FragmentstorePtr fragPtr;
+ getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
+ Uint32 nodes[MAX_REPLICAS];
+ extractNodeInfo(fragPtr.p, nodes);
+
+ PrepareCopyFragReq* req= (PrepareCopyFragReq*)signal->getDataPtrSend();
+ req->senderRef = reference();
+ req->senderData = takeOverPtrI;
+ req->tableId = takeOverPtr.p->toCurrentTabref;
+ req->fragId = takeOverPtr.p->toCurrentFragid;
+ req->copyNodeId = nodes[0]; // Src
+ req->startingNodeId = takeOverPtr.p->toStartingNode; // Dst
+ Uint32 ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
+
+ sendSignal(ref, GSN_PREPARE_COPY_FRAG_REQ, signal,
+ PrepareCopyFragReq::SignalLength, JBB);
+
+ takeOverPtr.p->toMasterStatus = TakeOverRecord::PREPARE_COPY;
+ return;
+ }
+
+ toStartCopyFrag(signal, takeOverPtr);
+}
+
+void
+Dbdih::execPREPARE_COPY_FRAG_REF(Signal* signal)
+{
+ jamEntry();
+ PrepareCopyFragRef ref = *(PrepareCopyFragRef*)signal->getDataPtr(); // copied by value; the signal data is rewritten below (presumably the same buffer - confirm)
+
+ TakeOverRecordPtr takeOverPtr;
+ RETURN_IF_TAKE_OVER_INTERRUPTED(ref.senderData, takeOverPtr); // senderData is the take-over record index put in the REQ
+
+ ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::PREPARE_COPY); // a REF is only valid while waiting for the prepare step
+
+ /**
+ * Repack the REF as a COPY_FRAGREF and hand it to execCOPY_FRAGREF
+ */
+ CopyFragRef * cfref = (CopyFragRef*)signal->getDataPtrSend();
+ cfref->userPtr = ref.senderData;
+ cfref->startingNodeId = ref.startingNodeId;
+ cfref->errorCode = ref.errorCode;
+ cfref->tableId = ref.tableId;
+ cfref->fragId = ref.fragId;
+ cfref->sendingNodeId = ref.copyNodeId;
+ takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG; // presumably the state execCOPY_FRAGREF expects - confirm
+ execCOPY_FRAGREF(signal); // direct call, not a sent signal
+}
+
+void
+Dbdih::execPREPARE_COPY_FRAG_CONF(Signal* signal)
+{ // NOTE(review): no jamEntry() and no toMasterStatus == PREPARE_COPY check, unlike execPREPARE_COPY_FRAG_REF - confirm intentional
+ PrepareCopyFragConf conf = *(PrepareCopyFragConf*)signal->getDataPtr();
+
+ TakeOverRecordPtr takeOverPtr;
+ RETURN_IF_TAKE_OVER_INTERRUPTED(conf.senderData, takeOverPtr); // senderData is the take-over record index put in the REQ
+
+ toStartCopyFrag(signal, takeOverPtr); // prepare step confirmed; continue the take-over
+}
+
+void
+Dbdih::toStartCopyFrag(Signal* signal, TakeOverRecordPtr takeOverPtr)
+{
CreateReplicaRecordPtr createReplicaPtr;
createReplicaPtr.i = 0;
ptrAss(createReplicaPtr, createReplicaRecord);
@@ -3162,8 +3237,8 @@
createReplicaPtr.p->hotSpareUse = true;
createReplicaPtr.p->dataNodeId = takeOverPtr.p->toStartingNode;
- prepareSendCreateFragReq(signal, takeOverPtrI);
-}//Dbdih::toCopyFragLab()
+ prepareSendCreateFragReq(signal, takeOverPtr.i);
+}//Dbdih::toStartCopyFrag()
void Dbdih::prepareSendCreateFragReq(Signal* signal, Uint32 takeOverPtrI)
{
@@ -4555,12 +4630,21 @@
ok = true;
jam();
//-----------------------------------------------------------------------
- // The starting node will discover the problem. We will receive either
+ // The copying node will discover the problem. We will receive either
// COPY_FRAGREQ or COPY_FRAGCONF and then we can release the take over
// record and end the process. If the copying node should also die then
// we will try to send prepare create fragment and will then discover
// that the starting node has failed.
//-----------------------------------------------------------------------
+ break;
+ case TakeOverRecord::PREPARE_COPY:
+ ok = true;
+ jam();
+ /**
+ * We're waiting for the starting node...which just died...
+ * endTakeOver
+ */
+ endTakeOver(takeOverPtr.i);
break;
case TakeOverRecord::COPY_ACTIVE:
ok = true;
--- 1.62/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2007-03-27 16:06:53 +02:00
+++ 1.63/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2007-03-27 16:06:53 +02:00
@@ -2148,6 +2148,7 @@
void execSTORED_PROCCONF(Signal* signal);
void execSTORED_PROCREF(Signal* signal);
void execCOPY_FRAGREQ(Signal* signal);
+ void execPREPARE_COPY_FRAG_REQ(Signal* signal);
void execUPDATE_FRAG_DIST_KEY_ORD(Signal*);
void execCOPY_ACTIVEREQ(Signal* signal);
void execCOPY_STATEREQ(Signal* signal);
--- 1.23/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp 2007-03-27 16:06:53 +02:00
+++ 1.24/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp 2007-03-27 16:06:53 +02:00
@@ -303,6 +303,9 @@
addRecSignal(GSN_UPDATE_FRAG_DIST_KEY_ORD,
&Dblqh::execUPDATE_FRAG_DIST_KEY_ORD);
+ addRecSignal(GSN_PREPARE_COPY_FRAG_REQ,
+ &Dblqh::execPREPARE_COPY_FRAG_REQ);
+
initData();
#ifdef VM_TRACE
--- 1.153/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2007-03-27 16:06:53 +02:00
+++ 1.154/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2007-03-27 16:06:53 +02:00
@@ -3622,6 +3622,7 @@
{
ndbout_c("fragptr.p->fragStatus: %d",
fragptr.p->fragStatus);
+ CRASH_INSERTION(5046);
}
ndbassert(fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION);
fragptr.p->m_copy_started_state = Fragrecord::AC_NR_COPY;
@@ -9997,6 +9998,39 @@
return md5_hash(Tmp, keyLen);
}//Dblqh::calculateHash()
+
+/**
+ * PREPARE_COPY_FRAG_REQ: mark the fragment as ACTIVE_CREATION ahead of a copy and reply with CONF
+ */
+void
+Dblqh::execPREPARE_COPY_FRAG_REQ(Signal* signal)
+{
+ jamEntry();
+ PrepareCopyFragReq req = *(PrepareCopyFragReq*)signal->getDataPtr(); // copied by value; the signal data is reused for the reply below
+
+ CRASH_INSERTION(5045); // test hook, listed in ERROR_codes.txt as "Crash in PREPARE_COPY_FRAG_REQ"
+
+ tabptr.i = req.tableId;
+ ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
+ ndbrequire(getFragmentrec(signal, req.fragId)); // presumably sets fragptr for the lines below - confirm
+ fragptr.p->m_copy_started_state = Fragrecord::AC_IGNORED;
+ fragptr.p->fragStatus = Fragrecord::ACTIVE_CREATION;
+ fragptr.p->logFlag = Fragrecord::STATE_FALSE;
+
+ /**
+ * Always answer with CONF, echoing the request fields; no REF is sent from here
+ */
+
+ PrepareCopyFragConf* conf = (PrepareCopyFragConf*)signal->getDataPtrSend();
+ conf->senderData = req.senderData;
+ conf->senderRef = reference();
+ conf->tableId = req.tableId;
+ conf->fragId = req.fragId;
+ conf->copyNodeId = req.copyNodeId;
+ conf->startingNodeId = req.startingNodeId;
+ sendSignal(req.senderRef, GSN_PREPARE_COPY_FRAG_CONF,
+ signal, PrepareCopyFragConf::SignalLength, JBB);
+}
/* *************************************** */
/* COPY_FRAGREQ: Start copying a fragment */
--- 1.13/storage/ndb/test/ndbapi/testSystemRestart.cpp 2007-03-27 16:06:53 +02:00
+++ 1.14/storage/ndb/test/ndbapi/testSystemRestart.cpp 2007-03-27 16:06:53 +02:00
@@ -1219,6 +1219,54 @@
return result;
}
+int
+runBug27434(NDBT_Context* ctx, NDBT_Step* step) // test step for bug#27434; returns NDBT_OK or the failure recorded in result
+{
+ int result = NDBT_OK;
+ NdbRestarter restarter;
+ Ndb* pNdb = GETNDB(step); // NOTE(review): not used anywhere in this function
+ const Uint32 nodeCount = restarter.getNumDbNodes();
+
+ if (nodeCount < 2) // needs at least two data nodes; otherwise trivially OK
+ return NDBT_OK;
+
+ int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
+ int dump[] = { DumpStateOrd::DihStartLcpImmediately };
+
+ int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; // subscribe to checkpoint events
+ NdbLogEventHandle handle = // NOTE(review): never released in this function - confirm whether a destroy call is needed
+ ndb_mgm_create_logevent_handle(restarter.handle, filter);
+
+ struct ndb_logevent event;
+
+ do { // runs once; presumably lets CHECK leave early on failure - confirm against the CHECK macro
+ int node1 = restarter.getDbNodeId(rand() % nodeCount); // pick a random data node
+ CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
+ NdbSleep_SecSleep(3);
+ CHECK(restarter.waitNodesNoStart(&node1, 1) == 0); // wait until node1 is in the no-start state
+
+ CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
+
+ for (Uint32 i = 0; i<3; i++) // three local checkpoints while node1 is down
+ {
+ CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
+ while(ndb_logevent_get_next(handle, &event, 0) >= 0 && // empty-body loop: skip events until an LCP has started
+ event.type != NDB_LE_LocalCheckpointStarted);
+ while(ndb_logevent_get_next(handle, &event, 0) >= 0 && // empty-body loop: skip events until that LCP has completed
+ event.type != NDB_LE_LocalCheckpointCompleted);
+ }
+
+ restarter.restartAll(false, true, true); // NOTE(review): return value ignored
+ NdbSleep_SecSleep(3);
+ CHECK(restarter.waitClusterNoStart() == 0);
+ restarter.insertErrorInNode(node1, 5046); // 5046 per ERROR_codes.txt: crash if LQHKEYREQ (NrCopy) arrives in wrong frag-state; return value ignored
+ restarter.startAll(); // NOTE(review): return value ignored
+ CHECK(restarter.waitClusterStarted() == 0); // cluster must come up without hitting the inserted crash
+ } while(false);
+
+ return result;
+}
+
NDBT_TESTSUITE(testSystemRestart);
TESTCASE("SR1",
"Basic system restart test. Focus on testing restart from REDO log.\n"
@@ -1398,6 +1446,12 @@
INITIALIZER(runClearTable);
STEP(runBug24664);
FINALIZER(runClearTable);
+}
+TESTCASE("Bug27434",
+ "")
+{
+ INITIALIZER(runWaitStarted);
+ STEP(runBug27434);
}
NDBT_TESTSUITE_END(testSystemRestart);
--- 1.9/storage/ndb/test/src/NdbRestarts.cpp 2007-03-27 16:06:53 +02:00
+++ 1.10/storage/ndb/test/src/NdbRestarts.cpp 2007-03-27 16:06:53 +02:00
@@ -607,6 +607,7 @@
5026,
7139,
7132,
+ 5046,
//LCP
8000,
@@ -630,8 +631,8 @@
int nodeId = _restarter.getDbNodeId(randomId);
int error = NFDuringNR_codes[i];
- g_info << _restart->m_name << ": node = " << nodeId
- << " error code = " << error << endl;
+ g_err << _restart->m_name << ": node = " << nodeId
+ << " error code = " << error << endl;
CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
"Could not restart node "<< nodeId);
| Thread |
|---|
| • bk commit into 5.1 tree (jonas:1.2505) BUG#27434 | jonas | 27 Mar |