Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-04-25 08:30:39+02:00, jonas@stripped +4 -0
ndb - bug#36245
NF_COMPLETEREP can get lost on cascading master failure
causing *big* pain and misery
storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2008-04-25 08:30:37+02:00, jonas@stripped +2 -2
add new error code
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp@stripped, 2008-04-25 08:30:37+02:00, jonas@stripped +3 -3
new error insert
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2008-04-25 08:30:37+02:00, jonas@stripped +39 -22
fix bug by sending NF_COMPLETEREP from all nodes
storage/ndb/src/ndbapi/ClusterMgr.cpp@stripped, 2008-04-25 08:30:38+02:00, jonas@stripped +5 -2
only signal NF_COMPLETEREP once to TransporterFacade
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: perch.ndb.mysql.com
# Root: /home/jonas/src/drop6
--- 1.32/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-04-25 08:30:41 +02:00
+++ 1.33/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-04-25 08:30:41 +02:00
@@ -1,4 +1,4 @@
-Next QMGR 1
+Next QMGR 937
Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
@@ -6,7 +6,7 @@
Next DBLQH 5051
Next DBDICT 6007
Next DBDIH 7211
-Next DBTC 8063
+Next DBTC 8064
Next CMVMI 9000
Next BACKUP 10022
Next DBUTIL 11002
--- 1.112/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-04-25 08:30:41 +02:00
+++ 1.113/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-04-25 08:30:41 +02:00
@@ -2713,6 +2713,8 @@
if (seizeCacheRecord(signal) != 0) {
return;
}//if
+
+ CRASH_INSERTION(8063);
TcConnectRecord * const regTcPtr = tcConnectptr.p;
CacheRecord * const regCachePtr = cachePtr.p;
@@ -4571,9 +4573,7 @@
CLEAR_ERROR_INSERT_VALUE;
return;
}//if
- if (ERROR_INSERTED(8030)) {
- systemErrorLab(signal, __LINE__);
- }//if
+ CRASH_INSERTION(8030);
if (ERROR_INSERTED(8025)) {
SET_ERROR_INSERT_VALUE(8026);
return;
--- 1.43/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2008-04-25 08:30:41 +02:00
+++ 1.44/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2008-04-25 08:30:41 +02:00
@@ -2605,28 +2605,37 @@
progError(__LINE__, 0, buf);
systemErrorLab(signal, __LINE__);
}//if
- if (cpresident == getOwnNodeId()) {
+
+ if (cpresident == getOwnNodeId())
+ {
jam();
- /**
- * Prepare a NFCompleteRep and send to all connected API's
- * They can then abort all transaction waiting for response from
- * the failed node
- */
- NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
- nfComp->blockNo = QMGR_REF;
- nfComp->nodeId = getOwnNodeId();
- nfComp->failedNodeId = failedNodePtr.i;
+
+ CRASH_INSERTION(936);
+ }
- for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
+ /**
+ * Prepare a NFCompleteRep and send to all connected API's
+ * They can then abort all transaction waiting for response from
+ * the failed node
+ *
+ * NOTE: This is sent from all nodes, as otherwise we would need
+ * take-over if cpresident dies before sending this
+ */
+ NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
+ nfComp->blockNo = QMGR_REF;
+ nfComp->nodeId = getOwnNodeId();
+ nfComp->failedNodeId = failedNodePtr.i;
+
+ for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
+ {
+ jam();
+ ptrAss(nodePtr, nodeRec);
+ if (nodePtr.p->phase == ZAPI_ACTIVE){
jam();
- ptrAss(nodePtr, nodeRec);
- if (nodePtr.p->phase == ZAPI_ACTIVE){
- jam();
- sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBA);
- }//if
- }//for
- }
+ sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
+ NFCompleteRep::SignalLength, JBA);
+ }//if
+ }//for
return;
}//Qmgr::execNDB_FAILCONF()
@@ -3651,9 +3660,17 @@
jam();
NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
}//if
- sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
- NodeFailRep::SignalLength, JBB);
-
+
+ if (ERROR_INSERTED(936))
+ {
+ sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
+ 200, NodeFailRep::SignalLength);
+ }
+ else
+ {
+ sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
+ NodeFailRep::SignalLength, JBB);
+ }
guard0 = cnoCommitFailedNodes - 1;
arrGuard(guard0, MAX_NDB_NODES);
/**--------------------------------------------------------------------
--- 1.32/storage/ndb/src/ndbapi/ClusterMgr.cpp 2008-04-25 08:30:41 +02:00
+++ 1.33/storage/ndb/src/ndbapi/ClusterMgr.cpp 2008-04-25 08:30:41 +02:00
@@ -417,8 +417,11 @@
const NodeId nodeId = nfComp->failedNodeId;
assert(nodeId > 0 && nodeId < MAX_NDB_NODES);
- theFacade.ReportNodeFailureComplete(nodeId);
- theNodes[nodeId].nfCompleteRep = true;
+ if (theNodes[nodeId].nfCompleteRep == false)
+ {
+ theFacade.ReportNodeFailureComplete(nodeId);
+ theNodes[nodeId].nfCompleteRep = true;
+ }
}
void
| Thread |
|---|
| • bk commit into 5.1 tree (jonas:1.2203) BUG#36245 | jonas | 25 Apr |