#At file:///home/tomas/mysql_src/mysql-5.1-telco-6.2-merge/
2774 Tomas Ulin 2008-12-16
Bug #41462 Mysqld/ndbapi disconnects too aggressively during node restart
modified:
storage/ndb/src/ndbapi/ClusterMgr.cpp
storage/ndb/src/ndbapi/ClusterMgr.hpp
storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp
storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp
storage/ndb/src/ndbapi/Ndbif.cpp
=== modified file 'storage/ndb/src/ndbapi/ClusterMgr.cpp'
--- a/storage/ndb/src/ndbapi/ClusterMgr.cpp 2008-04-25 07:14:29 +0000
+++ b/storage/ndb/src/ndbapi/ClusterMgr.cpp 2008-12-16 20:51:49 +0000
@@ -556,8 +556,7 @@ ClusterMgr::reportNodeFailed(NodeId node
theFacade.ReportNodeDead(nodeId);
}
- theNode.nfCompleteRep = false;
- if(noOfAliveNodes == 0)
+ if (noOfConnectedNodes == 0)
{
if (!global_flag_skip_invalidate_cache &&
theFacade.m_globalDictCache)
@@ -568,6 +567,10 @@ ClusterMgr::reportNodeFailed(NodeId node
m_connect_count ++;
m_cluster_state = CS_waiting_for_clean_cache;
}
+ }
+ theNode.nfCompleteRep = false;
+ if(noOfAliveNodes == 0)
+ {
NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
=== modified file 'storage/ndb/src/ndbapi/ClusterMgr.hpp'
--- a/storage/ndb/src/ndbapi/ClusterMgr.hpp 2007-05-09 14:31:16 +0000
+++ b/storage/ndb/src/ndbapi/ClusterMgr.hpp 2008-12-16 20:51:49 +0000
@@ -85,7 +85,6 @@ public:
const Node & getNodeInfo(NodeId) const;
Uint32 getNoOfConnectedNodes() const;
- bool isClusterAlive() const;
void hb_received(NodeId);
Uint32 m_connect_count;
@@ -144,11 +143,6 @@ ClusterMgr::getNoOfConnectedNodes() cons
return noOfConnectedNodes;
}
-inline
-bool
-ClusterMgr::isClusterAlive() const {
- return noOfAliveNodes != 0;
-}
inline
void
ClusterMgr::hb_received(NodeId nodeId) {
=== modified file 'storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp'
--- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp 2008-09-24 12:27:11 +0000
+++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp 2008-12-16 20:51:49 +0000
@@ -1080,6 +1080,8 @@ NdbEventBuffer::NdbEventBuffer(Ndb *ndb)
// initialize lists
bzero(&g_empty_gci_container, sizeof(Gci_container));
init_gci_containers();
+
+ m_alive_node_bit_mask.clear();
}
NdbEventBuffer::~NdbEventBuffer()
@@ -1836,11 +1838,16 @@ NdbEventBuffer::complete_bucket(Gci_cont
void
NdbEventBuffer::execSUB_GCP_COMPLETE_REP(const SubGcpCompleteRep * const rep,
- Uint32 len)
+ Uint32 len, int complete_cluster_failure)
{
- if (unlikely(m_active_op_count == 0))
+ if (!complete_cluster_failure)
{
- return;
+ m_alive_node_bit_mask.set(refToNode(rep->senderRef));
+
+ if (unlikely(m_active_op_count == 0))
+ {
+ return;
+ }
}
DBUG_ENTER_EVENT("NdbEventBuffer::execSUB_GCP_COMPLETE_REP");
@@ -2089,13 +2096,15 @@ NdbEventBuffer::report_node_connected(Ui
}
void
-NdbEventBuffer::report_node_failure(Uint32 node_id)
+NdbEventBuffer::report_node_failure_completed(Uint32 node_id)
{
+ m_alive_node_bit_mask.clear(node_id);
+
NdbEventOperation* op= m_ndb->getEventOperation(0);
if (op == 0)
return;
- DBUG_ENTER("NdbEventBuffer::report_node_failure");
+ DBUG_ENTER("NdbEventBuffer::report_node_failure_completed");
SubTableData data;
LinearSectionPtr ptr[3];
bzero(&data, sizeof(data));
@@ -2110,7 +2119,7 @@ NdbEventBuffer::report_node_failure(Uint
data.flags = SubTableData::LOG;
Uint64 gci = Uint64((m_latestGCI >> 32) + 1) << 32;
- find_max_known_gci(&gci);
+ bool found = find_max_known_gci(&gci);
data.gci_hi = Uint32(gci >> 32);
data.gci_lo = Uint32(gci);
@@ -2120,21 +2129,15 @@ NdbEventBuffer::report_node_failure(Uint
*/
// no need to lock()/unlock(), receive thread calls this
insert_event(&op->m_impl, data, ptr, data.senderData);
- DBUG_VOID_RETURN;
-}
-
-void
-NdbEventBuffer::completeClusterFailed()
-{
- NdbEventOperation* op= m_ndb->getEventOperation(0);
- if (op == 0)
- return;
- DBUG_ENTER("NdbEventBuffer::completeClusterFailed");
+ if (!m_alive_node_bit_mask.isclear())
+ DBUG_VOID_RETURN;
+ /*
+ * Cluster failure
+ */
- Uint64 gci = Uint64((m_latestGCI >> 32) + 1) << 32;
- bool found = find_max_known_gci(&gci);
+ DBUG_PRINT("info", ("Cluster failure"));
Uint64 * array = m_known_gci.getBase();
Uint32 mask = m_known_gci.size() - 1;
@@ -2169,18 +2172,10 @@ NdbEventBuffer::completeClusterFailed()
/**
* Inject new event
*/
- SubTableData data;
- LinearSectionPtr ptr[3];
- bzero(&data, sizeof(data));
- bzero(ptr, sizeof(ptr));
-
data.tableId = ~0;
data.requestInfo = 0;
SubTableData::setOperation(data.requestInfo,
NdbDictionary::Event::_TE_CLUSTER_FAILURE);
- data.flags = SubTableData::LOG;
- data.gci_hi = Uint32(gci >> 32);
- data.gci_lo = Uint32(gci);
/**
* Insert this event for each operation
@@ -2212,7 +2207,7 @@ NdbEventBuffer::completeClusterFailed()
rep.gci_lo= gci & 0xFFFFFFFF;
rep.gcp_complete_rep_count= cnt;
rep.flags = 0;
- execSUB_GCP_COMPLETE_REP(&rep, SubGcpCompleteRep::SignalLength);
+ execSUB_GCP_COMPLETE_REP(&rep, SubGcpCompleteRep::SignalLength, 1);
DBUG_VOID_RETURN;
}
=== modified file 'storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp'
--- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp 2008-02-11 13:24:17 +0000
+++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp 2008-12-16 20:51:49 +0000
@@ -424,8 +424,7 @@ public:
and added to all event ops listed as active or pending delete
in m_dropped_ev_op using insertDataL, includeing the blob
event ops referenced by a regular event op.
- - NdbEventBuffer::report_node_failure
- - NdbEventBuffer::completeClusterFailed
+ - NdbEventBuffer::report_node_failure_completed
TE_ACTIVE is sent from the kernel on initial execute/start of the
event op, but is also internally generetad on node connect like
@@ -528,12 +527,12 @@ public:
int insertDataL(NdbEventOperationImpl *op,
const SubTableData * const sdata, Uint32 len,
LinearSectionPtr ptr[3]);
- void execSUB_GCP_COMPLETE_REP(const SubGcpCompleteRep * const, Uint32 len);
+ void execSUB_GCP_COMPLETE_REP(const SubGcpCompleteRep * const, Uint32 len,
+ int complete_cluster_failure= 0);
void complete_outof_order_gcis();
void report_node_connected(Uint32 node_id);
- void report_node_failure(Uint32 node_id);
- void completeClusterFailed();
+ void report_node_failure_completed(Uint32 node_id);
// used by user thread
Uint64 getLatestGCI();
@@ -664,6 +663,8 @@ private:
void complete_bucket(Gci_container*);
bool find_max_known_gci(Uint64 * res) const;
void resize_known_gci();
+
+ Bitmask<(unsigned int)_NDB_NODE_BITMASK_SIZE> m_alive_node_bit_mask;
};
inline
=== modified file 'storage/ndb/src/ndbapi/Ndbif.cpp'
--- a/storage/ndb/src/ndbapi/Ndbif.cpp 2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/ndbapi/Ndbif.cpp 2008-12-16 20:51:49 +0000
@@ -269,13 +269,7 @@ Ndb::report_node_failure_completed(Uint3
{
// node failed
// eventOperations in the ndb object should be notified
- theEventBuffer->report_node_failure(node_id);
- if(!theImpl->m_transporter_facade->theClusterMgr->isClusterAlive())
- {
- // cluster is unavailable,
- // eventOperations in the ndb object should be notified
- theEventBuffer->completeClusterFailed();
- }
+ theEventBuffer->report_node_failure_completed(node_id);
}
abortTransactionsAfterNodeFailure(node_id);
| Thread | Author | Date |
|---|---|---|
| • bzr commit into mysql-5.1 branch (tomas.ulin:2774) Bug#41462 | Tomas Ulin | 19 Dec |