From: Date: December 16 2008 9:51pm Subject: bzr commit into mysql-5.1 branch (tomas.ulin:2774) Bug#41462 List-Archive: http://lists.mysql.com/commits/62087 X-Bug: 41462 Message-Id: <20081216205159.41A4B6110E7@linux.local> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit #At file:///home/tomas/mysql_src/mysql-5.1-telco-6.2-merge/ 2774 Tomas Ulin 2008-12-16 Bug #41462 Mysqld/ndbapi disconnects too aggressively during node restart modified: storage/ndb/src/ndbapi/ClusterMgr.cpp storage/ndb/src/ndbapi/ClusterMgr.hpp storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp storage/ndb/src/ndbapi/Ndbif.cpp === modified file 'storage/ndb/src/ndbapi/ClusterMgr.cpp' --- a/storage/ndb/src/ndbapi/ClusterMgr.cpp 2008-04-25 07:14:29 +0000 +++ b/storage/ndb/src/ndbapi/ClusterMgr.cpp 2008-12-16 20:51:49 +0000 @@ -556,8 +556,7 @@ ClusterMgr::reportNodeFailed(NodeId node theFacade.ReportNodeDead(nodeId); } - theNode.nfCompleteRep = false; - if(noOfAliveNodes == 0) + if (noOfConnectedNodes == 0) { if (!global_flag_skip_invalidate_cache && theFacade.m_globalDictCache) @@ -568,6 +567,10 @@ ClusterMgr::reportNodeFailed(NodeId node m_connect_count ++; m_cluster_state = CS_waiting_for_clean_cache; } + } + theNode.nfCompleteRep = false; + if(noOfAliveNodes == 0) + { NFCompleteRep rep; for(Uint32 i = 1; isenderRef)); + + if (unlikely(m_active_op_count == 0)) + { + return; + } } DBUG_ENTER_EVENT("NdbEventBuffer::execSUB_GCP_COMPLETE_REP"); @@ -2089,13 +2096,15 @@ NdbEventBuffer::report_node_connected(Ui } void -NdbEventBuffer::report_node_failure(Uint32 node_id) +NdbEventBuffer::report_node_failure_completed(Uint32 node_id) { + m_alive_node_bit_mask.clear(node_id); + NdbEventOperation* op= m_ndb->getEventOperation(0); if (op == 0) return; - DBUG_ENTER("NdbEventBuffer::report_node_failure"); + DBUG_ENTER("NdbEventBuffer::report_node_failure_completed"); SubTableData data; LinearSectionPtr ptr[3]; 
bzero(&data, sizeof(data)); @@ -2110,7 +2119,7 @@ NdbEventBuffer::report_node_failure(Uint data.flags = SubTableData::LOG; Uint64 gci = Uint64((m_latestGCI >> 32) + 1) << 32; - find_max_known_gci(&gci); + bool found = find_max_known_gci(&gci); data.gci_hi = Uint32(gci >> 32); data.gci_lo = Uint32(gci); @@ -2120,21 +2129,15 @@ NdbEventBuffer::report_node_failure(Uint */ // no need to lock()/unlock(), receive thread calls this insert_event(&op->m_impl, data, ptr, data.senderData); - DBUG_VOID_RETURN; -} - -void -NdbEventBuffer::completeClusterFailed() -{ - NdbEventOperation* op= m_ndb->getEventOperation(0); - if (op == 0) - return; - DBUG_ENTER("NdbEventBuffer::completeClusterFailed"); + if (!m_alive_node_bit_mask.isclear()) + DBUG_VOID_RETURN; + /* + * Cluster failure + */ - Uint64 gci = Uint64((m_latestGCI >> 32) + 1) << 32; - bool found = find_max_known_gci(&gci); + DBUG_PRINT("info", ("Cluster failure")); Uint64 * array = m_known_gci.getBase(); Uint32 mask = m_known_gci.size() - 1; @@ -2169,18 +2172,10 @@ NdbEventBuffer::completeClusterFailed() /** * Inject new event */ - SubTableData data; - LinearSectionPtr ptr[3]; - bzero(&data, sizeof(data)); - bzero(ptr, sizeof(ptr)); - data.tableId = ~0; data.requestInfo = 0; SubTableData::setOperation(data.requestInfo, NdbDictionary::Event::_TE_CLUSTER_FAILURE); - data.flags = SubTableData::LOG; - data.gci_hi = Uint32(gci >> 32); - data.gci_lo = Uint32(gci); /** * Insert this event for each operation @@ -2212,7 +2207,7 @@ NdbEventBuffer::completeClusterFailed() rep.gci_lo= gci & 0xFFFFFFFF; rep.gcp_complete_rep_count= cnt; rep.flags = 0; - execSUB_GCP_COMPLETE_REP(&rep, SubGcpCompleteRep::SignalLength); + execSUB_GCP_COMPLETE_REP(&rep, SubGcpCompleteRep::SignalLength, 1); DBUG_VOID_RETURN; } === modified file 'storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp' --- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp 2008-02-11 13:24:17 +0000 +++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp 2008-12-16 20:51:49 +0000 @@ 
-424,8 +424,7 @@ public: and added to all event ops listed as active or pending delete in m_dropped_ev_op using insertDataL, includeing the blob event ops referenced by a regular event op. - - NdbEventBuffer::report_node_failure - - NdbEventBuffer::completeClusterFailed + - NdbEventBuffer::report_node_failure_completed TE_ACTIVE is sent from the kernel on initial execute/start of the event op, but is also internally generetad on node connect like @@ -528,12 +527,12 @@ public: int insertDataL(NdbEventOperationImpl *op, const SubTableData * const sdata, Uint32 len, LinearSectionPtr ptr[3]); - void execSUB_GCP_COMPLETE_REP(const SubGcpCompleteRep * const, Uint32 len); + void execSUB_GCP_COMPLETE_REP(const SubGcpCompleteRep * const, Uint32 len, + int complete_cluster_failure= 0); void complete_outof_order_gcis(); void report_node_connected(Uint32 node_id); - void report_node_failure(Uint32 node_id); - void completeClusterFailed(); + void report_node_failure_completed(Uint32 node_id); // used by user thread Uint64 getLatestGCI(); @@ -664,6 +663,8 @@ private: void complete_bucket(Gci_container*); bool find_max_known_gci(Uint64 * res) const; void resize_known_gci(); + + Bitmask<(unsigned int)_NDB_NODE_BITMASK_SIZE> m_alive_node_bit_mask; }; inline === modified file 'storage/ndb/src/ndbapi/Ndbif.cpp' --- a/storage/ndb/src/ndbapi/Ndbif.cpp 2008-11-13 13:15:56 +0000 +++ b/storage/ndb/src/ndbapi/Ndbif.cpp 2008-12-16 20:51:49 +0000 @@ -269,13 +269,7 @@ Ndb::report_node_failure_completed(Uint3 { // node failed // eventOperations in the ndb object should be notified - theEventBuffer->report_node_failure(node_id); - if(!theImpl->m_transporter_facade->theClusterMgr->isClusterAlive()) - { - // cluster is unavailable, - // eventOperations in the ndb object should be notified - theEventBuffer->completeClusterFailed(); - } + theEventBuffer->report_node_failure_completed(node_id); } abortTransactionsAfterNodeFailure(node_id);