From: Ole John Aske Date: November 8 2010 12:13pm Subject: bzr push into mysql-5.1-telco-7.0-spj-scan-vs-scan branch (ole.john.aske:3342 to 3343) List-Archive: http://lists.mysql.com/commits/123077 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============1034254926==" --===============1034254926== MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline 3343 Ole John Aske 2010-11-08 spj-svs: Introduce usage of 'Node sequence' as part of node failure detection. Implemented based on current implementation in NdbScanOperation.cpp modified: storage/ndb/src/ndbapi/NdbQueryOperation.cpp 3342 Ole John Aske 2010-11-08 spj-svs: Corrected a potential mutex issue accessing 'm_finalBatchFrags' within ::nextRootResult(). m_finalBatchFrags requires the PollGuard mutex to be locked before accessing it. However, in ::nextRootResult() it was accessed without the mutex. Moved the mutex-violating logic inside ::awaitMoreResults(), where the mutex is locked, and extended the return values from ::awaitMoreResults() to include 'FetchResult_noMoreCache'. modified: storage/ndb/src/ndbapi/NdbQueryOperation.cpp storage/ndb/src/ndbapi/NdbQueryOperationImpl.hpp === modified file 'storage/ndb/src/ndbapi/NdbQueryOperation.cpp' --- a/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2010-11-08 09:38:33 +0000 +++ b/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2010-11-08 12:12:08 +0000 @@ -1793,12 +1793,12 @@ NdbQueryImpl::awaitMoreResults(bool forc assert (m_scanTransaction); assert (m_state==Executing); - Ndb* const ndb = m_transaction.getNdb(); + NdbImpl* const ndb = m_transaction.getNdb()->theImpl; { /* This part needs to be done under mutex due to synchronization with * receiver thread. */ - PollGuard poll_guard(*ndb->theImpl); + PollGuard poll_guard(*ndb); /* There may be pending (asynchronous received, mutex protected) errors * from TC / datanodes. Propogate these into m_error.code in 'API space'. 
@@ -1829,31 +1829,27 @@ NdbQueryImpl::awaitMoreResults(bool forc : FetchResult_noMoreData; } + TransporterFacade* tp = ndb->m_transporter_facade; + const Uint32 timeout = ndb->get_waitfor_timeout(); + const Uint32 nodeId = m_transaction.getConnectedNodeId(); + const Uint32 seq = m_transaction.theNodeSequence; + /* More results are on the way, so we wait for them.*/ const FetchResult waitResult = static_cast - (poll_guard.wait_scan(3*ndb->theImpl->get_waitfor_timeout(), - m_transaction.getConnectedNodeId(), + (poll_guard.wait_scan(3*timeout, + nodeId, forceSend)); - switch (waitResult) { - case FetchResult_ok: // SCAN_TABREF, may have setFetchTerminated() w/ errors - break; - case FetchResult_timeOut: + if (tp->getNodeSequence(nodeId) != seq) + setFetchTerminated(Err_NodeFailCausedAbort,false); + else if (likely(waitResult == FetchResult_ok)) + continue; + else if (waitResult == FetchResult_timeOut) setFetchTerminated(Err_ReceiveTimedOut,false); - break; - case FetchResult_nodeFail: + else setFetchTerminated(Err_NodeFailCausedAbort,false); - break; - default: - assert(false); - } - assert (!m_error.code); - /* Getting here is not an error. PollGuard::wait_scan() will return - * when a complete batch (for a fragment) is available for *any* active - * scan in this transaction. So we must wait again for the next arriving - * batch. - */ + assert (!m_error.code); } // while(!hasReceivedError()) } // Terminates scope of 'PollGuard' @@ -2636,7 +2632,9 @@ NdbQueryImpl::sendFetchMore(NdbRootFragm secs[ScanNextReq::ReceiverIdsSectionNum].sectionIter = &receiverIdIter; secs[ScanNextReq::ReceiverIdsSectionNum].sz = 1; - TransporterFacade* const facade = ndb.theImpl->m_transporter_facade; + TransporterFacade* const tp = ndb.theImpl->m_transporter_facade; + Uint32 nodeId = m_transaction.getConnectedNodeId(); + Uint32 seq = m_transaction.theNodeSequence; /* This part needs to be done under mutex due to synchronization with * receiver thread. 
@@ -2648,29 +2646,24 @@ NdbQueryImpl::sendFetchMore(NdbRootFragm // Errors arrived inbetween ::await released mutex, and fetchMore grabbed it return -1; } - const int res = - facade->sendSignal(&tSignal, - getNdbTransaction().getConnectedNodeId(), - secs, - 1); - if (unlikely(res == -1)) + if (tp->getNodeSequence(nodeId) != seq || + tp->sendSignal(&tSignal, nodeId, secs, 1) != 0) { setErrorCode(Err_NodeFailCausedAbort); return -1; } - m_pendingFrags++; - assert(m_pendingFrags <= getRootFragCount()); - if (forceSend) { // Flush signals to TC. - facade->forceSend(ndb.theNdbBlockNumber); + tp->forceSend(ndb.theNdbBlockNumber); } else { - facade->checkForceSend(ndb.theNdbBlockNumber); + tp->checkForceSend(ndb.theNdbBlockNumber); } + m_pendingFrags++; + assert(m_pendingFrags <= getRootFragCount()); return 0; } // NdbQueryImpl::sendFetchMore() @@ -2679,36 +2672,38 @@ NdbQueryImpl::closeTcCursor(bool forceSe { assert (m_queryDef.isScanQuery()); - Ndb* const ndb = m_transaction.getNdb(); - Uint32 timeout = ndb->theImpl->get_waitfor_timeout(); + NdbImpl* const ndb = m_transaction.getNdb()->theImpl; + + TransporterFacade* tp = ndb->m_transporter_facade; + const Uint32 timeout = ndb->get_waitfor_timeout(); + const Uint32 nodeId = m_transaction.getConnectedNodeId(); + const Uint32 seq = m_transaction.theNodeSequence; /* This part needs to be done under mutex due to synchronization with * receiver thread. 
*/ - PollGuard poll_guard(*ndb->theImpl); + PollGuard poll_guard(*ndb); -//Uint32 seq = m_transaction->theNodeSequence; -//if (seq != tp->getNodeSequence(m_transaction.getConnectedNodeId())) // TODO -//{} + if (unlikely(tp->getNodeSequence(nodeId) != seq)) + { + setErrorCode(Err_NodeFailCausedAbort); + return -1; // Transporter disconnected and reconnected, no need to close + } /* Wait for outstanding scan results from current batch fetch */ while (m_pendingFrags > 0) { - const FetchResult waitResult = static_cast - (poll_guard.wait_scan(3*timeout, - m_transaction.getConnectedNodeId(), - forceSend)); - switch (waitResult) { - case FetchResult_ok: // SCAN_TABREF, may have setFetchTerminated() w/ errors - break; - case FetchResult_timeOut: - setFetchTerminated(Err_ReceiveTimedOut,false); - break; - case FetchResult_nodeFail: + const FetchResult result = static_cast + (poll_guard.wait_scan(3*timeout, nodeId, forceSend)); + + if (unlikely(tp->getNodeSequence(nodeId) != seq)) setFetchTerminated(Err_NodeFailCausedAbort,false); - break; - default: - assert(false); + else if (unlikely(result != FetchResult_ok)) + { + if (result == FetchResult_timeOut) + setFetchTerminated(Err_ReceiveTimedOut,false); + else + setFetchTerminated(Err_NodeFailCausedAbort,false); } if (hasReceivedError()) { @@ -2733,21 +2728,17 @@ NdbQueryImpl::closeTcCursor(bool forceSe /* Wait for close to be confirmed: */ while (m_pendingFrags > 0) { - const FetchResult waitResult = static_cast - (poll_guard.wait_scan(3*timeout, - m_transaction.getConnectedNodeId(), - forceSend)); - switch (waitResult) { - case FetchResult_ok: - break; - case FetchResult_timeOut: - setFetchTerminated(Err_ReceiveTimedOut,false); - break; - case FetchResult_nodeFail: + const FetchResult result = static_cast + (poll_guard.wait_scan(3*timeout, nodeId, forceSend)); + + if (unlikely(tp->getNodeSequence(nodeId) != seq)) setFetchTerminated(Err_NodeFailCausedAbort,false); - break; - default: - assert(false); + if (unlikely(result != 
FetchResult_ok)) + { + if (result == FetchResult_timeOut) + setFetchTerminated(Err_ReceiveTimedOut,false); + else + setFetchTerminated(Err_NodeFailCausedAbort,false); } if (hasReceivedError()) { --===============1034254926== MIME-Version: 1.0 Content-Type: text/bzr-bundle; charset="us-ascii"; name="bzr/ole.john.aske@stripped" Content-Transfer-Encoding: 7bit Content-Disposition: inline # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: ole.john.aske@stripped\ # itigmpnjo5cvk395 # target_branch: file:///home/oleja/mysql/mysql-5.1-telco-7.0-spj-\ # scan-scan/ # testament_sha1: 122e9363c53a725e0b910735532097dc9f354477 # timestamp: 2010-11-08 13:13:09 +0100 # source_branch: bzr+ssh://oaske@stripped/bzrroot/server\ # /mysql-5.1-telco-7.0-spj/ # base_revision_id: ole.john.aske@stripped\ # bf0qh8yqksrnmcvj # # Begin bundle IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWX0ZSCsAA3V/gFUQAARx9/// e7H+AL////5gB52vpet7HpUIA6K6AA6KYaUNJk0GRoNB6jRiAA0A0AANACUQTRtJiU9NSNDQaDTQ ANAAAAAhQTPSgxADRiaAAAADINAYmDERNNFTPU2RqZT1PKHkCNpGRkZBhBowA4yZNGIYmmAgYE0w RgmJppoAMIJIgQJo0Ewp6amTSeDSZT9Sn6o/TU2mlPTUepkbyUoABGbD5MbsXtcVyt5Xg7MKXZKj 9eoV+leKzZ65xFlcpRjKX+lRpJkqYvrhSTnqWbLpUZC+jUmNgq5HFBpiEEaUAdh5syZ2hBMKmc6J Ro0WEAj2BEgEP1ogwwJeRjifyRSDG4yPMxMkfifL9crMwI3y/dGm/Be12vVRdTTAYVFBTYNS04tI Qd4tF2RyJK9bKqkwoRm6DuHcGEYweOO3ZXSEP8cANqFwOrgnKhaJOUGGCzraUV2KlX8RfWfj1jFm ZrbKrwOOI6LrOJHCyZI0w35AUQQBMhkiBezwkAiBEsBiUG2cIg0vT+rt1lh2F8bbKTLrxmWlTdE0 w6O7ubYSBkyGamLnBla0302uREZimGwKBYNcTUAgjlGAOXG0aaMbFhERRIMhicIQQeUbMRSSSQRX EcCKsyCdFWFNpqUZpOBU9LJJGlIZAyRkTqxfocNJXK+rEYTBcjLpYVTGJ3m3Brz3lKDiJpGQfDrG cURFhImFoRUG6Q/ou8LnQSYK2pjrJQxSqbHc/VMR2VCNxq5KZqCIR1EmJlHcbXPylfWQ6jYm+TnZ N5UqqIdiT+DcbXz21WBnbTDUBUCjWClZQtJUZJOMlhDPVjYMCsu6CvXd0K0Iw5gNDo2dNUGbPhmD ue2hFgYe98CWIZQ3OksMGkaq7PUej23SKksNpI455RxriXRTCzlTV46idbWccW0xnftCVeBOw6BM ZTFn6cqy3ynDjeqMSMImmQ2Di7nInCoaaJkhSm+T0hBa1eTNlTNbqmztEpayRqty9J4cwbFqiYZv Hg9RNlIQLgoDlTovrAYxzHSsxxg/W6tmMScFklHQUuqCltnlp1HGLGPGcm9awSu2SxycUVCT0WSx 
IYhLFa8heM4yTMMMmYr2n6mb5LtB0eG9nRFDE6yrXvYkHV2UOT3MJVpFAo97Jj6cx/GYOa1U6xPB QCs8hhRQ3VQkkqxlbImQH0YP/f/oZFRFFadPDiipmuy+wsOoRCiFSM5IiOd7IMwt4yzIldU6JOui t0WsqjYkORD31B+e0+yZf2rSLQLhUcgiYT9Urz4tBDYq0Uwd0lk4eqV4px70ihmLENYrA8A79R8G SO0fh+sDAchIDHphcdYe/3Sw+4VkegSI8Cw17DPkFxh/Te814+NxaGVSMhdKZLYkrvrieXyIJGr9 NyRxYU39PTUzMwfOoJpWAR3B4i/CkHEXM5cMQ5chBFlxwOezJpxTMTwag8DWQGP/CIDCYKG19ZlX VudQBJs/wN48rpmCO7dnaC9adAzIgh1FJHlJjynIfpgoc7GPdckaaHO80g+IE1WiAHmX3o5SRRtN foRzZtaL1VCkBODK85dd1TeelPbVU9YlBK73sNbd3lmXuWQHGMpA1EWXm8ZSxYlwGoaQGLYaNsEt YT8UPcuS0uDjGGEblUJkGciKOmQjEpWigEhJAddcGQwwkfiQQSbERYG1Xm3mA5AYPiXb0uCJpXCM iNnV/kGAHC4CyGw1AuwwSDkoZ9rjZ/C9NHOb+t2Z97OXmaYugzbesWsHTSXQMgrXMjejR6ge2fJt lDEyEyydgYktArtZPDYSKB1mwXKkbvayaQmVYLWLjD89/TzBvzZBoPlCwMrhAtCCUPSGJIxpDkzB Gy3VNLP8yuVqTIiILrfhIVQi5vaB0roVZjC2xFYyW5iaNtk5HgibcQoUjwahRah3aKQwk7UV80c1 yCYyIA0T/QyNJxn2+lMqJ13R93ELQjKEkUC5H8uFaSikYywBk7LRB0wdXOX7e0XGeP1YJJkyBgGy F56USPLUtwazWM/Z6mxGQFbmEH7QMEwLH5jGg9EjiRzwS8AuAitXs2JQUbRwOffhnZk0CdRvG9tC SXBhUPcopVWnNbiEeYaApBBgNiZHUY3XEJk4gllA7wZRCqSKkrslcEZCAx4Y+6JZyzRyE2ChHXP6 CYLXIPcqJNQVH8G/y07IE61TuHE1gXMBhirDt4RQaOL9vsM1MUi+l4P97kjKuXfXXnS5K8AzxSLB 8RQKoCB3vMxNhaAKmNkB5IgyYYaKBoEpFQBgelIqC8R1IopOkotshUhwaeqiVJsengFQy5ykoiap iKcqalZVUFAwNERTr2HKj6WJEi5ldm9T5I2kvmg/LH7nbkEHiY0Pr2Z8+gI6g7kyY+5nMizhWItu PYtJkmtOITLxYdgH5T4XCrJHP+SA/i7kinChIPoykFY= --===============1034254926==--