From: Pekka Nousiainen Date: June 4 2011 2:08pm Subject: bzr push into mysql-5.1-telco-7.0-wl4124-new0 branch (pekka.nousiainen:4388 to 4389) List-Archive: http://lists.mysql.com/commits/138666 Message-Id: <20110604140856.573745586E@sama.localdomain> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4389 Pekka Nousiainen 2011-06-04 [merge] merge main to wl4124-new0 modified: storage/ndb/include/transporter/TransporterRegistry.hpp storage/ndb/src/common/transporter/Transporter.cpp storage/ndb/src/common/transporter/Transporter.hpp storage/ndb/src/common/transporter/TransporterRegistry.cpp storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp storage/ndb/src/kernel/blocks/suma/Suma.cpp storage/ndb/src/kernel/blocks/suma/Suma.hpp storage/ndb/src/kernel/blocks/trix/Trix.cpp storage/ndb/src/kernel/blocks/trix/Trix.hpp storage/ndb/src/mgmsrv/MgmtSrvr.cpp storage/ndb/src/mgmsrv/MgmtSrvr.hpp storage/ndb/src/mgmsrv/Services.cpp storage/ndb/test/ndbapi/testRestartGci.cpp storage/ndb/test/run-test/atrt-backtrace.sh storage/ndb/test/run-test/atrt-gather-result.sh storage/ndb/test/run-test/atrt-setup.sh storage/ndb/test/run-test/autotest-boot.sh storage/ndb/test/run-test/autotest-run.sh 4388 Jonas Oreland 2011-05-31 [merge] ndb - merge 70-main back into mysql-5.1-telco-7.0-wl4124-new0 modified: mysql-test/suite/ndb/r/ndb_restore_misc.result mysql-test/suite/ndb/t/ndb_restore_misc.test storage/ndb/include/kernel/signaldata/ScanTab.hpp storage/ndb/include/mgmapi/mgmapi_config_parameters.h storage/ndb/include/ndb_constants.h storage/ndb/include/ndbapi/NdbDictionary.hpp storage/ndb/include/ndbapi/NdbScanOperation.hpp storage/ndb/src/common/debugger/signaldata/ScanTab.cpp storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 
storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp storage/ndb/src/kernel/blocks/dbtux/DbtuxDebug.cpp storage/ndb/src/kernel/blocks/dbtux/DbtuxGen.cpp storage/ndb/src/kernel/blocks/dbutil/DbUtil.cpp storage/ndb/src/kernel/blocks/suma/Suma.cpp storage/ndb/src/kernel/blocks/trix/Trix.cpp storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp storage/ndb/src/ndbapi/ndberror.c storage/ndb/tools/restore/Restore.cpp storage/ndb/tools/restore/consumer_restore.cpp === modified file 'storage/ndb/include/transporter/TransporterRegistry.hpp' --- a/storage/ndb/include/transporter/TransporterRegistry.hpp 2011-04-09 15:48:21 +0000 +++ b/storage/ndb/include/transporter/TransporterRegistry.hpp 2011-06-01 07:40:49 +0000 @@ -118,7 +118,7 @@ public: NOTE! Connection should be closed if function returns false */ - bool connect_server(NDB_SOCKET_TYPE sockfd) const; + bool connect_server(NDB_SOCKET_TYPE sockfd, BaseString& errormsg) const; bool connect_client(NdbMgmHandle *h); === modified file 'storage/ndb/src/common/transporter/Transporter.cpp' --- a/storage/ndb/src/common/transporter/Transporter.cpp 2011-02-01 23:27:25 +0000 +++ b/storage/ndb/src/common/transporter/Transporter.cpp 2011-06-01 07:40:49 +0000 @@ -130,18 +130,25 @@ Transporter::configure(const Transporter bool -Transporter::connect_server(NDB_SOCKET_TYPE sockfd) { +Transporter::connect_server(NDB_SOCKET_TYPE sockfd, + BaseString& msg) { // all initial negotiation is done in TransporterRegistry::connect_server DBUG_ENTER("Transporter::connect_server"); - if(m_connected) + if (m_connected) + { + msg.assfmt("line: %u : already connected ??", __LINE__); DBUG_RETURN(false); + } // Cache the connect address my_socket_connect_address(sockfd, &m_connect_address); if (!connect_server_impl(sockfd)) + { + msg.assfmt("line: %u : connect_server_impl failed", __LINE__); DBUG_RETURN(false); + } m_connected = true; === modified file 'storage/ndb/src/common/transporter/Transporter.hpp' --- a/storage/ndb/src/common/transporter/Transporter.hpp 
2011-02-01 23:27:25 +0000 +++ b/storage/ndb/src/common/transporter/Transporter.hpp 2011-06-01 07:40:49 +0000 @@ -49,7 +49,7 @@ public: */ virtual bool connect_client(); bool connect_client(NDB_SOCKET_TYPE sockfd); - bool connect_server(NDB_SOCKET_TYPE socket); + bool connect_server(NDB_SOCKET_TYPE socket, BaseString& errormsg); /** * Blocking === modified file 'storage/ndb/src/common/transporter/TransporterRegistry.cpp' --- a/storage/ndb/src/common/transporter/TransporterRegistry.cpp 2011-04-09 15:48:21 +0000 +++ b/storage/ndb/src/common/transporter/TransporterRegistry.cpp 2011-06-01 07:40:49 +0000 @@ -64,7 +64,8 @@ SocketServer::Session * TransporterServi DBUG_RETURN(0); } - if (!m_transporter_registry->connect_server(sockfd)) + BaseString msg; + if (!m_transporter_registry->connect_server(sockfd, msg)) { NDB_CLOSE_SOCKET(sockfd); DBUG_RETURN(0); @@ -305,7 +306,8 @@ TransporterRegistry::init(NodeId nodeId) } bool -TransporterRegistry::connect_server(NDB_SOCKET_TYPE sockfd) const +TransporterRegistry::connect_server(NDB_SOCKET_TYPE sockfd, + BaseString & msg) const { DBUG_ENTER("TransporterRegistry::connect_server(sockfd)"); @@ -314,6 +316,7 @@ TransporterRegistry::connect_server(NDB_ SocketInputStream s_input(sockfd); char buf[11+1+11+1]; // if (s_input.gets(buf, sizeof(buf)) == 0) { + msg.assfmt("line: %u : Failed to get nodeid from client", __LINE__); DBUG_PRINT("error", ("Failed to read 'hello' from client")); DBUG_RETURN(false); } @@ -328,6 +331,7 @@ TransporterRegistry::connect_server(NDB_ // ok, but with no checks on transporter configuration compatability break; default: + msg.assfmt("line: %u : Incorrect reply from client: >%s<", __LINE__, buf); DBUG_PRINT("error", ("Failed to parse 'hello' from client, buf: '%.*s'", (int)sizeof(buf), buf)); DBUG_RETURN(false); @@ -341,6 +345,7 @@ TransporterRegistry::connect_server(NDB_ if (nodeId < 0 || nodeId >= (int)maxTransporters) { + msg.assfmt("line: %u : Incorrect reply from client: >%s<", __LINE__, buf); 
DBUG_PRINT("error", ("Out of range nodeId: %d from client", nodeId)); DBUG_RETURN(false); @@ -350,6 +355,8 @@ TransporterRegistry::connect_server(NDB_ Transporter *t= theTransporters[nodeId]; if (t == 0) { + msg.assfmt("line: %u : Incorrect reply from client: >%s<, node: %u", + __LINE__, buf, nodeId); DBUG_PRINT("error", ("No transporter available for node id %d", nodeId)); DBUG_RETURN(false); } @@ -357,6 +364,11 @@ TransporterRegistry::connect_server(NDB_ // Check that the transporter should be connecting if (performStates[nodeId] != TransporterRegistry::CONNECTING) { + msg.assfmt("line: %u : Incorrect state for node %u state: %s (%u)", + __LINE__, nodeId, + getPerformStateString(performStates[nodeId]), + performStates[nodeId]); + DBUG_PRINT("error", ("Transporter for node id %d in wrong state", nodeId)); DBUG_RETURN(false); @@ -376,15 +388,21 @@ TransporterRegistry::connect_server(NDB_ SocketOutputStream s_output(sockfd); if (s_output.println("%d %d", t->getLocalNodeId(), t->m_type) < 0) { + msg.assfmt("line: %u : Failed to reply to connecting socket (node: %u)", + __LINE__, nodeId); DBUG_PRINT("error", ("Send of reply failed")); DBUG_RETURN(false); } // Setup transporter (transporter responsible for closing sockfd) - bool res = t->connect_server(sockfd); + bool res = t->connect_server(sockfd, msg); if (res && performStates[nodeId] != TransporterRegistry::CONNECTING) { + msg.assfmt("line: %u : Incorrect state for node %u state: %s (%u)", + __LINE__, nodeId, + getPerformStateString(performStates[nodeId]), + performStates[nodeId]); // Connection suceeded, but not connecting anymore, return // false to close the connection DBUG_RETURN(false); === modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp' --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2011-05-31 08:28:58 +0000 +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2011-05-31 12:28:59 +0000 @@ -1204,11 +1204,16 @@ void Dbtc::handleApiFailState(Signal* si 
capiConnectClosing[TfailedApiNode]--; releaseApiCon(signal, TapiConnectptr); TlocalApiConnectptr.p->apiFailState = ZFALSE; - if (capiConnectClosing[TfailedApiNode] == 0) { + if (capiConnectClosing[TfailedApiNode] == 0) + { jam(); - signal->theData[0] = TfailedApiNode; - signal->theData[1] = cownref; - sendSignal(capiFailRef, GSN_API_FAILCONF, signal, 2, JBB); + + /** + * Perform block-level cleanups (e.g assembleFragments...) + */ + Callback cb = {safe_cast(&Dbtc::apiFailBlockCleanupCallback), + TfailedApiNode}; + simBlockNodeFailure(signal, TfailedApiNode, cb); }//if }//Dbtc::handleApiFailState() @@ -8629,7 +8634,7 @@ Dbtc::apiFailBlockCleanupCallback(Signal jamEntry(); signal->theData[0] = failedNodeId; - signal->theData[1] = cownref; + signal->theData[1] = reference(); sendSignal(capiFailRef, GSN_API_FAILCONF, signal, 2, JBB); } @@ -13136,6 +13141,21 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal) return; } #endif + + if (arg == 7019 && signal->getLength() == 2) + { + jam(); + Uint32 nodeId = signal->theData[1]; + if (nodeId < MAX_NODES && nodeId < NDB_ARRAY_SIZE(capiConnectClosing)) + { + warningEvent(" DBTC: capiConnectClosing[%u]: %u", + nodeId, capiConnectClosing[nodeId]); + } + else + { + warningEvent(" DBTC: dump-7019 to unknown node: %u", nodeId); + } + } }//Dbtc::execDUMP_STATE_ORD() void Dbtc::execDBINFO_SCANREQ(Signal *signal) === modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp' --- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2011-04-15 13:52:53 +0000 +++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2011-05-31 12:28:59 +0000 @@ -77,6 +77,7 @@ #endif +#define QMGR_MAX_FAIL_STATE_BLOCKS 5 class Qmgr : public SimulatedBlock { public: @@ -90,10 +91,7 @@ public: NORMAL = 0, WAITING_FOR_CLOSECOMCONF_ACTIVE = 1, /* Node had phase ZAPI_ACTIVE */ WAITING_FOR_CLOSECOMCONF_NOTACTIVE = 2, /* Node had phase != ZAPI_ACTIVE */ - WAITING_FOR_FAILCONF1 = 3, - WAITING_FOR_FAILCONF2 = 4, - WAITING_FOR_FAILCONF3 = 5, - WAITING_FOR_FAILCONF4 = 7, + 
WAITING_FOR_API_FAILCONF = 3, WAITING_FOR_NDB_FAILCONF = 6 }; @@ -215,8 +213,9 @@ public: BlockReference blockRef; Uint64 m_secret; Uint64 m_alloc_timeout; + Uint16 m_failconf_blocks[QMGR_MAX_FAIL_STATE_BLOCKS]; - NodeRec() { } + NodeRec() { bzero(m_failconf_blocks, sizeof(m_failconf_blocks)); } }; /* p2c: size = 52 bytes */ typedef Ptr<NodeRec> NodeRecPtr; @@ -484,7 +483,9 @@ private: const NodeId theNodes[]); void handleApiCloseComConf(Signal* signal); - + void add_failconf_block(NodeRecPtr, Uint32 block); + bool remove_failconf_block(NodeRecPtr, Uint32 block); + bool is_empty_failconf_block(NodeRecPtr) const; /* Wait this time until we try to join the */ /* cluster again */ === modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp' --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2011-05-24 15:06:09 +0000 +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2011-05-31 12:28:59 +0000 @@ -2928,23 +2928,71 @@ void Qmgr::checkStartInterface(Signal* s if(((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 60) == 0) { jam(); - char buf[100]; - BaseString::snprintf(buf, sizeof(buf), - "Failure handling of node %d has not completed" - " in %d min - state = %d", - nodePtr.i, - (getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1)/60, - nodePtr.p->failState); - warningEvent("%s", buf); - if (((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 300) == 0) + char buf[256]; + if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB) { jam(); - /** - * Also dump DIH nf-state - */ - signal->theData[0] = 7019; - signal->theData[1] = nodePtr.i; - sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB); + BaseString::snprintf(buf, sizeof(buf), + "Failure handling of node %d has not completed" + " in %d min - state = %d", + nodePtr.i, + (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60, + nodePtr.p->failState); + warningEvent("%s", buf); + if (((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 300) == 0) + { + jam(); + /** + * Also dump DIH nf-state + */ + signal->theData[0] = 7019; + signal->theData[1] 
= nodePtr.i; + sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB); + } + } + else + { + jam(); + BaseString::snprintf(buf, sizeof(buf), + "Failure handling of api %u has not completed" + " in %d min - state = %d", + nodePtr.i, + (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60, + nodePtr.p->failState); + warningEvent("%s", buf); + if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF) + { + jam(); + compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5); + BaseString::snprintf(buf, sizeof(buf), + " Waiting for blocks: %u %u %u %u %u", + nodePtr.p->m_failconf_blocks[0], + nodePtr.p->m_failconf_blocks[1], + nodePtr.p->m_failconf_blocks[2], + nodePtr.p->m_failconf_blocks[3], + nodePtr.p->m_failconf_blocks[4]); + warningEvent("%s", buf); + + for (Uint32 i = 0; i<NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); + i++) + { + jam(); + if (nodePtr.p->m_failconf_blocks[i] != 0) + { + jam(); + signal->theData[0] = 7019; + signal->theData[1] = nodePtr.i; + sendSignal(numberToRef(nodePtr.p->m_failconf_blocks[i], + getOwnNodeId()), + GSN_DUMP_STATE_ORD, signal, 2, JBB); + } + else + { + jam(); + break; + } + } + } } } } @@ -2969,7 +3017,7 @@ void Qmgr::sendApiFailReq(Signal* signal { jamEntry(); signal->theData[0] = failedNodeNo; - signal->theData[1] = QMGR_REF; + signal->theData[1] = QMGR_REF; /* We route the ApiFailReq signals via CMVMI * This is done to ensure that they are received after @@ -2982,12 +3030,18 @@ void Qmgr::sendApiFailReq(Signal* signal &signal->theData[0], 2)); SectionHandle handle(this, routedSignalSectionI); - + /* RouteOrd data */ RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0]; routeOrd->srcRef = reference(); routeOrd->gsn = GSN_API_FAILREQ; + NodeRecPtr failedNodePtr; + failedNodePtr.i = failedNodeNo; + ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); + failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF; + + /* Send ROUTE_ORD signals to CMVMI via JBA * CMVMI will then immediately send the API_FAILREQ * signals to the destination block(s) using JBB @@ 
-2997,16 +3051,20 @@ void Qmgr::sendApiFailReq(Signal* signal */ if (!sumaOnly) { + jam(); + add_failconf_block(failedNodePtr, DBTC); routeOrd->dstRef = DBTC_REF; sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBA, &handle); + add_failconf_block(failedNodePtr, DBDICT); routeOrd->dstRef = DBDICT_REF; sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBA, &handle); + add_failconf_block(failedNodePtr, DBSPJ); routeOrd->dstRef = DBSPJ_REF; sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, @@ -3014,11 +3072,11 @@ void Qmgr::sendApiFailReq(Signal* signal } /* Suma always notified */ + add_failconf_block(failedNodePtr, SUMA); routeOrd->dstRef = SUMA_REF; sendSignal(CMVMI_REF, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBA, &handle); - }//Qmgr::sendApiFailReq() void Qmgr::execAPI_FAILREQ(Signal* signal) @@ -3042,35 +3100,118 @@ void Qmgr::execAPI_FAILCONF(Signal* sign failedNodePtr.i = signal->theData[0]; ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); - if (failedNodePtr.p->failState == WAITING_FOR_FAILCONF1) + Uint32 block = refToMain(signal->theData[1]); + if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF || + !remove_failconf_block(failedNodePtr, block)) + { + jam(); + ndbout << "execAPI_FAILCONF from " << block + << " failedNodePtr.p->failState = " + << (Uint32)(failedNodePtr.p->failState) + << " blocks: "; + for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++) + { + printf("%u ", failedNodePtr.p->m_failconf_blocks[i]); + } + ndbout << endl; + systemErrorLab(signal, __LINE__); + }//if + + if (is_empty_failconf_block(failedNodePtr)) { jam(); - failedNodePtr.p->failState = WAITING_FOR_FAILCONF2; + failedNodePtr.p->failState = NORMAL; + + /** + * When we set this state, connection will later be opened + * in checkStartInterface + */ + } + return; +}//Qmgr::execAPI_FAILCONF() + +void +Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block) +{ + // Check that it does not already 
exists!! + Uint32 pos = 0; + for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++) + { + jam(); + if (nodePtr.p->m_failconf_blocks[pos] == 0) + { + jam(); + break; + } + else if (nodePtr.p->m_failconf_blocks[pos] == block) + { + jam(); + break; + } } - else if (failedNodePtr.p->failState == WAITING_FOR_FAILCONF2) + + ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks)); + ndbassert(nodePtr.p->m_failconf_blocks[pos] != block); + if (nodePtr.p->m_failconf_blocks[pos] == block) { jam(); - failedNodePtr.p->failState = WAITING_FOR_FAILCONF3; + /** + * Already in list!! + */ +#ifdef ERROR_INSERT + ndbrequire(false); +#endif + return; } - else if (failedNodePtr.p->failState == WAITING_FOR_FAILCONF3) + ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0); + nodePtr.p->m_failconf_blocks[pos] = block; +} + +bool +Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block) +{ + // Check that it does exists!! + Uint32 pos = 0; + for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++) { jam(); - failedNodePtr.p->failState = WAITING_FOR_FAILCONF4; + if (nodePtr.p->m_failconf_blocks[pos] == 0) + { + jam(); + break; + } + else if (nodePtr.p->m_failconf_blocks[pos] == block) + { + jam(); + break; + } } - else if (failedNodePtr.p->failState == WAITING_FOR_FAILCONF4) + + if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) || + nodePtr.p->m_failconf_blocks[pos] != block) { jam(); - failedNodePtr.p->failState = NORMAL; + /** + * Not found!! 
+ */ + return false; } - else + + nodePtr.p->m_failconf_blocks[pos] = 0; + for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++) { jam(); - ndbout << "failedNodePtr.p->failState = " - << (Uint32)(failedNodePtr.p->failState) << endl; - systemErrorLab(signal, __LINE__); - }//if - return; -}//Qmgr::execAPI_FAILCONF() + nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos]; + } + + return true; +} + +bool +Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const +{ + return nodePtr.p->m_failconf_blocks[0] == 0; +} void Qmgr::execNDB_FAILCONF(Signal* signal) { @@ -3999,7 +4140,6 @@ void Qmgr::handleApiCloseComConf(Signal* */ jam(); sendApiFailReq(signal, nodeId, false); // !sumaOnly - failedNodePtr.p->failState = WAITING_FOR_FAILCONF1; arbitRec.code = ArbitCode::ApiFail; handleArbitApiFail(signal, nodeId); } @@ -4010,7 +4150,6 @@ void Qmgr::handleApiCloseComConf(Signal* */ jam(); sendApiFailReq(signal, nodeId, true); // sumaOnly - failedNodePtr.p->failState = WAITING_FOR_FAILCONF4; } if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM) === modified file 'storage/ndb/src/kernel/blocks/suma/Suma.cpp' --- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp 2011-05-31 08:28:58 +0000 +++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp 2011-05-31 12:28:59 +0000 @@ -906,6 +906,7 @@ void Suma::execAPI_FAILREQ(Signal* signa c_failedApiNodes.set(failedApiNode); c_subscriber_nodes.clear(failedApiNode); c_subscriber_per_node[failedApiNode] = 0; + c_failedApiNodesState[failedApiNode] = __LINE__; check_start_handover(signal); @@ -925,6 +926,8 @@ CONF: signal->theData[1] = reference(); sendSignal(QMGR_REF, GSN_API_FAILCONF, signal, 2, JBB); + c_failedApiNodesState[failedApiNode] = 0; + DBUG_VOID_RETURN; }//execAPI_FAILREQ() @@ -946,6 +949,7 @@ Suma::api_fail_block_cleanup_callback(Si signal->theData[1] = reference(); sendSignal(QMGR_REF, GSN_API_FAILCONF, signal, 2, JBB); c_failedApiNodes.clear(failedNodeId); + c_failedApiNodesState[failedNodeId] = 
0; } void @@ -953,9 +957,11 @@ Suma::api_fail_block_cleanup(Signal* sig { jam(); + c_failedApiNodesState[failedNode] = __LINE__; + Callback cb = {safe_cast(&Suma::api_fail_block_cleanup_callback), failedNode}; - + simBlockNodeFailure(signal, failedNode, cb); } @@ -984,6 +990,7 @@ Suma::api_fail_gci_list(Signal* signal, c_gcp_list.release(gcp); + c_failedApiNodesState[nodeId] = __LINE__; signal->theData[0] = SumaContinueB::API_FAIL_GCI_LIST; signal->theData[1] = nodeId; sendSignal(SUMA_REF, GSN_CONTINUEB, signal, 2, JBB); @@ -1006,11 +1013,13 @@ Suma::api_fail_gci_list(Signal* signal, Ptr subOpPtr; if (c_subOpPool.seize(subOpPtr)) { + c_failedApiNodesState[nodeId] = __LINE__; signal->theData[2] = subOpPtr.i; sendSignal(SUMA_REF, GSN_CONTINUEB, signal, 6, JBB); } else { + c_failedApiNodesState[nodeId] = __LINE__; sendSignal(SUMA_REF, GSN_CONTINUEB, signal, 3, JBB); } @@ -1046,6 +1055,7 @@ Suma::api_fail_subscriber_list(Signal* s { jam(); sendSignal(SUMA_REF, GSN_CONTINUEB, signal, 3, JBB); + c_failedApiNodesState[nodeId] = __LINE__; return; } } @@ -1064,6 +1074,7 @@ Suma::api_fail_subscriber_list(Signal* s { jam(); c_subscriptions.first(iter); + c_failedApiNodesState[nodeId] = __LINE__; } else { @@ -1079,6 +1090,7 @@ Suma::api_fail_subscriber_list(Signal* s * We restart from this bucket :-( */ c_subscriptions.next(bucket, iter); + c_failedApiNodesState[nodeId] = __LINE__; } else { @@ -1090,6 +1102,7 @@ Suma::api_fail_subscriber_list(Signal* s { jam(); api_fail_block_cleanup(signal, nodeId); + c_failedApiNodesState[nodeId] = __LINE__; return; } @@ -1104,11 +1117,18 @@ Suma::api_fail_subscriber_list(Signal* s if (empty) { + jam(); + c_failedApiNodesState[nodeId] = __LINE__; signal->theData[0] = SumaContinueB::API_FAIL_SUBSCRIPTION; signal->theData[1] = subOpPtr.i; signal->theData[2] = RNIL; sendSignal(SUMA_REF, GSN_CONTINUEB, signal, 3, JBB); } + else + { + jam(); + c_failedApiNodesState[nodeId] = __LINE__; + } } void @@ -1169,6 +1189,7 @@ 
Suma::api_fail_subscription(Signal* sign if (!ptr.isNull()) { jam(); + c_failedApiNodesState[nodeId] = __LINE__; signal->theData[0] = SumaContinueB::API_FAIL_SUBSCRIPTION; signal->theData[1] = subOpPtr.i; signal->theData[2] = ptr.i; @@ -1187,6 +1208,8 @@ Suma::api_fail_subscription(Signal* sign if (c_subscriptions.next(iter)) { + jam(); + c_failedApiNodesState[nodeId] = __LINE__; signal->theData[0] = SumaContinueB::API_FAIL_SUBSCRIBER_LIST; signal->theData[1] = nodeId; signal->theData[2] = subOpPtr.i; @@ -1748,6 +1771,29 @@ Suma::execDUMP_STATE_ORD(Signal* signal) sendSignalWithDelay(reference(), GSN_DUMP_STATE_ORD, signal, 100, 2); return; } + + if (tCase == 7019 && signal->getLength() == 2) + { + jam(); + Uint32 nodeId = signal->theData[1]; + if (nodeId < MAX_NODES) + { + warningEvent(" Suma 7019 %u line: %u", nodeId, + c_failedApiNodesState[nodeId]); + warningEvent(" c_connected_nodes.get(): %u", + c_connected_nodes.get(nodeId)); + warningEvent(" c_failedApiNodes.get(): %u", + c_failedApiNodes.get(nodeId)); + warningEvent(" c_subscriber_nodes.get(): %u", + c_subscriber_nodes.get(nodeId)); + warningEvent(" c_subscriber_per_node[%u]: %u", + nodeId, c_subscriber_per_node[nodeId]); + } + else + { + warningEvent(" SUMP: dump-7019 to unknown node: %u", nodeId); + } + } } void Suma::execDBINFO_SCANREQ(Signal *signal) === modified file 'storage/ndb/src/kernel/blocks/suma/Suma.hpp' --- a/storage/ndb/src/kernel/blocks/suma/Suma.hpp 2011-05-19 09:38:03 +0000 +++ b/storage/ndb/src/kernel/blocks/suma/Suma.hpp 2011-05-31 12:28:59 +0000 @@ -369,7 +369,8 @@ public: Uint32 c_maxBufferedEpochs; NodeBitmask c_failedApiNodes; - + Uint32 c_failedApiNodesState[MAX_NODES]; + /** * Functions */ === modified file 'storage/ndb/src/kernel/blocks/trix/Trix.cpp' --- a/storage/ndb/src/kernel/blocks/trix/Trix.cpp 2011-05-31 08:28:58 +0000 +++ b/storage/ndb/src/kernel/blocks/trix/Trix.cpp 2011-05-31 12:35:28 +0000 @@ -1785,7 +1785,8 @@ Trix::statMetaGetHeadCB(Signal* signal, if (ret != 0) { 
jam(); - statOpError(signal, stat, ret, __LINE__); + Uint32 supress[] = { GetTabInfoRef::TableNotDefined, 0 }; + statOpError(signal, stat, ret, __LINE__, supress); return; } g_statMetaHead.tableId = meta.m_conf.tableId; @@ -2057,8 +2058,8 @@ Trix::statUtilPrepareConf(Signal* signal util.m_prepareId = utilConf->prepareId; const Uint32 ot = send.m_operationType; - if (ERROR_INSERTED(18011) && ot == UtilPrepareReq::Read || - ERROR_INSERTED(18012) && ot != UtilPrepareReq::Read) + if ((ERROR_INSERTED(18011) && ot == UtilPrepareReq::Read) || + (ERROR_INSERTED(18012) && ot != UtilPrepareReq::Read)) { jam(); CLEAR_ERROR_INSERT_VALUE; @@ -2226,7 +2227,7 @@ Trix::statUtilReleaseConf(Signal* signal void Trix::statReadHeadDone(Signal* signal, StatOp& stat) { - StatOp::Data& data = stat.m_data; + //UNUSED StatOp::Data& data = stat.m_data; D("statReadHeadDone" << V(stat)); switch (stat.m_requestType) { @@ -2470,9 +2471,9 @@ Trix::statCleanExecute(Signal* signal, S releaseSections(handle); const Uint32 rt = stat.m_requestType; - if (ERROR_INSERTED(18021) && rt == IndexStatReq::RT_CLEAN_NEW || - ERROR_INSERTED(18022) && rt == IndexStatReq::RT_CLEAN_OLD || - ERROR_INSERTED(18023) && rt == IndexStatReq::RT_CLEAN_ALL) + if ((ERROR_INSERTED(18021) && rt == IndexStatReq::RT_CLEAN_NEW) || + (ERROR_INSERTED(18022) && rt == IndexStatReq::RT_CLEAN_OLD) || + (ERROR_INSERTED(18023) && rt == IndexStatReq::RT_CLEAN_ALL)) { jam(); CLEAR_ERROR_INSERT_VALUE; @@ -2891,7 +2892,7 @@ Trix::statDataPtr(StatOp& stat, Uint32 i const SysTable& sysTable = *send.m_sysTable; ndbrequire(i < sysTable.columnCount); - const SysColumn& c = sysTable.columnList[i]; + //UNUSED const SysColumn& c = sysTable.columnList[i]; if (&sysTable == &g_statMetaHead) { @@ -3077,12 +3078,24 @@ Trix::statOpConf(Signal* signal, StatOp& void Trix::statOpError(Signal* signal, StatOp& stat, - Uint32 errorCode, Uint32 errorLine) + Uint32 errorCode, Uint32 errorLine, + const Uint32 * supress) { D("statOpError" << V(stat) << 
V(errorCode) << V(errorLine)); + if (supress) + { + for (Uint32 i = 0; supress[i] != 0; i++) + { + if (errorCode == supress[i]) + { + goto do_supress; + } + } + } statOpEvent(stat, "W", "error %u line %u", errorCode, errorLine); +do_supress: ndbrequire(stat.m_errorCode == 0); stat.m_errorCode = errorCode; stat.m_errorLine = errorLine; @@ -3133,7 +3146,7 @@ Trix::statOpRef(Signal* signal, const In void Trix::statOpEvent(StatOp& stat, const char* level, const char* msg, ...) { - const IndexStatImplReq* req = &stat.m_req; + //UNUSED const IndexStatImplReq* req = &stat.m_req; StatOp::Data& data = stat.m_data; char tmp1[100]; === modified file 'storage/ndb/src/kernel/blocks/trix/Trix.hpp' --- a/storage/ndb/src/kernel/blocks/trix/Trix.hpp 2011-05-30 08:24:14 +0000 +++ b/storage/ndb/src/kernel/blocks/trix/Trix.hpp 2011-05-31 12:31:39 +0000 @@ -408,7 +408,8 @@ private: // conf and ref void statOpSuccess(Signal*, StatOp&); void statOpConf(Signal*, StatOp&); - void statOpError(Signal*, StatOp&, Uint32 errorCode, Uint32 errorLine); + void statOpError(Signal*, StatOp&, Uint32 errorCode, Uint32 errorLine, + const Uint32 * supress = 0); void statOpAbort(Signal*, StatOp&); void statOpRef(Signal*, StatOp&); void statOpRef(Signal*, const IndexStatImplReq*, Uint32 errorCode, Uint32 errorLine); === modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.cpp' --- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2011-05-12 09:26:38 +0000 +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2011-06-01 07:40:49 +0000 @@ -3763,11 +3763,12 @@ MgmtSrvr::getConnectionDbParameter(int n } -bool MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd) +bool +MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd, BaseString& msg) { DBUG_ENTER("MgmtSrvr::transporter_connect"); TransporterRegistry* tr= theFacade->get_registry(); - if (!tr->connect_server(sockfd)) + if (!tr->connect_server(sockfd, msg)) DBUG_RETURN(false); /* === modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.hpp' --- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp 2011-04-15 
08:09:04 +0000 +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp 2011-06-01 07:40:49 +0000 @@ -345,7 +345,7 @@ public: int getConnectionDbParameter(int node1, int node2, int param, int *value, BaseString& msg); - bool transporter_connect(NDB_SOCKET_TYPE sockfd); + bool transporter_connect(NDB_SOCKET_TYPE sockfd, BaseString& errormsg); const char *get_connect_address(Uint32 node_id); void get_connected_nodes(NodeBitmask &connected_nodes) const; === modified file 'storage/ndb/src/mgmsrv/Services.cpp' --- a/storage/ndb/src/mgmsrv/Services.cpp 2011-04-07 10:55:42 +0000 +++ b/storage/ndb/src/mgmsrv/Services.cpp 2011-06-01 07:40:49 +0000 @@ -1786,13 +1786,14 @@ void MgmApiSession::transporter_connect(Parser_t::Context &ctx, Properties const &args) { - if (!m_mgmsrv.transporter_connect(m_socket)) + BaseString errormsg; + if (!m_mgmsrv.transporter_connect(m_socket, errormsg)) { // Connection not allowed or failed g_eventLogger->warning("Failed to convert connection " - "from '%s' to transporter", - name()); - + "from '%s' to transporter: %s", + name(), + errormsg.c_str()); // Close the socket to indicate failure to other side } else === modified file 'storage/ndb/test/ndbapi/testRestartGci.cpp' --- a/storage/ndb/test/ndbapi/testRestartGci.cpp 2011-05-23 16:13:34 +0000 +++ b/storage/ndb/test/ndbapi/testRestartGci.cpp 2011-05-31 10:54:22 +0000 @@ -116,7 +116,7 @@ int runInsertRememberGci(NDBT_Context* c int i = 0; ndbout_c("Inserting %u records", records); - Uint64 minGci = 0xffffffffffffffff; + Uint64 minGci = ~Uint64(0); Uint64 maxGci = 0; Uint32 numAuthorBits = ctx->getTab()->getExtraRowAuthorBits(); Uint32 authorMask = (1 << numAuthorBits) -1; === modified file 'storage/ndb/test/run-test/atrt-backtrace.sh' --- a/storage/ndb/test/run-test/atrt-backtrace.sh 2011-02-02 00:40:07 +0000 +++ b/storage/ndb/test/run-test/atrt-backtrace.sh 2011-06-01 08:55:03 +0000 @@ -16,7 +16,7 @@ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # Does not work on Windows (gcc 
only) -if uname | grep -iq cygwin +if [ `uname | grep -ic cygwin || true` -ne 0 ] then exit fi === modified file 'storage/ndb/test/run-test/atrt-gather-result.sh' --- a/storage/ndb/test/run-test/atrt-gather-result.sh 2011-04-08 11:06:53 +0000 +++ b/storage/ndb/test/run-test/atrt-gather-result.sh 2011-06-01 11:52:51 +0000 @@ -21,12 +21,13 @@ mkdir -p result cd result rm -rf * -if uname | grep -iq cygwin; then +if [ `uname | grep -ic cygwin || true` -ne 0 ] +then while [ $# -gt 0 ] do SAVE_IFS=$IFS IFS=":" - declare -a ARR=($1) + declare -a ARR="($1)" IFS=$SAVE_IFS DIR=`dirname "${ARR[1]}"` REMOTE_DIR=`cygpath -u $DIR` === modified file 'storage/ndb/test/run-test/atrt-setup.sh' --- a/storage/ndb/test/run-test/atrt-setup.sh 2011-04-08 11:06:53 +0000 +++ b/storage/ndb/test/run-test/atrt-setup.sh 2011-06-01 08:55:03 +0000 @@ -20,7 +20,7 @@ LOCAL_DIR=$2 REMOTE_DIR=$3 verbose= -if uname | grep -iq cygwin +if [ `uname | grep -ic cygwin || true` -ne 0 ] then verbose=1 fi @@ -46,7 +46,7 @@ then exit 0; fi -if uname | grep -iq cygwin +if [ `uname | grep -ic cygwin || true` -ne 0 ] then LOCAL_DIR=`cygpath -u $LOCAL_DIR` REMOTE_DIR=`cygpath -u $REMOTE_DIR` === modified file 'storage/ndb/test/run-test/autotest-boot.sh' --- a/storage/ndb/test/run-test/autotest-boot.sh 2011-02-01 23:27:25 +0000 +++ b/storage/ndb/test/run-test/autotest-boot.sh 2011-06-01 08:55:03 +0000 @@ -31,7 +31,7 @@ VERSION="autotest-boot.sh version 1.01" DATE=`date '+%Y-%m-%d'` if [ `uname -s` != "SunOS" ] then - if uname | grep -iq cygwin + if [ `uname | grep -ic cygwin || true` -ne 0 ] then HOST=`hostname` else @@ -247,7 +247,7 @@ then if [ -z "$clone1" ] then cd $dst_place0 - if uname | grep -iq cygwin + if [ `uname | grep -ic cygwin || true` -ne 0 ] then install_dir_dos=`cygpath -w $install_dir` cmd /c cscript win/configure.js WITH_NDBCLUSTER_STORAGE_ENGINE --without-plugins=archive,blackhole,example,federated === modified file 'storage/ndb/test/run-test/autotest-run.sh' --- 
a/storage/ndb/test/run-test/autotest-run.sh 2011-02-01 23:27:25 +0000 +++ b/storage/ndb/test/run-test/autotest-run.sh 2011-06-01 08:55:03 +0000 @@ -31,7 +31,7 @@ VERSION="autotest-run.sh version 1.00" DATE=`date '+%Y-%m-%d'` if [ `uname -s` != "SunOS" ] then - if uname | grep -iq cygwin + if [ `uname | grep -ic cygwin || true` -ne 0 ] then HOST=`hostname` # Returns windows CRLF @@ -245,8 +245,8 @@ choose_conf(){ count_hosts(){ ch="CHOOSE_host" - cnt=$(for i in `grep $ch $1 | sed 's!,! !g'` ; do echo $i; done\ - | grep $ch | sort | uniq | wc -l) + list=`grep $ch $1 | sed 's!,! !g'` + cnt=`for i in $list; do echo $i; done | grep $ch | sort | uniq | wc -l` echo $cnt } @@ -278,7 +278,7 @@ rm -rf $res_dir/* $run_dir/* cd $run_dir mkdir run -if uname | grep -iq cygwin +if [ `uname | grep -ic cygwin || true` -ne 0 ] then run_dir=`cygpath -m $run_dir` install_dir0=`cygpath -u $install_dir0` No bundle (reason: useless for push emails).