From: Date: February 14 2007 5:05am Subject: bk commit into 5.0 tree (tomas:1.2388) BUG#26293 List-Archive: http://lists.mysql.com/commits/19822 X-Bug: 26293 Message-Id: <20070214040553.B3380640F3@poseidon.mysql.com> Below is the list of changes that have just been committed into a local 5.0 repository of tomas. When tomas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2388 07/02/14 11:05:38 tomas@stripped +7 -0 Bug#26293 cluster mgmt node sometimes doesn't receive events from all nodes on restart - signals were sometimes sent too early when setting up subscriptions ndb/src/ndbapi/SignalSender.hpp 1.3 07/02/14 11:05:31 tomas@stripped +1 -1 make method const ndb/src/ndbapi/SignalSender.cpp 1.9 07/02/14 11:05:31 tomas@stripped +2 -0 assert to see that node is sendable when signal is sent ndb/src/ndbapi/ClusterMgr.hpp 1.14 07/02/14 11:05:31 tomas@stripped +1 -0 added status variable m_api_reg_conf in cluster manager to correctly be able to determine if a node is sendable ndb/src/ndbapi/ClusterMgr.cpp 1.32 07/02/14 11:05:31 tomas@stripped +4 -1 added status variable m_api_reg_conf in cluster manager to correctly be able to determine if a node is sendable ndb/src/mgmsrv/MgmtSrvr.cpp 1.112 07/02/14 11:05:31 tomas@stripped +74 -20 bug in that signals were sent before api reg conf arrived, causing thrown away signals and subsequent hangs in mgmtserver also add retry if node connected but not yet received api reg conf ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 1.31 07/02/14 11:05:31 tomas@stripped +19 -1 added dump for active subscriptions in cmvmi ndb/include/kernel/signaldata/DumpStateOrd.hpp 1.11 07/02/14 11:05:31 tomas@stripped +4 -0 added dump for active subscriptions in cmvmi # This is a BitKeeper patch. 
What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: tomas # Host: poseidon.mysql.com # Root: /home/tomas/mysql-5.0-telco-gca --- 1.10/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2007-01-12 03:12:09 +07:00 +++ 1.11/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2007-02-14 11:05:31 +07:00 @@ -107,6 +107,10 @@ public: CmvmiDumpLongSignalMemory = 2601, CmvmiSetRestartOnErrorInsert = 2602, CmvmiTestLongSigWithDelay = 2603, + CmvmiDumpSubscriptions = 2604, /* note: done to respective outfile + to be able to debug if events + for some reason does not end up + in clusterlog */ // 7000 DIH // 7001 DIH // 7002 DIH --- 1.30/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 2007-01-18 03:18:46 +07:00 +++ 1.31/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 2007-02-14 11:05:31 +07:00 @@ -897,7 +897,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* sign case TimeToWaitAlive: // QMGR - case HeartbeatIntervalDbDb: // TODO ev till Ndbcnt också + case HeartbeatIntervalDbDb: // TODO possibly Ndbcnt too case HeartbeatIntervalDbApi: case ArbitTimeout: sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB); @@ -1105,6 +1105,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal } } + if (arg == DumpStateOrd::CmvmiDumpSubscriptions) + { + SubscriberPtr ptr; + subscribers.first(ptr); + g_eventLogger.info("List subscriptions:"); + while(ptr.i != RNIL) + { + g_eventLogger.info("Subscription: %u, nodeId: %u, ref: 0x%x", + ptr.i, refToNode(ptr.p->blockRef), ptr.p->blockRef); + for(Uint32 i = 0; i < LogLevel::LOGLEVEL_CATEGORIES; i++) + { + Uint32 level = ptr.p->logLevel.getLogLevel((LogLevel::EventCategory)i); + g_eventLogger.info("Category %u Level %u", i, level); + } + subscribers.next(ptr); + } + } + if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), --- 1.111/ndb/src/mgmsrv/MgmtSrvr.cpp 2007-01-23 11:44:36 +07:00 
+++ 1.112/ndb/src/mgmsrv/MgmtSrvr.cpp 2007-02-14 11:05:31 +07:00 @@ -704,7 +704,7 @@ int MgmtSrvr::okToSendTo(NodeId nodeId, return WRONG_PROCESS_TYPE; // Check if we have contact with it if(unCond){ - if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) + if(theFacade->theClusterMgr->getNodeInfo(nodeId).m_api_reg_conf) return 0; } else if (theFacade->get_node_alive(nodeId) == true) @@ -1562,32 +1562,85 @@ MgmtSrvr::status(int nodeId, } int -MgmtSrvr::setEventReportingLevelImpl(int nodeId, +MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg, const EventSubscribeReq& ll) { SignalSender ss(theFacade); - ss.lock(); - - SimpleSignal ssig; - EventSubscribeReq * dst = - CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend()); - ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ, - EventSubscribeReq::SignalLength); - *dst = ll; - - NodeBitmask nodes; + NdbNodeBitmask nodes; + int retries = 30; nodes.clear(); - Uint32 max = (nodeId == 0) ? (nodeId = 1, MAX_NDB_NODES) : nodeId; - for(; (Uint32) nodeId <= max; nodeId++) + while (1) { - if (nodeTypes[nodeId] != NODE_TYPE_DB) - continue; - if (okToSendTo(nodeId, true)) - continue; - if (ss.sendSignal(nodeId, &ssig) == SEND_OK) + Uint32 nodeId, max; + ss.lock(); + SimpleSignal ssig; + EventSubscribeReq * dst = + CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend()); + ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ, + EventSubscribeReq::SignalLength); + *dst = ll; + + if (nodeId_arg == 0) + { + // all nodes + nodeId = 1; + max = MAX_NDB_NODES; + } + else { - nodes.set(nodeId); + // only one node + max = nodeId = nodeId_arg; + } + // first make sure nodes are sendable + for(; nodeId <= max; nodeId++) + { + if (nodeTypes[nodeId] != NODE_TYPE_DB) + continue; + if (okToSendTo(nodeId, true)) + { + if (theFacade->theClusterMgr->getNodeInfo(nodeId).connected == false) + { + // node not connected we can safely skip this one + continue; + } + // api_reg_conf not recevied yet, need to retry + break; + } } + if 
(nodeId <= max) + { + if (--retries) + { + ss.unlock(); + NdbSleep_MilliSleep(100); + continue; + } + return SEND_OR_RECEIVE_FAILED; + } + + if (nodeId_arg == 0) + { + // all nodes + nodeId = 1; + max = MAX_NDB_NODES; + } + else + { + // only one node + max = nodeId = nodeId_arg; + } + // now send to all sendable nodes nodes + // note, lock is held, so states have not changed + for(; (Uint32) nodeId <= max; nodeId++) + { + if (nodeTypes[nodeId] != NODE_TYPE_DB) + continue; + if (theFacade->theClusterMgr->getNodeInfo(nodeId).connected == false) + continue; // node is not connected, skip + if (ss.sendSignal(nodeId, &ssig) == SEND_OK) + nodes.set(nodeId); + } + break; } if (nodes.isclear()) @@ -1598,6 +1651,7 @@ MgmtSrvr::setEventReportingLevelImpl(int int error = 0; while (!nodes.isclear()) { + Uint32 nodeId; SimpleSignal *signal = ss.waitFor(); int gsn = signal->readSignalNumber(); nodeId = refToNode(signal->header.theSendersBlockRef); --- 1.31/ndb/src/ndbapi/ClusterMgr.cpp 2007-01-23 11:44:36 +07:00 +++ 1.32/ndb/src/ndbapi/ClusterMgr.cpp 2007-02-14 11:05:31 +07:00 @@ -327,7 +327,7 @@ ClusterMgr::showState(NodeId nodeId){ ClusterMgr::Node::Node() : m_state(NodeState::SL_NOTHING) { compatible = nfCompleteRep = true; - connected = defined = m_alive = false; + connected = defined = m_alive = m_api_reg_conf = false; m_state.m_connected_nodes.clear(); } @@ -401,6 +401,8 @@ ClusterMgr::execAPI_REGCONF(const Uint32 node.m_info.m_version); } + node.m_api_reg_conf = true; + node.m_state = apiRegConf->nodeState; if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED || node.m_state.startLevel == NodeState::SL_SINGLEUSER)){ @@ -519,6 +521,7 @@ ClusterMgr::reportDisconnected(NodeId no noOfConnectedNodes--; theNodes[nodeId].connected = false; + theNodes[nodeId].m_api_reg_conf = false; theNodes[nodeId].m_state.m_connected_nodes.clear(); reportNodeFailed(nodeId, true); --- 1.13/ndb/src/ndbapi/ClusterMgr.hpp 2007-01-23 11:44:36 +07:00 +++ 
1.14/ndb/src/ndbapi/ClusterMgr.hpp 2007-02-14 11:05:31 +07:00 @@ -65,6 +65,7 @@ public: bool compatible; // Version is compatible bool nfCompleteRep; // NF Complete Rep has arrived bool m_alive; // Node is alive + bool m_api_reg_conf;// API_REGCONF has arrived NodeInfo m_info; NodeState m_state; --- 1.8/ndb/src/ndbapi/SignalSender.cpp 2007-01-23 11:44:36 +07:00 +++ 1.9/ndb/src/ndbapi/SignalSender.cpp 2007-02-14 11:05:31 +07:00 @@ -140,6 +140,8 @@ SignalSender::getNoOfConnectedNodes() co SendStatus SignalSender::sendSignal(Uint16 nodeId, const SimpleSignal * s){ + assert(getNodeInfo(nodeId).m_api_reg_conf == true || + s->readSignalNumber() == GSN_API_REGREQ); return theFacade->theTransporterRegistry->prepareSend(&s->header, 1, // JBB &s->theData[0], --- 1.2/ndb/src/ndbapi/SignalSender.hpp 2006-12-24 02:04:18 +07:00 +++ 1.3/ndb/src/ndbapi/SignalSender.hpp 2007-02-14 11:05:31 +07:00 @@ -32,7 +32,7 @@ public: Uint32 theData[25]; LinearSectionPtr ptr[3]; - int readSignalNumber() {return header.theVerId_signalNumber; } + int readSignalNumber() const {return header.theVerId_signalNumber; } Uint32 *getDataPtrSend() { return theData; } const Uint32 *getDataPtr() const { return theData; }