#At file:///home/msvensson/mysql/6.3-bug45899/ based on revid:jonas@strippedrqfv8drfi1wk7
2993 Magnus Blåudd 2009-07-03
Bug#45899 api nodes (re-)connecting to starting data nodes too fast ...
- The ndbd starts to accept incoming connections from other nodes quite early
in startup, but initially only ndbd and mgm nodes are allowed to connect. During
this time all api nodes will quickly be disconnected. Since the api nodes
retry connecting quite rapidly, this may exhaust the available ports on the
machine.
- Since all incoming connections are handled by a separate thread, we can
allow it to sleep and wait for a little while. Hopefully the incoming connection
will be allowed after the 300*300ms poll loop. But even if that is not the case,
the rate of connection attempts has been reduced.
modified:
storage/ndb/include/transporter/TransporterRegistry.hpp
storage/ndb/src/common/transporter/TransporterRegistry.cpp
=== modified file 'storage/ndb/include/transporter/TransporterRegistry.hpp'
--- a/storage/ndb/include/transporter/TransporterRegistry.hpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/include/transporter/TransporterRegistry.hpp 2009-07-03 10:04:32 +0000
@@ -370,6 +370,8 @@ private:
int m_shm_own_pid;
int m_transp_count;
+
+ bool allow_wait_connecting() const;
};
inline void
=== modified file 'storage/ndb/src/common/transporter/TransporterRegistry.cpp'
--- a/storage/ndb/src/common/transporter/TransporterRegistry.cpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/common/transporter/TransporterRegistry.cpp 2009-07-03 10:04:32 +0000
@@ -217,6 +217,19 @@ TransporterRegistry::init(NodeId nodeId)
}
bool
+TransporterRegistry::allow_wait_connecting() const {
+ DBUG_ENTER("TransporterRegistry::allow_wait_connecting");
+
+ // Check if any node is in connecting
+ for (unsigned i=0; i<maxTransporters; i++) {
+ if (performStates[i] == CONNECTING)
+ DBUG_RETURN(false);
+ }
+
+ DBUG_RETURN(true);
+}
+
+bool
TransporterRegistry::connect_server(NDB_SOCKET_TYPE sockfd)
{
DBUG_ENTER("TransporterRegistry::connect_server");
@@ -256,9 +269,20 @@ TransporterRegistry::connect_server(NDB_
DBUG_RETURN(false);
}
- //check that the transporter should be connected
- if (performStates[nodeId] != TransporterRegistry::CONNECTING) {
- DBUG_PRINT("error", ("Transporter in wrong state for this node id from client"));
+ // check that the transporter should be connected
+ int loops = 0;
+ while (performStates[nodeId] != TransporterRegistry::CONNECTING) {
+ DBUG_PRINT("info", ("Transporter[%d] is not in CONNECTING", nodeId));
+ if (allow_wait_connecting() && loops++ < 300)
+ {
+ DBUG_PRINT("info", ("Allowing connection to wait for CONNECTING"));
+ g_eventLogger->info("Connection from node %d is waiting for CONNECTING",
+ nodeId);
+ NdbSleep_MilliSleep(300);
+ continue;
+ }
+
+ DBUG_PRINT("error", ("Disconnecting node %d, not CONNECTING"));
DBUG_RETURN(false);
}
Attachment: [text/bzr-bundle] bzr/magnus.blaudd@sun.com-20090703100432-t6weotm8rcyelou3.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.3 branch (magnus.blaudd:2993)Bug#45899 | Magnus Blåudd | 3 Jul |