List: Commits    « Previous Message | Next Message »
From:jack andrews Date:July 17 2009 11:59am
Subject:bzr commit into mysql-5.1-telco-6.3 branch (jack:2999) Bug#45899
View as plain text  
#At file:///D:/repo/rate-limit-6-3-bug45899/ based on revid:magnus.blaudd@stripped

 2999 jack andrews	2009-07-17
      Bug #45899  	api nodes (re-)connecting to starting data nodes too fast ...
        . committed on behalf of magnus
      
      
      Delay further connection attempts after 3 disconnects
      during the "handshake" phase. Increase the delay with the number
      of rejected connects, but limit it to at most 10 seconds.
      
      3 failed connects -> 1 second "block"
      4                 -> 2
      ..                   ..
      13                -> 10
      
      
      NOTE! The debug printouts (e.g. the "ratelimit: ..." ndbout_c call) still need to be removed
      
       storage/ndb/src/common/transporter/Transporter.cpp         |   54 +++++++++++++
       storage/ndb/src/common/transporter/Transporter.hpp         |    8 +
       storage/ndb/src/common/transporter/TransporterRegistry.cpp |   12 ++
       3 files changed, 73 insertions(+), 1 deletion(-)

    modified:
      storage/ndb/src/common/transporter/Transporter.cpp
      storage/ndb/src/common/transporter/Transporter.hpp
      storage/ndb/src/common/transporter/TransporterRegistry.cpp
=== modified file 'storage/ndb/src/common/transporter/Transporter.cpp'
--- a/storage/ndb/src/common/transporter/Transporter.cpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/common/transporter/Transporter.cpp	2009-07-17 11:56:54 +0000
@@ -43,6 +43,8 @@ Transporter::Transporter(TransporterRegi
   : m_s_port(s_port), remoteNodeId(rNodeId), localNodeId(lNodeId),
     isServer(lNodeId==serverNodeId),
     m_packer(_signalId, _checksum),  isMgmConnection(_isMgmConnection),
+    m_connection_refused_counter(0),
+    m_connect_block_end(0),
     m_type(_type),
     m_transporter_registry(t_reg)
 {
@@ -181,6 +183,7 @@ Transporter::connect_client(NDB_SOCKET_T
   char buf[256];
   if (s_input.gets(buf, 256) == 0) {
     NDB_CLOSE_SOCKET(sockfd);
+    connection_refused();
     DBUG_RETURN(false);
   }
 
@@ -193,10 +196,17 @@ Transporter::connect_client(NDB_SOCKET_T
     // ok, but with no checks on transporter configuration compatability
     break;
   default:
+    connection_refused();
     NDB_CLOSE_SOCKET(sockfd);
     DBUG_RETURN(false);
   }
 
+  /*
+     At this point the server has accepted the connection
+     and any connection block should be reset
+   */
+  reset_connection_block();
+
   DBUG_PRINT("info", ("nodeId=%d remote_transporter_type=%d",
 		      nodeId, remote_transporter_type));
 
@@ -209,6 +219,7 @@ Transporter::connect_client(NDB_SOCKET_T
       NDB_CLOSE_SOCKET(sockfd);
       g_eventLogger->error("Incompatible configuration: transporter type "
                            "mismatch with node %d", nodeId);
+      ndbout_c("ratelimit: wrong transporter type");
       DBUG_RETURN(false);
     }
   }
@@ -241,3 +252,46 @@ Transporter::doDisconnect() {
   m_connected= false;
   disconnectImpl();
 }
+
+static const Uint32 MAX_BLOCK_TIME = 10; // seconds
+static const Uint32 MIN_CONNECTIONS_REFUSED = 3; // refusals before blocking starts
+
+// Count one refused connect; once the limit is reached, (re)start the block
+void
+Transporter::connection_refused()
+{
+  m_connection_refused_counter++;
+  if (m_connection_refused_counter < MIN_CONNECTIONS_REFUSED)
+    return; // Not blocked yet
+
+  if (m_connect_block_end == 0)
+    g_eventLogger->info("Connection to %d blocked", remoteNodeId);
+  // Block for 1 second at the MIN_CONNECTIONS_REFUSED'th refusal, plus one
+  // second per further refusal, limited to MAX_BLOCK_TIME seconds
+  m_connect_block_end = NdbTick_CurrentMillisecond() +
+    min(MAX_BLOCK_TIME,
+        m_connection_refused_counter - MIN_CONNECTIONS_REFUSED + 1) * 1000;
+}
+
+void
+Transporter::reset_connection_block()
+{
+  m_connect_block_end = 0;          // 0 is the "no block active" value
+  m_connection_refused_counter = 0; // start counting refusals from scratch
+}
+
+bool
+Transporter::is_connect_blocked(void)
+{
+  // A zero end time means that no block is active
+  const bool block_active = (m_connect_block_end != 0);
+  if (block_active && NdbTick_CurrentMillisecond() <= m_connect_block_end)
+    return true; // Blocked
+  if (block_active)
+  {
+    // The block has expired: log it and clear the end time
+    g_eventLogger->info("Connection to %d unblocked", remoteNodeId);
+    m_connect_block_end = 0;
+  }
+  return false;
+}

=== modified file 'storage/ndb/src/common/transporter/Transporter.hpp'
--- a/storage/ndb/src/common/transporter/Transporter.hpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/common/transporter/Transporter.hpp	2009-07-17 11:56:54 +0000
@@ -30,6 +30,7 @@
 
 #include <NdbMutex.h>
 #include <NdbThread.h>
+#include <NdbTick.h>
 
 class Transporter {
   friend class TransporterRegistry;
@@ -93,6 +94,8 @@ public:
   void set_status_overloaded(bool val) {
     m_transporter_registry.set_status_overloaded(remoteNodeId, val);
   }
+
+  bool is_connect_blocked();
   
 protected:
   Transporter(TransporterRegistry &,
@@ -156,6 +159,11 @@ private:
   SocketClient *m_socket_client;
   struct in_addr m_connect_address;
 
+  Uint32 m_connection_refused_counter;
+  NDB_TICKS m_connect_block_end;
+  void connection_refused();
+  void reset_connection_block();
+
 protected:
   Uint32 getErrorCount();
   Uint32 m_errorCount;

=== modified file 'storage/ndb/src/common/transporter/TransporterRegistry.cpp'
--- a/storage/ndb/src/common/transporter/TransporterRegistry.cpp	2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/common/transporter/TransporterRegistry.cpp	2009-07-17 11:56:54 +0000
@@ -1276,8 +1276,18 @@ TransporterRegistry::start_clients_threa
 	  /**
 	   * First, we try to connect (if we have a port number).
 	   */
+
 	  if (t->get_s_port())
-	    connected= t->connect_client();
+          {
+            // When ndbd is starting up, it won't allow
+            // ndbapi clients to connect until it's started
+            // The transporter will detect this case and
+            // limit rapid reconnect attempts
+            if (t->is_connect_blocked())
+              continue; // Too many refused connections
+
+            connected= t->connect_client();
+          }
 
 	  /**
 	   * If dynamic, get the port for connecting from the management server


Attachment: [text/bzr-bundle] bzr/jack@sun.com-20090717115654-j3785ne2a926q6bh.bundle
Thread
bzr commit into mysql-5.1-telco-6.3 branch (jack:2999) Bug#45899 - jack andrews - 17 Jul