List:Commits« Previous MessageNext Message »
From:Magnus Blåudd Date:November 17 2009 6:13pm
Subject:bzr commit into mysql-5.1-telco-7.0 branch (magnus.blaudd:3231)
Bug#48669
View as plain text  
#At file:///home/msvensson/mysql/7.0-bug48669/ based on revid:magnus.blaudd@stripped05-paa4ezojfen04h61

 3231 Magnus Blåudd	2009-11-17
      Bug#48669 ndb_mgmd needs --no-wait option
       - part2, adds checksum to CONFIG_CHECK protocol
     @ storage/ndb/include/kernel/signaldata/ConfigChange.hpp
        Add new errorcode and message
     @ storage/ndb/src/mgmsrv/Config.cpp
        add 'checksum' to Config, using the already existing checksum that is created
        when config is packed, thus it should be endian safe.
     @ storage/ndb/src/mgmsrv/Config.hpp
        add Config::checksum
     @ storage/ndb/src/mgmsrv/ConfigManager.cpp
        send checksum in CONFIG_CHECK_REQ
        compare checksum when receiving CONFIG_CHECK_REQ
     @ storage/ndb/src/mgmsrv/testConfig.cpp
        Add tests for Config::checksum
     @ storage/ndb/test/ndbapi/testMgmd.cpp
        Add test for starting two ndb_mgmd's from different config files using --nowait-nodes, it will
        be detected by checksum and the node with lowest nodeid will shutdown

    modified:
      storage/ndb/include/kernel/signaldata/ConfigChange.hpp
      storage/ndb/src/mgmsrv/Config.cpp
      storage/ndb/src/mgmsrv/Config.hpp
      storage/ndb/src/mgmsrv/ConfigManager.cpp
      storage/ndb/src/mgmsrv/testConfig.cpp
      storage/ndb/test/ndbapi/testMgmd.cpp
=== modified file 'storage/ndb/include/kernel/signaldata/ConfigChange.hpp'
--- a/storage/ndb/include/kernel/signaldata/ConfigChange.hpp	2009-09-25 07:57:29 +0000
+++ b/storage/ndb/include/kernel/signaldata/ConfigChange.hpp	2009-11-17 18:13:29 +0000
@@ -204,11 +204,13 @@ class ConfigCheckReq  {
   friend class ConfigManager;
 
 public:
-  STATIC_CONST( SignalLength = 2 );
+  STATIC_CONST( SignalLengthBeforeChecksum = 2 );
+  STATIC_CONST( SignalLength = 3 );
 
 private:
   Uint32 state;
   Uint32 generation;
+  Uint32 checksum;
 };
 
 
@@ -246,7 +248,8 @@ class ConfigCheckRef  {
 
   enum ErrorCode {
     WrongState                 = 1,
-    WrongGeneration            = 2
+    WrongGeneration            = 2,
+    WrongChecksum              = 3
   };
 
   static const char* errorMessage(Uint32 error) {
@@ -255,6 +258,8 @@ class ConfigCheckRef  {
       return "Wrong state";
     case WrongGeneration:
       return "Wrong generation";
+    case WrongChecksum:
+      return "Wrong checksum";
 
     default:
       return "ConfigCheckRef, unknown error";

=== modified file 'storage/ndb/src/mgmsrv/Config.cpp'
--- a/storage/ndb/src/mgmsrv/Config.cpp	2009-10-09 12:47:49 +0000
+++ b/storage/ndb/src/mgmsrv/Config.cpp	2009-11-17 18:13:29 +0000
@@ -824,3 +824,19 @@ Config::get_nodemask(NodeBitmask& mask,
   }
 }
 
+
+Uint32
+Config::checksum(void) const {
+  Uint32 chk;
+
+  UtilBuffer buf;
+  pack(buf);
+
+  // Checksum is the last 4 bytes in buffer
+  const char* chk_ptr = (const char*)buf.get_data();
+  chk_ptr += buf.length() - sizeof(Uint32);
+  chk = *(Uint32*) chk_ptr;
+
+  return chk;
+}
+

=== modified file 'storage/ndb/src/mgmsrv/Config.hpp'
--- a/storage/ndb/src/mgmsrv/Config.hpp	2009-09-25 07:57:29 +0000
+++ b/storage/ndb/src/mgmsrv/Config.hpp	2009-11-17 18:13:29 +0000
@@ -120,6 +120,14 @@ public:
   bool equal(const Config*, const unsigned* exclude = NULL) const;
 
   /*
+    Return the checksum of the config. The checksum can be used to compare
+    two configs without having the whole config available(for example on
+    a remote host). It can also be printed to log files for manual verification
+    that same config is used
+  */
+  Uint32 checksum(void) const;
+
+  /*
     Return bitmask of all defined nodes of a certain type
     returns all defined nodes by default.
    */

=== modified file 'storage/ndb/src/mgmsrv/ConfigManager.cpp'
--- a/storage/ndb/src/mgmsrv/ConfigManager.cpp	2009-11-13 11:24:05 +0000
+++ b/storage/ndb/src/mgmsrv/ConfigManager.cpp	2009-11-17 18:13:29 +0000
@@ -1323,6 +1323,22 @@ ConfigManager::execCONFIG_CHANGE_REQ(Sig
 }
 
 
+static Uint32
+config_check_checksum(const Config* config)
+{
+  Config copy(config);
+
+  // Make constants of a few values in SYSTEM section that are
+  // not part of the  checksum used for "config check"
+  copy.setName("CHECKSUM");
+  copy.setPrimaryMgmNode(0);
+
+  Uint32 checksum = copy.checksum();
+
+  return checksum;
+}
+
+
 void
 ConfigManager::execCONFIG_CHECK_REQ(SignalSender& ss, SimpleSignal* sig)
 {
@@ -1337,12 +1353,24 @@ ConfigManager::execCONFIG_CHECK_REQ(Sign
 
   Uint32 generation = m_config->getGeneration();
 
-  g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "   \
-                       "generation: %d, other_generation: %d, " \
-                       "our state: %d, other state: %d",
-                       nodeId, generation, other_generation,
-                       m_config_state, other_state);
+  // checksum
+  Uint32 checksum = config_check_checksum(m_config);
+  Uint32 other_checksum = req->checksum;
+  if (sig->header.theLength == ConfigCheckReq::SignalLengthBeforeChecksum)
+  {
+    // Other side uses old version without checksum, use our checksum to
+    // bypass the checks
+    g_eventLogger->debug("Other mgmd does not have checksum, using own");
+    other_checksum = checksum;
+  }
 
+  g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "
+                       "Our generation: %d, other generation: %d, "
+                       "our state: %d, other state: %d, "
+                       "our checksum: 0x%.8x, other checksum: 0x%.8x",
+                       nodeId, generation, other_generation,
+                       m_config_state, other_state,
+                       checksum, other_checksum);
 
   switch (m_config_state)
   {
@@ -1376,6 +1404,17 @@ ConfigManager::execCONFIG_CHECK_REQ(Sign
                          m_config_state, other_state);
       return;
     }
+
+    if (other_checksum != checksum)
+    {
+      g_eventLogger->warning("Refusing other node, it has different "
+                             "checksum: 0x%.8x, expected: 0x%.8x",
+                             other_checksum, checksum);
+      sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
+                         generation, other_generation,
+                         m_config_state, other_state);
+      return;
+    }
     break;
 
   case CS_CONFIRMED:
@@ -1393,7 +1432,18 @@ ConfigManager::execCONFIG_CHECK_REQ(Sign
 
     if (other_generation == generation)
     {
-      ;// OK
+      // Same generation, make sure it has same checksum
+      if (other_checksum != checksum)
+      {
+        g_eventLogger->warning("Refusing other node, it has different "
+                               "checksum: 0x%.8x, expected: 0x%.8x",
+                               other_checksum, checksum);
+        sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
+                           generation, other_generation,
+                           m_config_state, other_state);
+        return;
+      }
+      // OK!
     }
     else if (other_generation < generation)
     {
@@ -1429,17 +1479,15 @@ ConfigManager::sendConfigCheckReq(Signal
     CAST_PTR(ConfigCheckReq, ssig.getDataPtrSend());
   req->state =        m_config_state;
   req->generation =   m_config->getGeneration();
+  req->checksum =     config_check_checksum(m_config);
 
-  BaseString buf("Sending CONFIG_CHECK_REQ to node(s) ");
-  unsigned i = 0;
-  while((i = to.find(i+1)) != NodeBitmask::NotFound)
-    buf.appfmt("%d ", i);
-  g_eventLogger->debug(buf);
+  g_eventLogger->debug("Sending CONFIG_CHECK_REQ to %s",
+                       BaseString::getPrettyText(to).c_str());
 
   require(m_waiting_for.isclear());
   m_waiting_for = ss.broadcastSignal(to, ssig, MGM_CONFIG_MAN,
-                                GSN_CONFIG_CHECK_REQ,
-                                ConfigCheckReq::SignalLength);
+                                     GSN_CONFIG_CHECK_REQ,
+                                     ConfigCheckReq::SignalLength);
 }
 
 static bool
@@ -1559,7 +1607,8 @@ ConfigManager::execCONFIG_CHECK_REF(Sign
                       m_config_state);
 
   assert(ref->generation != ref->expected_generation ||
-         ref->state != ref->expected_state);
+         ref->state != ref->expected_state ||
+         ref->error == ConfigCheckRef::WrongChecksum);
   if((Uint32)m_config_state != ref->state)
   {
     // The config state changed while this check was in the air
@@ -1638,6 +1687,15 @@ ConfigManager::execCONFIG_CHECK_REF(Sign
     break;
   }
 
+  if (ref->error == ConfigCheckRef::WrongChecksum &&
+      m_node_id < nodeId)
+  {
+    g_eventLogger->warning("Ignoring CONFIG_CHECK_REF for wrong checksum "
+                           "other node has higher node id and should "
+                           "shutdown");
+    return;
+  }
+
   g_eventLogger->error("Terminating");
   exit(1);
 }

=== modified file 'storage/ndb/src/mgmsrv/testConfig.cpp'
--- a/storage/ndb/src/mgmsrv/testConfig.cpp	2009-11-03 16:40:26 +0000
+++ b/storage/ndb/src/mgmsrv/testConfig.cpp	2009-11-17 18:13:29 +0000
@@ -260,6 +260,40 @@ print_restart_info(void)
   fprintf(stderr, "\n");
 }
 
+
+static void
+checksum_config(void)
+{
+  Config* c1=
+    create_config("[ndbd]", "NoOfReplicas=1",
+                  "[ndb_mgmd]", "HostName=localhost",
+                  "[mysqld]", NULL);
+  Config* c2=
+    create_config("[ndbd]", "NoOfReplicas=1",
+                  "[ndb_mgmd]", "HostName=localhost",
+                  "[mysqld]", "[mysqld]", NULL);
+
+  ndbout_c("== checksum tests ==");
+  Uint32 c1_check = c1->checksum();
+  Uint32 c2_check = c2->checksum();
+  ndbout_c("c1->checksum(): 0x%x", c1_check);
+  ndbout_c("c2->checksum(): 0x%x", c2_check);
+   // Different config should not have same checksum
+  CHECK(c1_check != c2_check);
+
+  // Same config should have same checksum
+  CHECK(c1_check == c1->checksum());
+
+  // Copied config should have same checksum
+  Config c1_copy(c1);
+  CHECK(c1_check == c1_copy.checksum());
+
+  ndbout_c("==================");
+
+  delete c1;
+  delete c2;
+}
+
 #include <NdbTap.hpp>
 
 TAPTEST(MgmConfig)
@@ -267,6 +301,7 @@ TAPTEST(MgmConfig)
   ndb_init();
   diff_config();
   CHECK(check_params());
+  checksum_config();
 #if 0
   print_restart_info();
 #endif

=== modified file 'storage/ndb/test/ndbapi/testMgmd.cpp'
--- a/storage/ndb/test/ndbapi/testMgmd.cpp	2009-11-13 11:24:05 +0000
+++ b/storage/ndb/test/ndbapi/testMgmd.cpp	2009-11-17 18:13:29 +0000
@@ -666,7 +666,7 @@ int runTestNowaitNodes(NDBT_Context* ctx
 
   }
 
-  // Create new config.ini and restart the second mgmd from it config
+  // Create new config.ini
   g_err << "** Create config2.ini" << endl;
   CHECK(ConfigFactory::put(config, "ndb_mgmd", 1, "ArbitrationDelay", 100));
   CHECK(ConfigFactory::write_config_ini(config,
@@ -675,36 +675,112 @@ int runTestNowaitNodes(NDBT_Context* ctx
                                              NULL).c_str()));
 
   g_err << "** Reload second mgmd from config2.ini" << endl;
-  Mgmd* mgmd2 = mgmds[1];
-  CHECK(mgmd2->stop());
-  // Start from config2.ini
+  {
+    Mgmd* mgmd2 = mgmds[1];
+    CHECK(mgmd2->stop());
+    // Start from config2.ini
+    CHECK(mgmd2->start_from_config_ini(wd.path(),
+                                       "-f config2.ini",
+                                       "--reload", NULL));
+    CHECK(mgmd2->connect(config));
+    CHECK(mgmd2->wait_confirmed_config());
+
+    CHECK(file_exists(path(wd.path(),
+                           "ndb_1_config.bin.1",
+                           NULL).c_str()));
+    CHECK(file_exists(path(wd.path(),
+                           "ndb_2_config.bin.1",
+                           NULL).c_str()));
+
+    // Both ndb_mgmd(s) should have reloaded and new binary config exist
+    CHECK(file_exists(path(wd.path(),
+                           "ndb_1_config.bin.2",
+                           NULL).c_str()));
+    CHECK(file_exists(path(wd.path(),
+                           "ndb_2_config.bin.2",
+                           NULL).c_str()));
+  }
+
+  // Stop the ndb_mgmd(s)
+  for (unsigned i = 0; i < mgmds.size(); i++)
+    CHECK(mgmds[i]->stop());
+
+  return NDBT_OK;
+}
+
+
+int runTestNowaitNodes2(NDBT_Context* ctx, NDBT_Step* step)
+{
+  int ret;
+  NDBT_Workingdir wd("test_mgmd"); // temporary working directory
+
+  // Create config.ini
+  Properties config = ConfigFactory::create(2);
+  CHECK(ConfigFactory::write_config_ini(config,
+                                        path(wd.path(),
+                                             "config.ini",
+                                             NULL).c_str()));
+
+  g_err << "** Start mgmd1 from config.ini" << endl;
+  Mgmd* mgmd1 = new Mgmd(1);
+  CHECK(mgmd1->start_from_config_ini(wd.path(),
+                                     "--initial",
+                                     "--nowait-nodes=1-255",
+                                     NULL));
+  CHECK(mgmd1->connect(config));
+  CHECK(mgmd1->wait_confirmed_config());
+
+  // check config files exist
+  CHECK(file_exists(path(wd.path(),
+                         "ndb_1_config.bin.1",
+                         NULL).c_str()));
+
+  g_err << "** Create config2.ini" << endl;
+  CHECK(ConfigFactory::put(config, "ndb_mgmd", 1, "ArbitrationDelay", 100));
+  CHECK(ConfigFactory::write_config_ini(config,
+                                        path(wd.path(),
+                                             "config2.ini",
+                                             NULL).c_str()));
+
+  g_err << "** Start mgmd2 from config2.ini" << endl;
+  Mgmd* mgmd2 = new Mgmd(2);
+  CHECK(mgmd2->start_from_config_ini(wd.path(),
+                                     "-f config2.ini",
+                                     "--initial",
+                                     "--nowait-nodes=1-255",
+                                     NULL));
+  CHECK(mgmd2->wait(ret));
+  CHECK(ret == 1);
+
+  CHECK(mgmd1->stop());
+
+  g_err << "** Start mgmd2 again from config2.ini" << endl;
   CHECK(mgmd2->start_from_config_ini(wd.path(),
                                      "-f config2.ini",
-                                     "--reload", NULL));
+                                     "--initial",
+                                     "--nowait-nodes=1-255",
+                                     NULL));
+
+
   CHECK(mgmd2->connect(config));
   CHECK(mgmd2->wait_confirmed_config());
 
-  CHECK(file_exists(path(wd.path(),
-                         "ndb_1_config.bin.1",
-                         NULL).c_str()));
+  // check config files exist
   CHECK(file_exists(path(wd.path(),
                          "ndb_2_config.bin.1",
                          NULL).c_str()));
 
-  // Both ndb_mgmd(s) should have reloaded and new binary config exist
-  CHECK(file_exists(path(wd.path(),
-                         "ndb_1_config.bin.2",
-                         NULL).c_str()));
-  CHECK(file_exists(path(wd.path(),
-                         "ndb_2_config.bin.2",
-                         NULL).c_str()));
+  g_err << "** Start mgmd1 from config.ini, mgmd2 should shutdown" << endl;
+  CHECK(mgmd1->start_from_config_ini(wd.path(),
+                                     "--initial",
+                                     "--nowait-nodes=1-255",
+                                     NULL));
+  CHECK(mgmd2->wait(ret));
+  CHECK(ret == 1);
 
-  // Stop the ndb_mgmd(s)
-  for (unsigned i = 0; i < mgmds.size(); i++)
-      CHECK(mgmds[i]->stop());
+  CHECK(mgmd1->stop());
 
   return NDBT_OK;
-
 }
 
 NDBT_TESTSUITE(testMgmd);
@@ -733,6 +809,13 @@ TESTCASE("NowaitNodes",
 {
   INITIALIZER(runTestNowaitNodes);
 }
+TESTCASE("NowaitNodes2",
+         "Test that one mgmd(of 2) can start alone with usage "
+         "of --nowait-nodes, then start the second mgmd from different "
+         "configuration and the one with lowest nodeid should shutdown")
+{
+  INITIALIZER(runTestNowaitNodes2);
+}
 
 NDBT_TESTSUITE_END(testMgmd);
 

Attachment: [text/bzr-bundle] bzr/magnus.blaudd@sun.com-20091117181329-sdukudj6u8l503up.bundle
Thread
bzr commit into mysql-5.1-telco-7.0 branch (magnus.blaudd:3231)Bug#48669Magnus Blåudd17 Nov