#At file:///home/msvensson/mysql/7.0-bug48669/ based on revid:magnus.blaudd@stripped05-paa4ezojfen04h61
3231 Magnus Blåudd 2009-11-17
Bug#48669 ndb_mgmd needs --no-wait option
- part2, adds checksum to CONFIG_CHECK protocol
@ storage/ndb/include/kernel/signaldata/ConfigChange.hpp
Add new errorcode and message
@ storage/ndb/src/mgmsrv/Config.cpp
add 'checksum' to Config, using the already existing checksum that is created
when config is packed, thus it should be endian safe.
@ storage/ndb/src/mgmsrv/Config.hpp
add Config::checksum
@ storage/ndb/src/mgmsrv/ConfigManager.cpp
send checksum in CONFIG_CHECK_REQ
compare checksum when receiving CONFIG_CHECK_REQ
@ storage/ndb/src/mgmsrv/testConfig.cpp
Add tests for Config::checksum
@ storage/ndb/test/ndbapi/testMgmd.cpp
Add test for starting two ndb_mgmd's from different config files using --nowait-nodes, it will
be detected by checksum and the node with lowest nodeid will shutdown
modified:
storage/ndb/include/kernel/signaldata/ConfigChange.hpp
storage/ndb/src/mgmsrv/Config.cpp
storage/ndb/src/mgmsrv/Config.hpp
storage/ndb/src/mgmsrv/ConfigManager.cpp
storage/ndb/src/mgmsrv/testConfig.cpp
storage/ndb/test/ndbapi/testMgmd.cpp
=== modified file 'storage/ndb/include/kernel/signaldata/ConfigChange.hpp'
--- a/storage/ndb/include/kernel/signaldata/ConfigChange.hpp 2009-09-25 07:57:29 +0000
+++ b/storage/ndb/include/kernel/signaldata/ConfigChange.hpp 2009-11-17 18:13:29 +0000
@@ -204,11 +204,13 @@ class ConfigCheckReq {
friend class ConfigManager;
public:
- STATIC_CONST( SignalLength = 2 );
+ STATIC_CONST( SignalLengthBeforeChecksum = 2 );
+ STATIC_CONST( SignalLength = 3 );
private:
Uint32 state;
Uint32 generation;
+ Uint32 checksum;
};
@@ -246,7 +248,8 @@ class ConfigCheckRef {
enum ErrorCode {
WrongState = 1,
- WrongGeneration = 2
+ WrongGeneration = 2,
+ WrongChecksum = 3
};
static const char* errorMessage(Uint32 error) {
@@ -255,6 +258,8 @@ class ConfigCheckRef {
return "Wrong state";
case WrongGeneration:
return "Wrong generation";
+ case WrongChecksum:
+ return "Wrong checksum";
default:
return "ConfigCheckRef, unknown error";
=== modified file 'storage/ndb/src/mgmsrv/Config.cpp'
--- a/storage/ndb/src/mgmsrv/Config.cpp 2009-10-09 12:47:49 +0000
+++ b/storage/ndb/src/mgmsrv/Config.cpp 2009-11-17 18:13:29 +0000
@@ -824,3 +824,19 @@ Config::get_nodemask(NodeBitmask& mask,
}
}
+
+Uint32
+Config::checksum(void) const {
+ Uint32 chk;
+
+ UtilBuffer buf;
+ pack(buf);
+
+ // Checksum is the last 4 bytes in buffer
+ const char* chk_ptr = (const char*)buf.get_data();
+ chk_ptr += buf.length() - sizeof(Uint32);
+ chk = *(Uint32*) chk_ptr;
+
+ return chk;
+}
+
=== modified file 'storage/ndb/src/mgmsrv/Config.hpp'
--- a/storage/ndb/src/mgmsrv/Config.hpp 2009-09-25 07:57:29 +0000
+++ b/storage/ndb/src/mgmsrv/Config.hpp 2009-11-17 18:13:29 +0000
@@ -120,6 +120,14 @@ public:
bool equal(const Config*, const unsigned* exclude = NULL) const;
/*
+ Return the checksum of the config. The checksum can be used to compare
+ two configs without having the whole config available(for example on
+ a remote host). It can also be printed to log files for manual verification
+ that same config is used
+ */
+ Uint32 checksum(void) const;
+
+ /*
Return bitmask of all defined nodes of a certain type
returns all defined nodes by default.
*/
=== modified file 'storage/ndb/src/mgmsrv/ConfigManager.cpp'
--- a/storage/ndb/src/mgmsrv/ConfigManager.cpp 2009-11-13 11:24:05 +0000
+++ b/storage/ndb/src/mgmsrv/ConfigManager.cpp 2009-11-17 18:13:29 +0000
@@ -1323,6 +1323,22 @@ ConfigManager::execCONFIG_CHANGE_REQ(Sig
}
+static Uint32
+config_check_checksum(const Config* config)
+{
+ Config copy(config);
+
+ // Make constants of a few values in SYSTEM section that are
+ // not part of the checksum used for "config check"
+ copy.setName("CHECKSUM");
+ copy.setPrimaryMgmNode(0);
+
+ Uint32 checksum = copy.checksum();
+
+ return checksum;
+}
+
+
void
ConfigManager::execCONFIG_CHECK_REQ(SignalSender& ss, SimpleSignal* sig)
{
@@ -1337,12 +1353,24 @@ ConfigManager::execCONFIG_CHECK_REQ(Sign
Uint32 generation = m_config->getGeneration();
- g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. " \
- "generation: %d, other_generation: %d, " \
- "our state: %d, other state: %d",
- nodeId, generation, other_generation,
- m_config_state, other_state);
+ // checksum
+ Uint32 checksum = config_check_checksum(m_config);
+ Uint32 other_checksum = req->checksum;
+ if (sig->header.theLength == ConfigCheckReq::SignalLengthBeforeChecksum)
+ {
+ // Other side uses old version without checksum, use our checksum to
+ // bypass the checks
+ g_eventLogger->debug("Other mgmd does not have checksum, using own");
+ other_checksum = checksum;
+ }
+ g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "
+ "Our generation: %d, other generation: %d, "
+ "our state: %d, other state: %d, "
+ "our checksum: 0x%.8x, other checksum: 0x%.8x",
+ nodeId, generation, other_generation,
+ m_config_state, other_state,
+ checksum, other_checksum);
switch (m_config_state)
{
@@ -1376,6 +1404,17 @@ ConfigManager::execCONFIG_CHECK_REQ(Sign
m_config_state, other_state);
return;
}
+
+ if (other_checksum != checksum)
+ {
+ g_eventLogger->warning("Refusing other node, it has different "
+ "checksum: 0x%.8x, expected: 0x%.8x",
+ other_checksum, checksum);
+ sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
+ generation, other_generation,
+ m_config_state, other_state);
+ return;
+ }
break;
case CS_CONFIRMED:
@@ -1393,7 +1432,18 @@ ConfigManager::execCONFIG_CHECK_REQ(Sign
if (other_generation == generation)
{
- ;// OK
+ // Same generation, make sure it has same checksum
+ if (other_checksum != checksum)
+ {
+ g_eventLogger->warning("Refusing other node, it has different "
+ "checksum: 0x%.8x, expected: 0x%.8x",
+ other_checksum, checksum);
+ sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
+ generation, other_generation,
+ m_config_state, other_state);
+ return;
+ }
+ // OK!
}
else if (other_generation < generation)
{
@@ -1429,17 +1479,15 @@ ConfigManager::sendConfigCheckReq(Signal
CAST_PTR(ConfigCheckReq, ssig.getDataPtrSend());
req->state = m_config_state;
req->generation = m_config->getGeneration();
+ req->checksum = config_check_checksum(m_config);
- BaseString buf("Sending CONFIG_CHECK_REQ to node(s) ");
- unsigned i = 0;
- while((i = to.find(i+1)) != NodeBitmask::NotFound)
- buf.appfmt("%d ", i);
- g_eventLogger->debug(buf);
+ g_eventLogger->debug("Sending CONFIG_CHECK_REQ to %s",
+ BaseString::getPrettyText(to).c_str());
require(m_waiting_for.isclear());
m_waiting_for = ss.broadcastSignal(to, ssig, MGM_CONFIG_MAN,
- GSN_CONFIG_CHECK_REQ,
- ConfigCheckReq::SignalLength);
+ GSN_CONFIG_CHECK_REQ,
+ ConfigCheckReq::SignalLength);
}
static bool
@@ -1559,7 +1607,8 @@ ConfigManager::execCONFIG_CHECK_REF(Sign
m_config_state);
assert(ref->generation != ref->expected_generation ||
- ref->state != ref->expected_state);
+ ref->state != ref->expected_state ||
+ ref->error == ConfigCheckRef::WrongChecksum);
if((Uint32)m_config_state != ref->state)
{
// The config state changed while this check was in the air
@@ -1638,6 +1687,15 @@ ConfigManager::execCONFIG_CHECK_REF(Sign
break;
}
+ if (ref->error == ConfigCheckRef::WrongChecksum &&
+ m_node_id < nodeId)
+ {
+ g_eventLogger->warning("Ignoring CONFIG_CHECK_REF for wrong checksum "
+ "other node has higher node id and should "
+ "shutdown");
+ return;
+ }
+
g_eventLogger->error("Terminating");
exit(1);
}
=== modified file 'storage/ndb/src/mgmsrv/testConfig.cpp'
--- a/storage/ndb/src/mgmsrv/testConfig.cpp 2009-11-03 16:40:26 +0000
+++ b/storage/ndb/src/mgmsrv/testConfig.cpp 2009-11-17 18:13:29 +0000
@@ -260,6 +260,40 @@ print_restart_info(void)
fprintf(stderr, "\n");
}
+
+static void
+checksum_config(void)
+{
+ Config* c1=
+ create_config("[ndbd]", "NoOfReplicas=1",
+ "[ndb_mgmd]", "HostName=localhost",
+ "[mysqld]", NULL);
+ Config* c2=
+ create_config("[ndbd]", "NoOfReplicas=1",
+ "[ndb_mgmd]", "HostName=localhost",
+ "[mysqld]", "[mysqld]", NULL);
+
+ ndbout_c("== checksum tests ==");
+ Uint32 c1_check = c1->checksum();
+ Uint32 c2_check = c2->checksum();
+ ndbout_c("c1->checksum(): 0x%x", c1_check);
+ ndbout_c("c2->checksum(): 0x%x", c2_check);
+ // Different config should not have same checksum
+ CHECK(c1_check != c2_check);
+
+ // Same config should have same checksum
+ CHECK(c1_check == c1->checksum());
+
+ // Copied config should have same checksum
+ Config c1_copy(c1);
+ CHECK(c1_check == c1_copy.checksum());
+
+ ndbout_c("==================");
+
+ delete c1;
+ delete c2;
+}
+
#include <NdbTap.hpp>
TAPTEST(MgmConfig)
@@ -267,6 +301,7 @@ TAPTEST(MgmConfig)
ndb_init();
diff_config();
CHECK(check_params());
+ checksum_config();
#if 0
print_restart_info();
#endif
=== modified file 'storage/ndb/test/ndbapi/testMgmd.cpp'
--- a/storage/ndb/test/ndbapi/testMgmd.cpp 2009-11-13 11:24:05 +0000
+++ b/storage/ndb/test/ndbapi/testMgmd.cpp 2009-11-17 18:13:29 +0000
@@ -666,7 +666,7 @@ int runTestNowaitNodes(NDBT_Context* ctx
}
- // Create new config.ini and restart the second mgmd from it config
+ // Create new config.ini
g_err << "** Create config2.ini" << endl;
CHECK(ConfigFactory::put(config, "ndb_mgmd", 1, "ArbitrationDelay", 100));
CHECK(ConfigFactory::write_config_ini(config,
@@ -675,36 +675,112 @@ int runTestNowaitNodes(NDBT_Context* ctx
NULL).c_str()));
g_err << "** Reload second mgmd from config2.ini" << endl;
- Mgmd* mgmd2 = mgmds[1];
- CHECK(mgmd2->stop());
- // Start from config2.ini
+ {
+ Mgmd* mgmd2 = mgmds[1];
+ CHECK(mgmd2->stop());
+ // Start from config2.ini
+ CHECK(mgmd2->start_from_config_ini(wd.path(),
+ "-f config2.ini",
+ "--reload", NULL));
+ CHECK(mgmd2->connect(config));
+ CHECK(mgmd2->wait_confirmed_config());
+
+ CHECK(file_exists(path(wd.path(),
+ "ndb_1_config.bin.1",
+ NULL).c_str()));
+ CHECK(file_exists(path(wd.path(),
+ "ndb_2_config.bin.1",
+ NULL).c_str()));
+
+ // Both ndb_mgmd(s) should have reloaded and new binary config exist
+ CHECK(file_exists(path(wd.path(),
+ "ndb_1_config.bin.2",
+ NULL).c_str()));
+ CHECK(file_exists(path(wd.path(),
+ "ndb_2_config.bin.2",
+ NULL).c_str()));
+ }
+
+ // Stop the ndb_mgmd(s)
+ for (unsigned i = 0; i < mgmds.size(); i++)
+ CHECK(mgmds[i]->stop());
+
+ return NDBT_OK;
+}
+
+
+int runTestNowaitNodes2(NDBT_Context* ctx, NDBT_Step* step)
+{
+ int ret;
+ NDBT_Workingdir wd("test_mgmd"); // temporary working directory
+
+ // Create config.ini
+ Properties config = ConfigFactory::create(2);
+ CHECK(ConfigFactory::write_config_ini(config,
+ path(wd.path(),
+ "config.ini",
+ NULL).c_str()));
+
+ g_err << "** Start mgmd1 from config.ini" << endl;
+ Mgmd* mgmd1 = new Mgmd(1);
+ CHECK(mgmd1->start_from_config_ini(wd.path(),
+ "--initial",
+ "--nowait-nodes=1-255",
+ NULL));
+ CHECK(mgmd1->connect(config));
+ CHECK(mgmd1->wait_confirmed_config());
+
+ // check config files exist
+ CHECK(file_exists(path(wd.path(),
+ "ndb_1_config.bin.1",
+ NULL).c_str()));
+
+ g_err << "** Create config2.ini" << endl;
+ CHECK(ConfigFactory::put(config, "ndb_mgmd", 1, "ArbitrationDelay", 100));
+ CHECK(ConfigFactory::write_config_ini(config,
+ path(wd.path(),
+ "config2.ini",
+ NULL).c_str()));
+
+ g_err << "** Start mgmd2 from config2.ini" << endl;
+ Mgmd* mgmd2 = new Mgmd(2);
+ CHECK(mgmd2->start_from_config_ini(wd.path(),
+ "-f config2.ini",
+ "--initial",
+ "--nowait-nodes=1-255",
+ NULL));
+ CHECK(mgmd2->wait(ret));
+ CHECK(ret == 1);
+
+ CHECK(mgmd1->stop());
+
+ g_err << "** Start mgmd2 again from config2.ini" << endl;
CHECK(mgmd2->start_from_config_ini(wd.path(),
"-f config2.ini",
- "--reload", NULL));
+ "--initial",
+ "--nowait-nodes=1-255",
+ NULL));
+
+
CHECK(mgmd2->connect(config));
CHECK(mgmd2->wait_confirmed_config());
- CHECK(file_exists(path(wd.path(),
- "ndb_1_config.bin.1",
- NULL).c_str()));
+ // check config files exist
CHECK(file_exists(path(wd.path(),
"ndb_2_config.bin.1",
NULL).c_str()));
- // Both ndb_mgmd(s) should have reloaded and new binary config exist
- CHECK(file_exists(path(wd.path(),
- "ndb_1_config.bin.2",
- NULL).c_str()));
- CHECK(file_exists(path(wd.path(),
- "ndb_2_config.bin.2",
- NULL).c_str()));
+ g_err << "** Start mgmd1 from config.ini, mgmd2 should shutdown" << endl;
+ CHECK(mgmd1->start_from_config_ini(wd.path(),
+ "--initial",
+ "--nowait-nodes=1-255",
+ NULL));
+ CHECK(mgmd2->wait(ret));
+ CHECK(ret == 1);
- // Stop the ndb_mgmd(s)
- for (unsigned i = 0; i < mgmds.size(); i++)
- CHECK(mgmds[i]->stop());
+ CHECK(mgmd1->stop());
return NDBT_OK;
-
}
NDBT_TESTSUITE(testMgmd);
@@ -733,6 +809,13 @@ TESTCASE("NowaitNodes",
{
INITIALIZER(runTestNowaitNodes);
}
+TESTCASE("NowaitNodes2",
+ "Test that one mgmd(of 2) can start alone with usage "
+ "of --nowait-nodes, then start the second mgmd from different "
+ "configuration and the one with lowest nodeid should shutdown")
+{
+ INITIALIZER(runTestNowaitNodes2);
+}
NDBT_TESTSUITE_END(testMgmd);
Attachment: [text/bzr-bundle] bzr/magnus.blaudd@sun.com-20091117181329-sdukudj6u8l503up.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-7.0 branch (magnus.blaudd:3231)Bug#48669 | Magnus Blåudd | 17 Nov |