#At file:///home/jonas/src/telco-6.3/
2722 Jonas Oreland 2008-10-28
ndb - bug#40370 - master node can die during node-restart
- fix incorrect assertion
- improve testNodeRestart -n pnr to restart half of the cluster
instead of just 2 nodes at a time
modified:
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
storage/ndb/test/ndbapi/testNodeRestart.cpp
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2008-09-12 07:21:54 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2008-10-28 10:57:58 +0000
@@ -8870,25 +8870,27 @@ void Dbdih::copyGciLab(Signal* signal, C
{
if(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE)
{
+ jam();
/**
* There can currently only be two waiting
*/
for (Uint32 i = 0; i<CopyGCIMaster::WAIT_CNT; i++)
{
+ jam();
if (c_copyGCIMaster.m_waiting[i] == CopyGCIReq::IDLE)
{
jam();
c_copyGCIMaster.m_waiting[i] = reason;
return;
}
-
- /**
- * Code should *not* request more than WAIT_CNT copy-gci's
- * so this is an internal error
- */
- ndbrequire(false);
- return;
}
+
+ /**
+ * Code should *not* request more than WAIT_CNT copy-gci's
+ * so this is an internal error
+ */
+ ndbrequire(false);
+ return;
}
c_copyGCIMaster.m_copyReason = reason;
=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-08-21 22:14:40 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-10-28 10:57:58 +0000
@@ -2093,18 +2093,48 @@ err:
}
int
+max_cnt(int arr[], int cnt)
+{
+ int res = 0;
+
+ for (int i = 0; i<cnt ; i++)
+ {
+ if (arr[i] > res)
+ {
+ res = arr[i];
+ }
+ }
+ return res;
+}
+
+int
runPnr(NDBT_Context* ctx, NDBT_Step* step)
{
int loops = ctx->getNumLoops();
NdbRestarter res;
bool lcp = ctx->getProperty("LCP", (unsigned)0);
- if (res.getNumDbNodes() < 4)
+ int nodegroups[MAX_NDB_NODES];
+ bzero(nodegroups, sizeof(nodegroups));
+
+ for (int i = 0; i<res.getNumDbNodes(); i++)
{
- ctx->stopTest();
- return NDBT_OK;
+ int node = res.getDbNodeId(i);
+ nodegroups[res.getNodeGroup(node)]++;
}
+ for (int i = 0; i<MAX_NDB_NODES; i++)
+ {
+ if (nodegroups[i] && nodegroups[i] == 1)
+ {
+ /**
+ * nodegroup with only 1 member, can't run test
+ */
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+ }
+
for (int i = 0; i<loops && ctx->isTestStopped() == false; i++)
{
if (lcp)
@@ -2113,24 +2143,40 @@ runPnr(NDBT_Context* ctx, NDBT_Step* ste
res.dumpStateAllNodes(&lcpdump, 1);
}
- int nodes[2];
- nodes[0] = res.getNode(NdbRestarter::NS_RANDOM);
- nodes[1] = res.getRandomNodeOtherNodeGroup(nodes[0], rand());
+ int ng_copy[MAX_NDB_NODES];
+ memcpy(ng_copy, nodegroups, sizeof(ng_copy));
- ndbout_c("restarting %u %u", nodes[0], nodes[1]);
+ Vector<int> nodes;
+ printf("restarting ");
+ while (max_cnt(ng_copy, MAX_NDB_NODES) > 1)
+ {
+ int node = res.getNode(NdbRestarter::NS_RANDOM);
+ int ng = res.getNodeGroup(node);
+ if (ng_copy[ng] > 1)
+ {
+ printf("%u ", node);
+ nodes.push_back(node);
+ ng_copy[ng]--;
+ }
+ }
+ printf("\n");
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
- res.dumpStateOneNode(nodes[0], val2, 2);
- res.dumpStateOneNode(nodes[1], val2, 2);
+ for (Uint32 j = 0; j<nodes.size(); j++)
+ {
+ res.dumpStateOneNode(nodes[j], val2, 2);
+ }
int kill[] = { 9999, 1000, 3000 };
- res.dumpStateOneNode(nodes[0], kill, 3);
- res.dumpStateOneNode(nodes[1], kill, 3);
+ for (Uint32 j = 0; j<nodes.size(); j++)
+ {
+ res.dumpStateOneNode(nodes[j], kill, 3);
+ }
- if (res.waitNodesNoStart(nodes, 2))
+ if (res.waitNodesNoStart(nodes.getBase(), nodes.size()))
return NDBT_FAILED;
- if (res.startNodes(nodes, 2))
+ if (res.startNodes(nodes.getBase(), nodes.size()))
return NDBT_FAILED;
if (res.waitClusterStarted())
| Thread | Author | Date |
|---|---|---|
| • bzr commit into mysql-5.1 branch (jonas:2722) Bug#40370 | Jonas Oreland | 28 Oct |